changeset 355:5d06ad313f7d

Create WhooshIndex object at startup, implement some functions for working with indexes
author Michael Mayorov <marchael@kb.csu.ru>
date Wed, 06 Jul 2011 03:09:05 +0000
parents ae63dc7cf921
children d91abc361954
files MoinMoin/app.py MoinMoin/script/__init__.py MoinMoin/search/indexing.py MoinMoin/storage/backends/indexing.py
diffstat 4 files changed, 80 insertions(+), 11 deletions(-)
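
In short, the configured index_dir is now threaded from app.cfg through RouterBackend into ItemIndex, which creates a WhooshIndex at startup. A minimal sketch of the resulting wiring, using an in-memory backend and placeholder values rather than a real wiki configuration (import paths follow the modules referenced in app.py):

    from MoinMoin.storage.backends import router, memory

    index_uri = 'sqlite://'          # SQLAlchemy metadata index, in-memory as in the xml-import path below
    index_dir = '/tmp/moin-whoosh'   # hypothetical directory for the Whoosh indexes

    backend = router.RouterBackend([('/', memory.MemoryBackend())],
                                   index_uri=index_uri,
                                   index_dir=index_dir)
    # IndexingBackendMixin pops index_dir and hands it to ItemIndex, which in turn
    # builds WhooshIndex(indexdir=index_dir) and opens/creates both revision indexes.
    backend.close()
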
line diff
--- a/MoinMoin/app.py	Mon Jul 04 12:38:17 2011 +0000
+++ b/MoinMoin/app.py	Wed Jul 06 03:09:05 2011 +0000
@@ -171,11 +171,15 @@
     index_uri = app.cfg.router_index_uri
     # Just initialize with unprotected backends.
     unprotected_mapping = [(ns, backend) for ns, backend, acls in ns_mapping]
-    unprotected_storage = router.RouterBackend(unprotected_mapping, index_uri=index_uri)
+    unprotected_storage = router.RouterBackend(unprotected_mapping, index_uri=index_uri,
+                                               index_dir=app.cfg.index_dir
+                                              )
     # Protect each backend with the acls provided for it in the mapping at position 2
     amw = acl.AclWrapperBackend
     protected_mapping = [(ns, amw(app.cfg, backend, **acls)) for ns, backend, acls in ns_mapping]
-    storage = router.RouterBackend(protected_mapping, index_uri=index_uri)
+    storage = router.RouterBackend(protected_mapping, index_uri=index_uri,
+                                   index_dir=app.cfg.index_dir
+                                  )
     return unprotected_storage, storage
 
 def deinit_backends(app):
@@ -190,7 +194,9 @@
     if xmlfile:
         app.cfg.load_xml = None
         tmp_backend = router.RouterBackend([('/', memory.MemoryBackend())],
-                                           index_uri='sqlite://')
+                                           index_uri='sqlite://',
+                                           index_dir=app.cfg.index_dir
+                                          )
         unserialize(tmp_backend, xmlfile)
         # TODO optimize this, maybe unserialize could count items it processed
         item_count = 0
--- a/MoinMoin/script/__init__.py	Mon Jul 04 12:38:17 2011 +0000
+++ b/MoinMoin/script/__init__.py	Wed Jul 06 03:09:05 2011 +0000
@@ -22,8 +22,6 @@
 
     from MoinMoin.script.maint.index import IndexOperations
     manager.add_command("index", IndexOperations())
-    from MoinMoin.script.maint.update_indexes import UpdateIndexes
-    manager.add_command("update_indexes", UpdateIndexes())
     from MoinMoin.script.account.create import Create_User
     manager.add_command("account_create", Create_User())
     from MoinMoin.script.account.disable import Disable_User
--- a/MoinMoin/search/indexing.py	Mon Jul 04 12:38:17 2011 +0000
+++ b/MoinMoin/search/indexing.py	Wed Jul 06 03:09:05 2011 +0000
@@ -6,7 +6,6 @@
 """
 
 import os
-import logging
 from flask import current_app as app
 
 from whoosh.fields import Schema, TEXT, ID, IDLIST, NUMERIC, DATETIME
@@ -15,6 +14,9 @@
 
 from MoinMoin.search.analyzers import *
 from MoinMoin.error import FatalError
+from MoinMoin import log
+logging = log.getLogger(__name__)
+
 '''
 for text items, it will be duplication. For "binary" items, it will only store
 what the filter code outputs (e.g. if it is a PDF, it will only store what
@@ -60,10 +62,10 @@
                'all_revisions_index': 'all_revisions_schema',
               }
 
-    def __init__(self, index_dir=None):
-        index_dir = index_dir or app.cfg.index_dir
+    def __init__(self, indexdir=None):
+        indexdir = indexdir or app.cfg.index_dir
         for index_name, index_schema in self.indexes.items():
-            self.open_index(index_dir, index_name, index_schema, create=True)
+            self.open_index(indexdir, index_name, index_schema, create=True)
 
     def open_index(self, indexdir, indexname, schema, create=False):
         """
--- a/MoinMoin/storage/backends/indexing.py	Mon Jul 04 12:38:17 2011 +0000
+++ b/MoinMoin/storage/backends/indexing.py	Wed Jul 06 03:09:05 2011 +0000
@@ -39,8 +39,9 @@
     """
     def __init__(self, *args, **kw):
         index_uri = kw.pop('index_uri', None)
+        index_dir = kw.pop('index_dir', None)
         super(IndexingBackendMixin, self).__init__(*args, **kw)
-        self._index = ItemIndex(index_uri)
+        self._index = ItemIndex(index_uri, index_dir)
 
     def close(self):
         self._index.close()
@@ -207,11 +208,15 @@
 from sqlalchemy import create_engine, select
 from sqlalchemy.sql import and_, exists, asc, desc
 
+# Importing Whoosh stuff
+from whoosh.writing import AsyncWriter
+from MoinMoin.search.indexing import WhooshIndex
+
 class ItemIndex(object):
     """
     Index for Items/Revisions
     """
-    def __init__(self, index_uri):
+    def __init__(self, index_uri, index_dir):
         metadata = MetaData()
         metadata.bind = create_engine(index_uri, echo=False)
 
@@ -250,10 +255,17 @@
         self.item_kvstore = KVStore(item_kvmeta)
         self.rev_kvstore = KVStore(rev_kvmeta)
 
+        # Whoosh indexes, opened (or created) at startup
+        self.index_object = WhooshIndex(indexdir=index_dir)
+
     def close(self):
         engine = self.metadata.bind
         engine.dispose()
 
+    def close_whoosh(self):
+        self.index_object.all_revisions_index.close()
+        self.index_object.latest_revisions_index.close()
+
     def index_rebuild(self, backend):
         self.metadata.drop_all()
         self.metadata.create_all()
@@ -276,6 +288,12 @@
         if result:
             return result[0]
 
+    def get_item_id_whoosh(self, uuid):
+        with self.index_object.latest_revisions_index.searcher() as searcher:
+            result = searcher.document(uuid=uuid)
+        if result:
+            return result
+
     def update_item(self, metas):
         """
         update an item with item-level metadata <metas>
@@ -292,6 +310,16 @@
         self.item_kvstore.store_kv(item_id, metas)
         return item_id
 
+    def update_item_whoosh(self, metas):
+        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
+            found_doc_number = latest_revs_searcher.document_number(uuid=metas[UUID])
+        with AsyncWriter(self.index_object.latest_revisions_index) as async_writer:
+            if found_doc_number is not None:
+                # an older document exists for this item, drop it first
+                async_writer.delete_document(found_doc_number)
+            # (re)index the item-level metadata as the latest document
+            async_writer.add_document(**metas)
+
     def cache_in_item(self, item_id, rev_id, rev_metas):
         """
         cache some important values from current revision into item for easy availability
@@ -319,6 +347,15 @@
             self.item_kvstore.store_kv(item_id, {})
             item_table.delete().where(item_table.c.id == item_id).execute()
 
+    def remove_item_whoosh(self, metas):
+        with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
+            found_doc_number = latest_revs_searcher.document_number(uuid=metas[UUID],
+                                                                    name=metas[NAME].lower()
+                                                                   )
+        if found_doc_number is not None:
+            with AsyncWriter(self.index_object.latest_revisions_index) as async_writer:
+                async_writer.delete_document(found_doc_number)
+
     def add_rev(self, uuid, revno, metas):
         """
         add a new revision <revno> for item <uuid> with metadata <metas>
@@ -346,6 +383,23 @@
         self.cache_in_item(item_id, rev_id, metas)
         return rev_id
 
+    def add_rev_whoosh(self, uuid, revno, metas):
+        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
+            with self.index_object.latest_revisions_index.searcher() as latest_revs_searcher:
+                all_found_document = all_revs_searcher.document(uuid=uuid, rev_no=revno)
+                latest_found_document = latest_revs_searcher.document(uuid=uuid, rev_no=revno)
+
+        metas["uuid"] = uuid
+        metas["rev_no"] = revno
+
+        if not all_found_document:  # only index revisions that are not already present
+            with AsyncWriter(self.index_object.all_revisions_index) as all_async_writer:
+                all_async_writer.add_document(**metas)
+
+        if not latest_found_document:  # same check for the latest-revisions index
+            with AsyncWriter(self.index_object.latest_revisions_index) as latest_async_writer:
+                latest_async_writer.add_document(**metas)
+
     def remove_rev(self, uuid, revno):
         """
         remove a revision <revno> of item <uuid>
@@ -370,6 +424,15 @@
             self.rev_kvstore.store_kv(rev_id, {})
             rev_table.delete().where(rev_table.c.id == rev_id).execute()
 
+    def remove_rev_whoosh(self, uuid, revno):
+        with self.index_object.all_revisions_index.searcher() as all_revs_searcher:
+            found_doc_number = all_revs_searcher.document_number(uuid=uuid,
+                                                                 rev_no=revno
+                                                                )
+        if found_doc_number is not None:
+            with AsyncWriter(self.index_object.all_revisions_index) as async_writer:
+                async_writer.delete_document(found_doc_number)
+
     def get_uuid_revno_name(self, rev_id):
         """
         get item uuid and revision number by rev_id
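
For reference, the new *_whoosh methods are meant to be driven the same way as their SQL counterparts above. A hypothetical caller might look as follows; the uuid, revision number and metadata keys are made up, and the metadata keys would have to match the Whoosh schemas defined in MoinMoin/search/indexing.py (not shown in this diff):

    from MoinMoin.storage.backends.indexing import ItemIndex

    index = ItemIndex(index_uri='sqlite://', index_dir='/tmp/moin-whoosh')

    fake_uuid = u'0' * 32                        # placeholder item uuid
    metas = {'name': u'HomePage'}                # keys must exist in the Whoosh schemas

    index.add_rev_whoosh(fake_uuid, 0, metas)    # index revision 0 of the item
    index.remove_rev_whoosh(fake_uuid, 0)        # and drop it from the all-revisions index again
    index.close_whoosh()
    index.close()
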