changeset 5282:bee5567d7084

xapian: remove assumption that xapian db is a directory. Xapian can work with stub DBs, i.e. small text files that contain a kind of pointer to the real xapian index directory. The stub DB filename can be given to xapian instead of an index directory path (at least when searching; I had some trouble giving a stub db file to indexing). Thus, "dir" was renamed to "db" in some places. Other places assuming the db is a directory were fixed or removed, except in the index rebuilding code. Refactored some related code. Moved xapian-specific code from BaseIndex to the XapianIndex subclass.
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 15 Nov 2009 16:12:50 +0100
parents 5f0ec1f315bc
children f68e18305d05
files MoinMoin/search/Xapian/indexing.py MoinMoin/search/_tests/test_search.py MoinMoin/search/builtin.py
diffstat 3 files changed, 40 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/Xapian/indexing.py	Sun Nov 15 12:45:30 2009 +0100
+++ b/MoinMoin/search/Xapian/indexing.py	Sun Nov 15 16:12:50 2009 +0100
@@ -17,6 +17,7 @@
 from MoinMoin.support import xappy
 from MoinMoin.search.builtin import BaseIndex
 from MoinMoin.search.Xapian.tokenizer import WikiAnalyzer
+from MoinMoin.util import filesys
 
 from MoinMoin.Page import Page
 from MoinMoin import config, wikiutil
@@ -118,6 +119,10 @@
 
 class XapianIndex(BaseIndex):
 
+    def __init__(self, request):
+        super(XapianIndex, self).__init__(request)
+        self.db = os.path.join(self.main_dir, 'index')
+
     def _main_dir(self):
         """ Get the directory of the xapian index """
         if self.request.cfg.xapian_index_dir:
@@ -127,8 +132,22 @@
             return os.path.join(self.request.cfg.cache_dir, 'xapian')
 
     def exists(self):
-        """ Check if the Xapian index exists """
-        return BaseIndex.exists(self) and os.listdir(self.dir)
+        """ Check if index exists """
+        return os.path.exists(self.db)
+
+    def mtime(self):
+        """ Modification time of the index """
+        return os.path.getmtime(self.db)
+
+    def touch(self):
+        """ Touch the index """
+        filesys.touch(self.db)
+
+    def get_search_connection(self):
+        return MoinSearchConnection(self.db)
+
+    def get_indexer_connection(self):
+        return MoinIndexerConnection(self.db)
 
     def _search(self, query, sort='weight', historysearch=0):
         """
@@ -146,7 +165,7 @@
                 else:
                     break
             except IndexError:
-                searcher = MoinSearchConnection(self.dir)
+                searcher = self.get_search_connection()
                 timestamp = self.mtime()
                 break
 
@@ -173,7 +192,7 @@
         """
         try:
             request = self._indexingRequest(self.request)
-            connection = MoinIndexerConnection(self.dir)
+            connection = self.get_indexer_connection()
             self.touch()
             try:
                 done_count = 0
@@ -324,7 +343,7 @@
             # likely it (== all of its revisions, all of its attachments) got either renamed or nuked
             wikiname = request.cfg.interwikiname or u'Self'
 
-            sc = MoinSearchConnection(self.dir)
+            sc = self.get_search_connection()
             docs_to_delete = sc.get_all_documents_with_fields(wikiname=wikiname, pagename=pagename)
                                                               # any page rev, any attachment
             sc.close()
@@ -527,13 +546,16 @@
             pages = request.rootpage.getPageList(user='', exists=1)
 
         # rebuilding the DB: delete it and add everything
+        # XXX assumes that self.db is a xapian index directory
+        # XXX killing the index this way leads to searches failing, if the
+        # XXX wiki tries to use the index while index is rebuilt.
         if mode == 'rebuild':
-            for fname in os.listdir(self.dir):
-                os.unlink(os.path.join(self.dir, fname))
+            for fname in os.listdir(self.db):
+                os.unlink(os.path.join(self.db, fname))
             mode = 'add'
-
+ 
         try:
-            connection = MoinIndexerConnection(self.dir)
+            connection = self.get_indexer_connection()
             self.touch()
             try:
                 logging.info("indexing %d pages..." % len(pages))
--- a/MoinMoin/search/_tests/test_search.py	Sun Nov 15 12:45:30 2009 +0100
+++ b/MoinMoin/search/_tests/test_search.py	Sun Nov 15 16:12:50 2009 +0100
@@ -359,8 +359,8 @@
         return XapianSearch(self.request, query)
 
     def get_moin_search_connection(self):
-        from MoinMoin.search.Xapian import MoinSearchConnection
-        return  MoinSearchConnection(os.path.join(self.request.cfg.cache_dir, 'xapian/index'))
+        from MoinMoin.search.Xapian import XapianIndex
+        return XapianIndex(self.request).get_search_connection()
 
     def setup_class(self):
 
--- a/MoinMoin/search/builtin.py	Sun Nov 15 12:45:30 2009 +0100
+++ b/MoinMoin/search/builtin.py	Sun Nov 15 16:12:50 2009 +0100
@@ -17,7 +17,6 @@
 
 from MoinMoin import wikiutil, config, caching
 from MoinMoin.Page import Page
-from MoinMoin.util import filesys
 from MoinMoin.search.results import getSearchResults, Match, TextMatch, TitleMatch, getSearchResults
 
 ##############################################################################
@@ -101,26 +100,25 @@
         @param request: current request
         """
         self.request = request
-        main_dir = self._main_dir()
-        self.dir = os.path.join(main_dir, 'index')
-        if not os.path.exists(self.dir):
-            os.makedirs(self.dir)
-        self.update_queue = IndexerQueue(request, main_dir, 'indexer-queue')
+        self.main_dir = self._main_dir()
+        if not os.path.exists(self.main_dir):
+            os.makedirs(self.main_dir)
+        self.update_queue = IndexerQueue(request, self.main_dir, 'indexer-queue')
 
     def _main_dir(self):
         raise NotImplemented('...')
 
     def exists(self):
         """ Check if index exists """
-        return os.path.exists(self.dir)
+        raise NotImplemented('...')
 
     def mtime(self):
         """ Modification time of the index """
-        return os.path.getmtime(self.dir)
+        raise NotImplemented('...')
 
     def touch(self):
         """ Touch the index """
-        filesys.touch(self.dir)
+        raise NotImplemented('...')
 
     def _search(self, query):
         """ Actually perfom the search