changeset 5029:3d9fe8725332

Xapian2009: Document's mtime is retrieved in the right way for indexing.
author Dmitrijs Milajevs <dimazest@gmail.com>
date Fri, 21 Aug 2009 21:46:03 +0200
parents aafcd2b5597a
children 7d01d6bbdcab
files MoinMoin/search/Xapian/indexing.py MoinMoin/search/_tests/test_search.py
diffstat 2 files changed, 33 insertions(+), 54 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/Xapian/indexing.py	Thu Aug 20 19:42:38 2009 +0200
+++ b/MoinMoin/search/Xapian/indexing.py	Fri Aug 21 21:46:03 2009 +0200
@@ -206,6 +206,25 @@
 
         connection.close()
 
+    def _get_document(self, connection, doc_id, mtime, mode):
+        document = None
+
+        if mode == 'update':
+            try:
+                doc = connection.get_document(doc_id)
+                docmtime = long(doc.data['mtime'][0])
+                if mtime > docmtime:
+                    document = doc
+            except KeyError:
+                document = xappy.UnprocessedDocument()
+                document.id = doc_id
+        elif mode == 'add':
+            document = xappy.UnprocessedDocument()
+            document.id = doc_id
+
+        return document
+
+
     def _index_file(self, request, writer, filename, mode='update'):
         """ index a file as it were a page named pagename
             Assumes that the write lock is acquired
@@ -217,26 +236,12 @@
             itemid = "%s:%s" % (wikiname, os.path.join(fs_rootpage, filename))
             mtime = os.path.getmtime(filename)
             mtime = wikiutil.timestamp2version(mtime)
-            if mode == 'update':
-                try:
-                    doc = connection.get_document(itemid)
-                    docmtime = long(doc.data['mtime'])
-                    updated = mtime > docmtime
-                    logging.debug("itemid %r: mtime %r > docmtime %r == updated %r" % (itemid, mtime, docmtime, updated))
-                except KeyError:
-                    updated = True
-                    doc = xappy.UnprocessedDocument()
-                    doc.id = itemid
-                    updated = mtime > docmtime
 
-            elif mode == 'add':
-                updated = True
-                doc = xappy.UnprocessedDocument()
-                doc.id = itemid
+            doc = self._get_document(connection, itemid, mtime, mode)
 
-            logging.debug("%s %r" % (filename, updated))
+            logging.debug("%s %r" % (filename, doc))
 
-            if updated:
+            if doc:
                 doc.fields.append(xappy.Field('wikiname', wikiname))
                 doc.fields.append(xappy.Field('pagename', fs_rootpage))
                 doc.fields.append(xappy.Field('attachment', filename)) # XXX we should treat files like real pages, not attachments
@@ -349,23 +354,12 @@
             filename = AttachFile.getFilename(request, pagename, att)
             itemid = "%s:%s//%s" % (wikiname, pagename, att)
             mtime = wikiutil.timestamp2version(os.path.getmtime(filename))
-            if mode == 'update':
-                try:
-                    doc = connection.get_document(itemid)
-                    docmtime = long(doc.data['mtime'])
-                    updated = mtime > docmtime
-                except KeyError:
-                    updated = True
-                    doc = xappy.UnprocessedDocument()
-                    doc.id = itemid
-            elif mode == 'add':
-                updated = True
-                doc = xappy.UnprocessedDocument()
-                doc.id = itemid
 
-            logging.debug("%s %s %r" % (pagename, att, updated))
+            doc = self._get_document(connection, itemid, mtime, mode)
 
-            if updated:
+            logging.debug("%s %s %r" % (pagename, att, doc))
+
+            if doc:
                 doc.fields.append(xappy.Field('wikiname', wikiname))
                 doc.fields.append(xappy.Field('pagename', pagename))
                 doc.fields.append(xappy.Field('attachment', att))
@@ -404,28 +398,12 @@
         language, stem_language = self._get_languages(page)
         categories = self._get_categories(page)
         domains = tuple(self._get_domains(page))
-        updated = False
 
-        if mode == 'update':
-            try:
-                doc = connection.get_document(itemid)
-                docmtime = long(doc.data['mtime'])
-                updated = mtime > docmtime
-                logging.debug("itemid %r: mtime %r > docmtime %r == updated %r" % (itemid, mtime, docmtime, updated))
-            except KeyError:
-                updated = True
-                doc = xappy.UnprocessedDocument()
-                doc.id = itemid
+        doc = self._get_document(connection, itemid, mtime, mode)
 
-        elif mode == 'add':
-            updated = True
-            doc = xappy.UnprocessedDocument()
-            doc.id = itemid
+        logging.debug("%s %r" % (pagename, doc))
 
-
-        logging.debug("%s %r" % (pagename, updated))
-
-        if updated:
+        if doc:
             doc.fields.append(xappy.Field('wikiname', wikiname))
             doc.fields.append(xappy.Field('pagename', pagename))
             doc.fields.append(xappy.Field('attachment', '')) # this is a real page, not an attachment
@@ -449,7 +427,7 @@
             logging.debug("%s (replace %r)" % (pagename, itemid))
             connection.replace(doc)
 
-        return updated
+        return bool(doc)
 
     def _remove_item(self, request, connection, page, attachment=None):
         wikiname = request.cfg.interwikiname or u'Self'
--- a/MoinMoin/search/_tests/test_search.py	Thu Aug 20 19:42:38 2009 +0200
+++ b/MoinMoin/search/_tests/test_search.py	Fri Aug 21 21:46:03 2009 +0200
@@ -265,15 +265,16 @@
 
     def test_create_page(self):
         self.pages['TestCreatePage'] = 'some text' # Moin serarch must search this page
+
         create_page(self.request, 'TestCreatePage', self.pages['TestCreatePage'])
+        time.sleep(1) # Wait while created pages are being indexed in other thread.
 
-        time.sleep(1) # Wait while created pages are being indexed in other thread.
         result = self.search(u'TestCreatePage')
 
         nuke_page(self.request, 'TestCreatePage')
         time.sleep(1) # Wait while the xapian index is being updated.
+
         del self.pages['TestCreatePage']
-
         assert len(result.hits) == 1