changeset 400:ea001bcb5f90

Fixed issue with runtime indexing (https://bitbucket.org/marchael/moin-2.0/issue/9/content-entered-via-wiki-ui-is-not-indexed). Also fixes an issue with content indexing in the index build/update script.
author Michael Mayorov <marchael@kb.csu.ru>
date Sat, 06 Aug 2011 06:03:50 +0000
parents fdd1b2dec689
children 215d7a8f5b3e
files MoinMoin/script/maint/index.py MoinMoin/storage/backends/indexing.py
diffstat 2 files changed, 21 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/script/maint/index.py	Sat Aug 06 03:11:22 2011 +0000
+++ b/MoinMoin/script/maint/index.py	Sat Aug 06 06:03:50 2011 +0000
@@ -16,9 +16,10 @@
 from MoinMoin.search.indexing import WhooshIndex
 from MoinMoin.config import MTIME, NAME, CONTENTTYPE
 from MoinMoin.error import FatalError
-from MoinMoin.storage.error import NoSuchItemError
+from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError
 from MoinMoin.util.mime import Type
 from MoinMoin.search.revision_converter import backend_to_index
+from MoinMoin.converter import convert_to_indexable
 
 from MoinMoin import log
 logging = log.getLogger(__name__)
@@ -61,15 +62,22 @@
             with MultiSegmentWriter(all_rev_index, procs, limitmb) as all_rev_writer:
                 with MultiSegmentWriter(latest_rev_index, procs, limitmb) as latest_rev_writer:
                     for item in backend.iter_items_noindex():
-                        for rev_no in item.list_revisions():
+                        try:
                             if "all_revisions_index" in indexnames:
-                                revision = item.get_revision(rev_no)
-                                metadata = backend_to_index(revision, rev_no, all_rev_field_names, interwikiname)
-                                all_rev_writer.add_document(**metadata)
+                                for rev_no in item.list_revisions():
+                                    revision = item.get_revision(rev_no)
+                                    rev_content = convert_to_indexable(revision)
+                                    metadata = backend_to_index(revision, rev_no, all_rev_field_names, rev_content, interwikiname)
+                                    all_rev_writer.add_document(**metadata)
+                            else:
+                                revision = item.get_revision(-1)
+                                rev_no = revision.revno
+                                rev_content = convert_to_indexable(revision)
+                        except NoSuchRevisionError: # item has no such revision
+                            continue
                         # revision is now the latest revision of this item
                         if "latest_revisions_index" in indexnames and "rev_no" in locals():
-                            revision = item.get_revision(rev_no)
-                            metadata = backend_to_index(revision, rev_no, latest_rev_field_names, interwikiname)
+                            metadata = backend_to_index(revision, rev_no, latest_rev_field_names, rev_content, interwikiname)
                             latest_rev_writer.add_document(**metadata)
 
         def update_index(indexnames_schemas):
@@ -102,7 +110,8 @@
                 with latest_rev_index.writer() as latest_rev_writer:
                     for item, rev_no in latest_documents:
                         revision = item.get_revision(rev_no)
-                        converted_rev = backend_to_index(revision, rev_no, latest_rev_field_names, interwikiname)
+                        rev_content = convert_to_indexable(revision)
+                        converted_rev = backend_to_index(revision, rev_no, latest_rev_field_names, rev_content, interwikiname)
                         found = latest_rev_searcher.document(name_exact=item.name,
                                                              wikiname=interwikiname
                                                             )
@@ -130,7 +139,8 @@
                     for item, rev_nos in create_documents:
                         for rev_no in rev_nos:
                             revision = item.get_revision(rev_no)
-                            converted_rev = backend_to_index(revision, rev_no, all_rev_field_names, interwikiname)
+                            rev_content = convert_to_indexable(revision)
+                            converted_rev = backend_to_index(revision, rev_no, all_rev_field_names, rev_content, interwikiname)
                             all_rev_writer.add_document(**converted_rev)
 
         def clean_index(indexnames_schemas):
--- a/MoinMoin/storage/backends/indexing.py	Sat Aug 06 03:11:22 2011 +0000
+++ b/MoinMoin/storage/backends/indexing.py	Sat Aug 06 06:03:50 2011 +0000
@@ -400,7 +400,9 @@
                                                                   wikiname=self.wikiname
                                                                  )
         logging.debug("Processing: name %s revno %s" % (rev[NAME], revno))
+        rev.seek(0) # for a new revision, file pointer points to EOF, rewind first
         rev_content = convert_to_indexable(rev)
+        logging.debug("Indexable content: %r" % (rev_content[:250], ))
         if not all_found_document:
             field_names = self.index_object.all_revisions_index.schema.names()
             with AsyncWriter(self.index_object.all_revisions_index) as async_writer: