changeset 791:84e26894de5d pytest2

Refactored some search code to make it more flexible and reduce code duplication. Moved stuff dealing with whoosh / indexes from frontend.views to the indexing mixin. Added a .searcher method to the indexing mixin that returns a searcher for either the all-revs or the latest-revs index. Used that method also for .search and .search_page to reduce duplication. Made the .query_parser method more intelligent, so it automatically chooses a MultifieldParser if there are multiple default_fields. Fixed callers.
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 28 Aug 2011 17:50:24 +0200
parents 8ac1f87be5c2
children 911ccaa08db6
files MoinMoin/apps/frontend/views.py MoinMoin/script/maint/reduce_revisions.py MoinMoin/script/maint/set_meta.py MoinMoin/storage/backends/indexing.py
diffstat 4 files changed, 38 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/apps/frontend/views.py	Sun Aug 28 16:29:27 2011 +0200
+++ b/MoinMoin/apps/frontend/views.py	Sun Aug 28 17:50:24 2011 +0200
@@ -143,19 +143,14 @@
 
 
 def _search(search_form, item_name):
-    from MoinMoin.search.indexing import WhooshIndex
-    from whoosh.qparser import QueryParser, MultifieldParser
-    from MoinMoin.search.analyzers import item_name_analyzer
-    from whoosh import highlight
     query = search_form['q'].value
-    pagenum = 1 # We start from first page
-    pagelen = search_form['pagelen'].value
-    index_object = WhooshIndex()
-    ix = index_object.all_revisions_index if request.values.get('search_in_all') else index_object.latest_revisions_index
-    with ix.searcher() as searcher:
-        mparser = MultifieldParser(["name_exact", "name", "content"], schema=ix.schema)
-        q = mparser.parse(query)
-        results = searcher.search_page(q, int(pagenum), pagelen=int(pagelen))
+    pagenum = 1  # We start from first page
+    pagelen = int(search_form['pagelen'].value)
+    all_revs = bool(request.values.get('search_in_all'))
+    qp = flaskg.storage.query_parser(["name_exact", "name", "content"], all_revs=all_revs)
+    q = qp.parse(query)
+    with flaskg.storage.searcher(all_revs) as searcher:
+        results = searcher.search_page(q, pagenum, pagelen)
         return render_template('search_results.html',
                                results=results,
                                query=query,
@@ -164,7 +159,6 @@
                               )
 
 
-
 @frontend.route('/<itemname:item_name>', defaults=dict(rev=-1), methods=['GET', 'POST'])
 @frontend.route('/+show/<int:rev>/<itemname:item_name>', methods=['GET', 'POST'])
 def show_item(item_name, rev):
--- a/MoinMoin/script/maint/reduce_revisions.py	Sun Aug 28 16:29:27 2011 +0200
+++ b/MoinMoin/script/maint/reduce_revisions.py	Sun Aug 28 17:50:24 2011 +0200
@@ -26,7 +26,7 @@
     def run(self, query):
         storage = app.unprotected_storage
         if query:
-            qp = storage.query_parser("name_exact", all_revs=False)
+            qp = storage.query_parser(["name_exact", ], all_revs=False)
             q = qp.parse(query)
         else:
             q = Every()
--- a/MoinMoin/script/maint/set_meta.py	Sun Aug 28 16:29:27 2011 +0200
+++ b/MoinMoin/script/maint/set_meta.py	Sun Aug 28 17:50:24 2011 +0200
@@ -44,7 +44,7 @@
                   "only a key you want to delete (with -r set).")
 
         if query:
-            qp = storage.query_parser("name_exact", all_revs=False)
+            qp = storage.query_parser(["name_exact", ], all_revs=False)
             q = qp.parse(query)
         else:
             q = Every()
--- a/MoinMoin/storage/backends/indexing.py	Sun Aug 28 16:29:27 2011 +0200
+++ b/MoinMoin/storage/backends/indexing.py	Sun Aug 28 17:50:24 2011 +0200
@@ -57,8 +57,11 @@
         item.publish_metadata()
         return item
 
-    def query_parser(self, default_field, all_revs=False):
-        return self._index.query_parser(default_field, all_revs=all_revs)
+    def query_parser(self, default_fields, all_revs=False):
+        return self._index.query_parser(default_fields, all_revs=all_revs)
+
+    def searcher(self, all_revs=False):
+        return self._index.searcher(all_revs=all_revs)
 
     def search(self, q, all_revs=False, **kw):
         return self._index.search(q, all_revs=all_revs, **kw)
@@ -169,7 +172,7 @@
     # TODO by intercepting write() to index data written to a revision
 
 from whoosh.writing import AsyncWriter
-from whoosh.qparser import QueryParser
+from whoosh.qparser import QueryParser, MultifieldParser
 
 from MoinMoin.search.indexing import WhooshIndex
 
@@ -267,30 +270,44 @@
                 logging.debug("Latest revisions: removing %d", latest_doc_number)
                 async_writer.delete_document(latest_doc_number)
 
-    def query_parser(self, default_field, all_revs=False):
+    def query_parser(self, default_fields, all_revs=False):
         if all_revs:
             schema = self.index_object.all_revisions_schema
         else:
             schema = self.index_object.latest_revisions_schema
-        return QueryParser(default_field, schema=schema)
+        if len(default_fields) > 1:
+            qp = MultifieldParser(default_fields, schema=schema)
+        elif len(default_fields) == 1:
+            qp = QueryParser(default_fields[0], schema=schema)
+        else:
+            raise ValueError("default_fields list must at least contain one field name")
+        return qp
 
-    def search(self, q, all_revs=False, **kw):
+    def searcher(self, all_revs=False):
+        """
+        Get a searcher for the right index. Always use this with "with":
+
+        with storage.searcher(all_revs) as searcher:
+            # your code
+
+        If you do not need the searcher itself or the Result object, but rather
+        the found documents, better use search() or search_page(), see below.
+        """
         if all_revs:
             ix = self.index_object.all_revisions_index
         else:
             ix = self.index_object.latest_revisions_index
-        with ix.searcher() as searcher:
+        return ix.searcher()
+
+    def search(self, q, all_revs=False, **kw):
+        with self.searcher(all_revs) as searcher:
             # Note: callers must consume everything we yield, so the for loop
             # ends and the "with" is left to close the index files.
             for hit in searcher.search(q, **kw):
                 yield hit.fields()
 
     def search_page(self, q, all_revs=False, pagenum=1, pagelen=10, **kw):
-        if all_revs:
-            ix = self.index_object.all_revisions_index
-        else:
-            ix = self.index_object.latest_revisions_index
-        with ix.searcher() as searcher:
+        with self.searcher(all_revs) as searcher:
             # Note: callers must consume everything we yield, so the for loop
             # ends and the "with" is left to close the index files.
             for hit in searcher.search_page(q, pagenum, pagelen=pagelen, **kw):