changeset 1241:cba856bc0c05

estimate number of hits correctly
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Thu, 10 Aug 2006 17:43:50 +0200
parents d2eadfef54b8
children d58efa0c4ce8
files MoinMoin/search/Xapian.py MoinMoin/search/builtin.py MoinMoin/search/results.py MoinMoin/support/xapwrap/index.py
diffstat 4 files changed, 28 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/Xapian.py	Thu Aug 10 17:22:56 2006 +0200
+++ b/MoinMoin/search/Xapian.py	Thu Aug 10 17:43:50 2006 +0200
@@ -262,7 +262,7 @@
             mtime = wikiutil.timestamp2version(mtime)
             if mode == 'update':
                 query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
-                enq, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
+                enq, mset, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
                 if docs:
                     doc = docs[0] # there should be only one
                     uid = doc['uid']
@@ -375,7 +375,7 @@
             # you can just call database.replace_document(uid_term, doc)
             # -> done in xapwrap.index.Index.index()
             query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
-            enq, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
+            enq, mset, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
             if docs:
                 doc = docs[0] # there should be only one
                 uid = doc['uid']
@@ -429,7 +429,7 @@
             mtime = wikiutil.timestamp2version(os.path.getmtime(filename))
             if mode == 'update':
                 query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', att_itemid))
-                enq, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', ])
+                enq, mset, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', ])
                 if debug: request.log("##%r %r" % (filename, docs))
                 if docs:
                     doc = docs[0] # there should be only one
--- a/MoinMoin/search/builtin.py	Thu Aug 10 17:22:56 2006 +0200
+++ b/MoinMoin/search/builtin.py	Thu Aug 10 17:43:50 2006 +0200
@@ -374,9 +374,17 @@
         # when xapian was used, we won't need to sort manually
         if self.request.cfg.xapian_search:
             self.sort = None
+            mset = self._xapianMset
+            estimated_hits = (
+                (mset.get_matches_estimated() == mset.get_matches_upper_bound() and
+                    mset.get_matches_estimated() == mset.get_matches_lower_bound()) and
+                '' or 'about',
+                mset.get_matches_estimated())
+        else:
+            estimated_hits = None
 
         return getSearchResults(self.request, self.query, hits, start,
-                self.sort)
+                self.sort, estimated_hits)
         
 
     # ----------------------------------------------------------------
@@ -412,7 +420,7 @@
                 self.request.log("xapianSearch: query = %r" %
                         query.get_description())
                 query = xapwrap.index.QObjQuery(query)
-                enq, hits = index.search(query, sort=self.sort)
+                enq, mset, hits = index.search(query, sort=self.sort)
                 clock.stop('_xapianQuery')
                 #self.request.log("xapianSearch: finds: %r" % hits)
                 def dict_decode(d):
@@ -425,6 +433,7 @@
                 pages = [dict_decode(hit['values']) for hit in hits]
                 self.request.log("xapianSearch: finds pages: %r" % pages)
                 self._xapianEnquire = enq
+                self._xapianMset = mset
                 self._xapianIndex = index
             except BaseIndex.LockedException:
                 pass
--- a/MoinMoin/search/results.py	Thu Aug 10 17:22:56 2006 +0200
+++ b/MoinMoin/search/results.py	Thu Aug 10 17:43:50 2006 +0200
@@ -244,11 +244,12 @@
     """
     # Public functions --------------------------------------------------
     
-    def __init__(self, query, hits, pages, elapsed, sort=None):
+    def __init__(self, query, hits, pages, elapsed, sort, estimated_hits):
         self.query = query # the query
         self.hits = hits # hits list
         self.pages = pages # number of pages in the wiki
         self.elapsed = elapsed # search time
+        self.estimated_hits = estimated_hits # about how much hits?
 
         if sort == 'weight':
             self._sortByWeight()
@@ -279,12 +280,17 @@
         @return formatted statistics
         """
         _ = request.getText
+
+        if not self.estimated_hits:
+            self.estimated_hits = ('', len(self.hits))
+
         output = [
             formatter.paragraph(1, attr={'class': 'searchstats'}),
             _("Results %(bs)s%(hitsFrom)d - %(hitsTo)d%(be)s "
-                    "of about %(bs)s%(hits)d%(be)s results out of about "
-                    "%(pages)d pages.") %
-                   {'hits': len(self.hits), 'pages': self.pages,
+                    "of %(aboutHits)s %(bs)s%(hits)d%(be)s results out of"
+                    "about %(pages)d pages.") %
+                {'aboutHits': self.estimated_hits[0],
+                    'hits': self.estimated_hits[1], 'pages': self.pages,
                     'hitsFrom': hitsFrom + 1,
                     'hitsTo': hitsFrom + request.cfg.search_results_per_page,
                     'bs': formatter.strong(1), 'be': formatter.strong(0)},
@@ -811,7 +817,7 @@
         self.matchLabel = (_('match'), _('matches'))
 
 
-def getSearchResults(request, query, hits, start, sort=None):
+def getSearchResults(request, query, hits, start, sort, estimated_hits):
     result_hits = []
     for wikiname, page, attachment, match in hits:
         if wikiname in (request.cfg.interwikiname, 'Self'): # a local match
@@ -825,5 +831,6 @@
                 attachment, match, page))
     elapsed = time.time() - start
     count = request.rootpage.getPageCount()
-    return SearchResults(query, result_hits, count, elapsed, sort)
+    return SearchResults(query, result_hits, count, elapsed, sort,
+            estimated_hits)
 
--- a/MoinMoin/support/xapwrap/index.py	Thu Aug 10 17:22:56 2006 +0200
+++ b/MoinMoin/support/xapwrap/index.py	Thu Aug 10 17:43:50 2006 +0200
@@ -635,7 +635,7 @@
                         valRes[valName] = xapDoc.get_value(valueIndex)
                     thisResult['values'] = valRes
                 results.append(thisResult)
-            return enq, results
+            return enq, mset, results
         except:
             del enq, mset
             raise