changeset 1197:fc2d00e2bb6b

case-sensitive searches work again by using moinSearch for post processing
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Sat, 22 Jul 2006 17:24:41 +0200
parents 98b9469ce6ac
children 277b97ba0700
files MoinMoin/search/builtin.py MoinMoin/search/queryparser.py docs/CHANGES.fpletz
diffstat 3 files changed, 49 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/builtin.py	Fri Jul 21 01:55:19 2006 +0200
+++ b/MoinMoin/search/builtin.py	Sat Jul 22 17:24:41 2006 +0200
@@ -393,7 +393,7 @@
         """
         pages = None
         index = self._xapianIndex(self.request)
-        if index: #and self.query.xapian_wanted():
+        if index and self.query.xapian_wanted():
             self.request.clock.start('_xapianSearch')
             try:
                 from MoinMoin.support import xapwrap
@@ -408,8 +408,9 @@
                     for k, v in d.items():
                         d[k] = d[k].decode(config.charset)
                     return d
-                pages = [{'uid': hit['uid'], 'values': dict_decode(hit['values'])}
-                        for hit in hits]
+                #pages = [{'uid': hit['uid'], 'values': dict_decode(hit['values'])}
+                #        for hit in hits]
+                pages = [dict_decode(hit['values']) for hit in hits]
                 self.request.log("xapianSearch: finds pages: %r" % pages)
                 self._xapianEnquire = enq
                 self._xapianIndex = index
@@ -418,9 +419,11 @@
             #except AttributeError:
             #    pages = []
             self.request.clock.stop('_xapianSearch')
-            return self._getHits(hits, self._xapianMatch)
-        else:
-            return self._moinSearch(pages)
+
+            if not self.query.xapian_need_postproc():
+                return self._getHits(hits, self._xapianMatch)
+        
+        return self._moinSearch(pages)
 
     def _xapianMatchDecider(self, term, pos):
         if term[0] == 'S':      # TitleMatch
--- a/MoinMoin/search/queryparser.py	Fri Jul 21 01:55:19 2006 +0200
+++ b/MoinMoin/search/queryparser.py	Sat Jul 22 17:24:41 2006 +0200
@@ -177,6 +177,12 @@
             wanted = wanted and term.xapian_wanted()
         return wanted
 
+    def xapian_need_postproc(self):
+        for term in self._subterms:
+            if term.xapian_need_postproc():
+                return True
+        return False
+
     def xapian_term(self, request, allterms):
         # sort negated terms
         terms = []
@@ -301,8 +307,12 @@
             return []
 
     def xapian_wanted(self):
+        # XXX: Add option for term-based matching
         return not self.use_re
 
+    def xapian_need_postproc(self):
+        return self.case
+
     def xapian_term(self, request, allterms):
         if self.use_re:
             # basic regex matching per term
@@ -332,7 +342,7 @@
                     t = [UnicodeQuery(w) for w, pos in analyzer.tokenize(t)]
                 queries.append(Query(Query.OP_AND, t))
 
-            if stemmed:
+            if not self.case and stemmed:
                 self._build_re(' '.join(stemmed), use_re=False,
                         case=self.case, stemmed=True)
 
@@ -383,7 +393,8 @@
         for match in self.search_re.finditer(page.page_name):
             if page.request.cfg.xapian_stemming:
                 # somewhere in regular word
-                if page.page_name[match.start()] not in config.chars_upper and \
+                if not self.case and \
+                        page.page_name[match.start()] not in config.chars_upper and \
                         page.page_name[match.start()-1] in config.chars_lower:
                     continue
 
@@ -408,7 +419,10 @@
             return []
 
     def xapian_wanted(self):
-        return not self.use_re
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return self.case
 
     def xapian_term(self, request, allterms):
         if self.use_re:
@@ -444,7 +458,7 @@
 
                 queries.append(Query(Query.OP_AND, t))
 
-            if stemmed:
+            if not self.case and stemmed:
                 self._build_re(' '.join(stemmed), use_re=False,
                         case=self.case, stemmed=True)
 
@@ -522,7 +536,10 @@
             return []
 
     def xapian_wanted(self):
-        return not self.use_re
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return self.case
 
     def xapian_term(self, request, allterms):
         prefix = Xapian.Index.prefixMap['linkto']
@@ -560,7 +577,7 @@
         self._pattern = pattern.lower()
         self.negated = 0
         self.use_re = use_re
-        self.case = case
+        self.case = False       # not case-sensitive!
         self.xapian_called = False
         self._build_re(self._pattern, use_re=use_re, case=case)
 
@@ -582,7 +599,10 @@
             return [Match()]
 
     def xapian_wanted(self):
-        return not self.use_re
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return False            # case-sensitivity would make no sense
 
     def xapian_term(self, request, allterms):
         self.xapian_called = True
--- a/docs/CHANGES.fpletz	Fri Jul 21 01:55:19 2006 +0200
+++ b/docs/CHANGES.fpletz	Sat Jul 22 17:24:41 2006 +0200
@@ -19,9 +19,15 @@
         1. regexp for whole word (all lowercase), or
         2. just the root of the word
     * Subpages: Add positions for complete (!) pagenames into the index
+    * Check if permissions/acls are always obeyed
+
+  ToDo (low priority):
     * Case-sensitive searches / Regexp on multiple terms: Graceful
       fallback to and/or merge with moinSearch based on nodes xapian can
       handle in the search term tree
+      * currently, xapian will fetch relevant pages and feed those into
+        _moinSearch for doing the real hard stuff it can't handle
+      -> need for a query optimizer, after SoC?
 
   New Features:
     * Faster search thanks to Xapian
@@ -176,3 +182,10 @@
       cooperation between moinSearch and Xapian for case-sensitive
       searches (code buried): We probably need a rather big rewrite!
 
+2006-07-21
+2006-07-22
+    * Final thoughts: No query optimizer for now. Case-sensitive
+      search is done by querying Xapian with the lowercased terms
+      and then running _moinSearch over the relevant pages with the
+      same query.
+