changeset 1236:d2d160c344b7

improved highlighting code to work better with stemming and special searches, extended SystemInfo macro
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Tue, 08 Aug 2006 22:39:15 +0200
parents c6ae37934d31
children 0a947454dec7
files MoinMoin/macro/SystemInfo.py MoinMoin/search/queryparser.py docs/CHANGES.fpletz
diffstat 3 files changed, 15 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/macro/SystemInfo.py	Tue Aug 08 22:10:25 2006 +0200
+++ b/MoinMoin/macro/SystemInfo.py	Tue Aug 08 22:39:15 2006 +0200
@@ -123,6 +123,7 @@
                 _('N/A'))
     row(_('Xapian search'), '%s, %s, %s'
             % (xapState[request.cfg.xapian_search], available, mtime))
+    row(_('Xapian stemming'), xapState[request.cfg.xapian_stemming])
 
     row(_('Active threads'), t_count or _('N/A'))
     buf.write(u'</dl>')
--- a/MoinMoin/search/queryparser.py	Tue Aug 08 22:10:25 2006 +0200
+++ b/MoinMoin/search/queryparser.py	Tue Aug 08 22:39:15 2006 +0200
@@ -336,7 +336,7 @@
                     tmp = []
                     for w, s, pos in analyzer.tokenize(t, flat_stemming=False):
                         tmp.append(UnicodeQuery(Query.OP_OR, (w, s)))
-                        stemmed.append(w)
+                        stemmed.append(s)
                     t = tmp
                 else:
                     # just not stemmed
@@ -344,8 +344,10 @@
                 queries.append(Query(Query.OP_AND, t))
 
             if not self.case and stemmed:
-                self._build_re(' '.join(stemmed), use_re=False,
-                        case=self.case, stemmed=True)
+                new_pat = ' '.join(stemmed)
+                self._pattern = new_pat
+                self._build_re(new_pat, use_re=False, case=self.case,
+                        stemmed=True)
 
         # titlesearch OR parsed wikiwords
         return Query(Query.OP_OR,
@@ -457,7 +459,7 @@
                         tmp.append(UnicodeQuery(Query.OP_OR,
                             ['%s%s' % (Xapian.Index.prefixMap['title'], j)
                                 for j in (w, s)]))
-                        stemmed.append(w)
+                        stemmed.append(s)
                     t = tmp
                 else:
                     # just not stemmed
@@ -467,8 +469,10 @@
                 queries.append(Query(Query.OP_AND, t))
 
             if not self.case and stemmed:
-                self._build_re(' '.join(stemmed), use_re=False,
-                        case=self.case, stemmed=True)
+                new_pat = ' '.join(stemmed)
+                self._pattern = new_pat
+                self._build_re(new_pat, use_re=False, case=self.case,
+                        stemmed=True)
 
         return Query(Query.OP_AND, queries)
 
@@ -635,6 +639,7 @@
             pattern = self.pattern
             return UnicodeQuery('%s%s' % (prefix, pattern))
 
+
 class CategorySearch(TextSearch):
     """ Search the pages belonging to a category """
 
@@ -655,7 +660,7 @@
         return u'%s!"%s"' % (neg, unicode(self._pattern))
 
     def highlight_re(self):
-        return ""
+        return u'(Category%s)' % self._pattern
 
     def xapian_wanted(self):
         return True             # only easy regexps possible
--- a/docs/CHANGES.fpletz	Tue Aug 08 22:10:25 2006 +0200
+++ b/docs/CHANGES.fpletz	Tue Aug 08 22:39:15 2006 +0200
@@ -12,9 +12,6 @@
     * Write/update documentation for all the new search stuff
     * Reevaluate Xapwrap, possibly drop it and rip out usable stuff
       (i.e. ExceptionTranslator)
-    * Add stemming support for highlighting stuff:
-        1. regexp for whole word (all lowercase), or
-        2. just the root of the word
 
   ToDo (low priority):
     * Case-sensitive searches / Regexp on multiple terms: Graceful
@@ -223,4 +220,6 @@
 
 2006-08-08
     * added some more timers for regression testing
+    * improved highlighting code to work better with stemming and
+      special searches, extended SystemInfo macro