changeset 1200:b953b5ff4877

CategorySearch is live
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Mon, 24 Jul 2006 00:56:17 +0200
parents 5ce3bea2e66c
children f29d1f51dbfa
files MoinMoin/search/Xapian.py MoinMoin/search/queryparser.py docs/CHANGES.fpletz
diffstat 3 files changed, 84 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/Xapian.py	Sat Jul 22 22:11:11 2006 +0200
+++ b/MoinMoin/search/Xapian.py	Mon Jul 24 00:56:17 2006 +0200
@@ -320,11 +320,17 @@
     def _get_categories(self, page):
         body = page.get_raw_body()
 
-        sep = re.search(r'----*\r?\n', body)
-        if not sep:
+        prev, next = (0, 1)
+        pos = 0
+        while next:
+            if next != 1:
+                pos += next.end()
+            prev, next = next, re.search(r'----*\r?\n', body[pos:])
+
+        if not prev or prev == 1:
             return []
-        
-        return re.findall('Category(.*)\r?\n', body[sep.end():])
+
+        return re.findall(r'Category([^\s]+)', body[pos:])
 
     def _index_page(self, writer, page, mode='update'):
         """ Index a page - assumes that the write lock is acquired
--- a/MoinMoin/search/queryparser.py	Sat Jul 22 22:11:11 2006 +0200
+++ b/MoinMoin/search/queryparser.py	Mon Jul 24 00:56:17 2006 +0200
@@ -20,8 +20,6 @@
 except ImportError:
     pass
 
-CATEGORY_RE = re.compile('----\(-\*\)\(\\r\)\?\\n\)\(\.\*\)Category(.*)\\b', re.U)
-
 #############################################################################
 ### query objects
 #############################################################################
@@ -274,9 +272,10 @@
         matches = []
 
         # Search in page name
-        results = self.titlesearch.search(page)
-        if results:
-            matches.extend(results)
+        if self.titlesearch:
+            results = self.titlesearch.search(page)
+            if results:
+                matches.extend(results)
 
         # Search in page body
         body = page.get_raw_body()
@@ -629,6 +628,57 @@
             pattern = self.pattern
             return UnicodeQuery('%s%s' % (prefix, pattern))
 
+class CategorySearch(TextSearch):
+    """ Search the pages belonging to a category """
+
+    def __init__(self, *args, **kwargs):
+        TextSearch.__init__(self, *args, **kwargs)
+        self.titlesearch = None
+
+    def _build_re(self, pattern, **kwargs):
+        kwargs['use_re'] = True
+        TextSearch._build_re(self,
+                r'(----(-*)(\r)?\n)(.*)Category%s\b' % pattern, **kwargs)
+
+    def costs(self):
+        return 5000 # cheaper than a TextSearch
+
+    def __unicode__(self):
+        neg = self.negated and '-' or ''
+        return u'%s!"%s"' % (neg, unicode(self._pattern))
+
+    def highlight_re(self):
+        return ""
+
+    def xapian_wanted(self):
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return self.case
+
+    def xapian_term(self, request, allterms):
+        self.xapian_called = True
+        prefix = Xapian.Index.prefixMap['category']
+        if self.use_re:
+            # basic regex matching per term
+            terms = []
+            found = None
+            n = len(prefix)
+            for term in allterms():
+                if prefix == term[:n]:
+                    found = True
+                    if self.search_re.match(term[n+1:]):
+                        terms.append(term)
+                elif found:
+                    continue
+
+            if not terms:
+                return Query()
+            return Query(Query.OP_OR, terms)
+        else:
+            pattern = self._pattern.lower()
+            return UnicodeQuery('%s:%s' % (prefix, pattern))
+
 
 ##############################################################################
 ### Parse Query
@@ -715,6 +765,7 @@
         case = self.case
         linkto = False
         lang = False
+        category = False
 
         for m in modifiers:
             if "title".startswith(m):
@@ -727,8 +778,20 @@
                 linkto = True
             elif "language".startswith(m):
                 lang = True
+            elif "category".startswith(m):
+                category = True
 
-        if lang:
+        # oh, we'd better call xapian if we encounter this nasty regexp ;)
+        if not category:
+            cat_re = re.compile(r'----\(-\*\)\(\\r\)\?\\n\)\(\.\*\)Category(.*)\\b', re.U)
+            cat_match = cat_re.search(text)
+            if cat_match:
+                text = cat_match.groups()[0]
+                category = True
+
+        if category:
+            obj = CategorySearch(text, use_re=False, case=case)
+        elif lang:
             obj = LanguageSearch(text, use_re=regex, case=False)
         elif linkto:
             obj = LinkSearch(text, use_re=regex, case=case)
--- a/docs/CHANGES.fpletz	Sat Jul 22 22:11:11 2006 +0200
+++ b/docs/CHANGES.fpletz	Mon Jul 24 00:56:17 2006 +0200
@@ -30,6 +30,8 @@
     * Faster search thanks to Xapian
     * Searching for languages with new prefix lang/language, i.e. lang:de
       Note: Currently only available when Xapian is used
+    * CategorySearch with prefix category or with the regexp previously
+      used (autodetected as CategorySearch)
     * New config options:
         xapian_search        0      enables xapian-powered search
         xapian_index_dir     None   directory for xapian indices
@@ -187,3 +189,6 @@
       query.
     * Indexing of categories
 
+2006-07-23
+    * CategorySearch is live
+