changeset 5019:a33dae41f921

Xapian2009: _build_re() no longer sets self.pattern and self.search_re; it returns them instead. CategorySearch is now a subclass of BaseFieldSearch.
author Dmitrijs Milajevs <dimazest@gmail.com>
date Sun, 16 Aug 2009 15:43:12 +0200
parents 67578c72e2d9
children 60ec79504cb4
files MoinMoin/search/builtin.py MoinMoin/search/queryparser.py
diffstat 2 files changed, 30 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/builtin.py	Sun Aug 16 09:52:24 2009 +0200
+++ b/MoinMoin/search/builtin.py	Sun Aug 16 15:43:12 2009 +0200
@@ -672,7 +672,7 @@
         finally:
             clock.stop('_xapianSearch')
 
-        # some postprocessing by _moinSearch is required
-        return MoinSearch(self.request, self.query, self.sort, self.mtime, self.historysearch, pages=None)._search()
+        # some postprocessing by MoinSearch is required
+        return MoinSearch(self.request, self.query, self.sort, self.mtime, self.historysearch, pages=pages)._search()
 
 
--- a/MoinMoin/search/queryparser.py	Sun Aug 16 09:52:24 2009 +0200
+++ b/MoinMoin/search/queryparser.py	Sun Aug 16 15:43:12 2009 +0200
@@ -60,7 +60,7 @@
         if case:
             self._tag += 'case:'
 
-        self._build_re(self._pattern, use_re=use_re, case=case)
+        self.pattern, self.search_re = self._build_re(self._pattern, use_re=use_re, case=case)
 
     def __str__(self):
         return unicode(self).encode(config.charset, 'replace')
@@ -119,19 +119,14 @@
     def _build_re(self, pattern, use_re=False, case=False, stemmed=False):
         """ Make a regular expression out of a text pattern """
         flags = case and re.U or (re.I | re.U)
-        if use_re:
-            try:
-                self.search_re = re.compile(pattern, flags)
-            except re.error:
-                pattern = re.escape(pattern)
-                self.pattern = pattern
-                self.search_re = re.compile(pattern, flags)
-            else:
-                self.pattern = pattern
-        else:
+
+        try:
+            search_re = re.compile(pattern, flags)
+        except re.error:
             pattern = re.escape(pattern)
-            self.search_re = re.compile(pattern, flags)
-            self.pattern = pattern
+            search_re = re.compile(pattern, flags)
+
+        return pattern, search_re
 
     def _get_query_for_search_re(self, connection, field_to_check=None):
         """
@@ -339,17 +334,6 @@
 
     costs = 10000
 
-    def __init__(self, pattern, use_re=False, case=False):
-        """ Init a text search
-
-        @param pattern: pattern to search for, ascii string or unicode
-        @param use_re: treat pattern as re of plain text, bool
-        @param case: do case sensitive search, bool
-        """
-        super(TextSearch, self).__init__(pattern, use_re, case)
-
-        self.titlesearch = TitleSearch(self._pattern, use_re=use_re, case=case)
-
     def highlight_re(self):
         return u"(%s)" % self.pattern
 
@@ -357,10 +341,9 @@
         matches = []
 
         # Search in page name
-        if self.titlesearch:
-            results = self.titlesearch.search(page)
-            if results:
-                matches.extend(results)
+        results = TitleSearch(self._pattern, use_re=self.use_re, case=self.case)._get_matches(page)
+        if results:
+            matches.extend(results)
 
         # Search in page body
         body = page.get_raw_body()
@@ -398,16 +381,14 @@
 
                 queries.append(Query(connection.OP_AND, t))
 
+            # XXX Is it required to change pattern and search_re here?
             if not self.case and stemmed:
                 new_pat = ' '.join(stemmed)
                 self._pattern = new_pat
-                self._build_re(new_pat, use_re=False, case=self.case, stemmed=True)
+                self.pattern, self.search_re = self._build_re(new_pat, use_re=False, case=self.case, stemmed=True)
 
-        # titlesearch OR parsed wikiwords
-        return Query(Query.OP_OR,
-                     # XXX allterms for titlesearch
-                     [self.titlesearch.xapian_term(request, connection),
-                      Query(Query.OP_AND, queries)])
+        title_query = TitleSearch(self._pattern, use_re=self.use_re, case=self.case).xapian_term(request, connection)
+        return Query(Query.OP_OR, [title_query, Query(Query.OP_AND, queries)])
 
 
 class TitleSearch(BaseExpression):
@@ -557,18 +538,14 @@
             return []
 
 
-class CategorySearch(TextSearch):
+class CategorySearch(BaseFieldSearch):
     """ Search the pages belonging to a category """
 
     _tag = 'category:'
+    _field_to_search = 'category'
     costs = 5000 # cheaper than a TextSearch
 
-    def __init__(self, pattern, use_re=False, case=True):
-        super(CategorySearch, self).__init__(pattern, use_re, case=case)
-
-        self.titlesearch = None
-
-    def _build_re(self, pattern, **kwargs):
+    def _get_matches(self, page):
         """ match categories like this:
             ... some page text ...
             ----
@@ -580,12 +557,16 @@
                   must be on a single line either directly below the ---- or
                   directly below some comment lines.
         """
-        kwargs['use_re'] = True
-        # XXX This breaks xapian_term because xapian index stores just categories (without "-----").
-        # Thus, self._get_query_for_search_re() can not mach anything, and empty query is returned.
-        TextSearch._build_re(self,
-                             r'(?m)(^-----*\s*\r?\n)(^##.*\r?\n)*^(?!##)(.*)\b%s\b' % pattern,
-                             **kwargs)
+        matches = []
+
+        pattern = r'(?m)(^-----*\s*\r?\n)(^##.*\r?\n)*^(?!##)(.*)\b%s\b' % self.pattern
+        search_re = self._build_re(pattern, use_re=self.use_re, case=self.case)[1] # we need only a regexp, but not a pattern
+
+        body = page.get_raw_body()
+        for match in search_re.finditer(body):
+            matches.append(TextMatch(re_match=match))
+
+        return matches
 
     def highlight_re(self):
         return u'(\\b%s\\b)' % self._pattern