changeset 4972:af698a181b01

Xapian2009: AndExpression and TextSearch xapian_term() refactoring. xapian_term() no longer receives the allterms parameter; it receives a xappy.SearchConnection instead. Index.search takes an AndExpression as the query and converts it to a Xapian query via xapian_term(). Xapian.Index.allterms() was deleted.
author Dmitrijs Milajevs <dimazest@gmail.com>
date Sat, 01 Aug 2009 17:58:53 +0200
parents 21bc8092a009
children 2f16bd87444d
files MoinMoin/search/Xapian.py MoinMoin/search/builtin.py MoinMoin/search/queryparser.py
diffstat 3 files changed, 31 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/Xapian.py	Fri Jul 31 17:24:07 2009 +0200
+++ b/MoinMoin/search/Xapian.py	Sat Aug 01 17:58:53 2009 +0200
@@ -213,7 +213,7 @@
 
     def _check_version(self):
         """ Checks if the correct version of Xapian is installed """
-        # XXX xappy checks version on import! 
+        # XXX xappy checks version on import!
         # every version greater than or equal to XAPIAN_MIN_VERSION is allowed
         XAPIAN_MIN_VERSION = (1, 0, 0)
         major, minor, revision = xapian.major_version(), xapian.minor_version(), xapian.revision()
@@ -263,6 +263,7 @@
             kw['sortby'] = 'pagename'
 
         # Try to get first 1000 hits.
+        query = query.xapian_term(self.request, searcher)
         hits = searcher.search(query, 0, 1000, **kw)
         # If there are more hits, get them all
         if hits.more_matches:
@@ -293,14 +294,6 @@
 
         writer.close()
 
-    def allterms(self):
-        """ Fetches all terms in the Xapian index """
-        db = xapidx.ExceptionTranslater.openIndex(True, self.dir)
-        i = db.allterms_begin()
-        while i != db.allterms_end():
-            yield i.get_term()
-            i.next()
-
     def termpositions(self, uid, term):
         """ Fetches all positions of a term in a document
 
--- a/MoinMoin/search/builtin.py	Fri Jul 31 17:24:07 2009 +0200
+++ b/MoinMoin/search/builtin.py	Sat Aug 01 17:58:53 2009 +0200
@@ -634,8 +634,7 @@
         clock.start('_xapianSearch')
         try:
             clock.start('_xapianQuery')
-            query = self.query.xapian_term(self.request, index.allterms)
-            search_results = index.search(query, sort=self.sort, historysearch=self.historysearch)
+            search_results = index.search(self.query, sort=self.sort, historysearch=self.historysearch)
             clock.stop('_xapianQuery')
             logging.debug("_xapianSearch: finds: %r" % search_results)
             self._xapianIndex = index
@@ -648,7 +647,7 @@
                   'wikiname': r.data['wikiname'][0],
                   'pagename': r.data['pagename'][0],
                   'attachment': r.data['attachment'][0],
-                  'revision': r.data['revision'][0]}
+                  'revision': r.data.get('revision',[0])[0]}
                  for r in search_results]
 
         try:
--- a/MoinMoin/search/queryparser.py	Fri Jul 31 17:24:07 2009 +0200
+++ b/MoinMoin/search/queryparser.py	Sat Aug 01 17:58:53 2009 +0200
@@ -206,15 +206,15 @@
                 return True
         return False
 
-    def xapian_term(self, request, allterms):
+    def xapian_term(self, request, connection):
         # sort negated terms
         terms = []
         not_terms = []
         for term in self._subterms:
             if not term.negated:
-                terms.append(term.xapian_term(request, allterms))
+                terms.append(term.xapian_term(request, connection))
             else:
-                not_terms.append(term.xapian_term(request, allterms))
+                not_terms.append(term.xapian_term(request, connection))
 
         # prepare query for not negated terms
         if not terms:
@@ -363,13 +363,17 @@
     def xapian_need_postproc(self):
         return self.case
 
-    def xapian_term(self, request, allterms):
+    def xapian_term(self, request, connection):
+        # XXX next version of xappy (>0.5) will provide Query class
+        # it should be used.
         if self.use_re:
-            # basic regex matching per term
-            terms = [term for term in allterms() if self.search_re.match(term)]
-            if not terms:
-                return Query()
-            queries = [Query(Query.OP_OR, terms)]
+            # XXX
+            pass
+#             basic regex matching per term
+#             terms = [term for term in allterms() if self.search_re.match(term)]
+#             if not terms:
+#                 return Query()
+#             queries = [Query(Query.OP_OR, terms)]
         else:
             analyzer = Xapian.WikiAnalyzer(request=request, language=request.cfg.language_default)
             terms = self._pattern.split()
@@ -377,18 +381,22 @@
             # all parsed wikiwords, AND'ed
             queries = []
             stemmed = []
-            for t in terms:
+
+            for term in terms:
                 if request.cfg.xapian_stemming:
                     # stemmed OR not stemmed
-                    tmp = []
-                    for w, s, pos in analyzer.tokenize(t, flat_stemming=False):
-                        tmp.append(UnicodeQuery(Query.OP_OR, (w, s)))
+                    t = []
+                    for w, s, pos in analyzer.tokenize(term, flat_stemming=False):
+                        query_word = connection.query_field('content', w)
+                        query_stemmed = connection.query_field('content', s)
+                        # XXX UnicodeQuery was used here!
+                        t.append(Query(Query.OP_OR, [query_word, query_stemmed]))
                         stemmed.append(s)
-                    t = tmp
                 else:
                     # just not stemmed
-                    t = [UnicodeQuery(w) for w, pos in analyzer.tokenize(t)]
-                queries.append(Query(Query.OP_AND, t))
+                    t = [connection.query_field('content', w) for w, pos in analyzer.tokenize(term)]
+
+                queries.append(Query(connection.OP_AND, t))
 
             if not self.case and stemmed:
                 new_pat = ' '.join(stemmed)
@@ -397,8 +405,9 @@
 
         # titlesearch OR parsed wikiwords
         return Query(Query.OP_OR,
-                (self.titlesearch.xapian_term(request, allterms),
-                    Query(Query.OP_AND, queries)))
+                     # XXX allterms for titlesearch
+                     [self.titlesearch.xapian_term(request, []),
+                      Query(Query.OP_AND, queries)])
 
 
 class TitleSearch(BaseExpression):