changeset 5013:008009d8cfa0

Xapian2009: Costs computation for expressions was refactored.
author Dmitrijs Milajevs <dimazest@gmail.com>
date Sat, 15 Aug 2009 19:03:39 +0200
parents 7ab1128bae9a
children c56728051226
files MoinMoin/search/queryparser.py
diffstat 1 files changed, 16 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/queryparser.py	Sat Aug 15 18:59:59 2009 +0200
+++ b/MoinMoin/search/queryparser.py	Sat Aug 15 19:03:39 2009 +0200
@@ -37,6 +37,10 @@
 class BaseExpression(object):
     """ Base class for all search terms """
 
+    # costs is the estimated time needed to calculate this term.
+    # The number is relative to other terms and has no real unit.
+    # It allows the fast searches to be done first.
+    costs = 0
     _tag = ""
 
     def __init__(self, pattern, use_re=False, case=False):
@@ -105,14 +109,6 @@
         logging.debug("%s returning %r" % (self.__class__, result))
         return result
 
-    def costs(self):
-        """ Return estimated time to calculate this term
-
-        Number is relative to other terms and has no real unit.
-        It allows to do the fast searches first.
-        """
-        return 0
-
     def highlight_re(self):
         """ Return a regular expression of what the term searches for
 
@@ -180,21 +176,18 @@
 
     def __init__(self, *terms):
         self._subterms = list(terms)
-        self._costs = 0
-        for t in self._subterms:
-            self._costs += t.costs()
         self.negated = 0
 
     def append(self, expression):
         """ Append another term """
         self._subterms.append(expression)
-        self._costs += expression.costs()
 
     def subterms(self):
         return self._subterms
 
+    @property
     def costs(self):
-        return self._costs
+        return sum([t.costs for t in self._subterms])
 
     def __unicode__(self):
         result = ''
@@ -232,9 +225,7 @@
         return None
 
     def sortByCost(self):
-        tmp = [(term.costs(), term) for term in self._subterms]
-        tmp.sort()
-        self._subterms = [item[1] for item in tmp]
+        self._subterms.sort(key=lambda t: t.costs)
 
     def search(self, page):
         """ Search for each term, cheap searches first """
@@ -364,6 +355,8 @@
     additional TitleSearch term.
     """
 
+    costs = 10000
+
     def __init__(self, pattern, use_re=False, case=False):
         """ Init a text search
 
@@ -375,9 +368,6 @@
 
         self.titlesearch = TitleSearch(self._pattern, use_re=use_re, case=case)
 
-    def costs(self):
-        return 10000
-
     def highlight_re(self):
         return u"(%s)" % self.pattern
 
@@ -446,9 +436,7 @@
     """ Term searches in pattern in page title only """
 
     _tag = 'title:'
-
-    def costs(self):
-        return 100
+    costs = 100
 
     def pageFilter(self):
         """ Page filter function for single title search """
@@ -528,6 +516,7 @@
 
     _tag = 'linkto:'
     _field_to_search = 'linkto'
+    costs = 5000 # cheaper than a TextSearch
 
     def __init__(self, pattern, use_re=False, case=True):
         """ Init a link search
@@ -542,9 +531,6 @@
         self._textpattern = '(' + pattern.replace('/', '|') + ')' # used for search in text
         self.textsearch = TextSearch(self._textpattern, use_re=True, case=case)
 
-    def costs(self):
-        return 5000 # cheaper than a TextSearch
-
     def highlight_re(self):
         return u"(%s)" % self._textpattern
 
@@ -578,6 +564,7 @@
 
     _tag = 'language:'
     _field_to_search = 'lang'
+    costs = 5000 # cheaper than a TextSearch
 
     def __init__(self, pattern, use_re=False, case=False):
         """ Init a language search
@@ -589,9 +576,6 @@
         # iso language code, always lowercase and not case-sensitive
         super(LanguageSearch, self).__init__(pattern.lower(), use_re, case=False)
 
-    def costs(self):
-        return 5000 # cheaper than a TextSearch
-
     def _get_matches(self, page):
 
         if self.pattern == page.pi['language']:
@@ -602,10 +586,12 @@
     def xapian_wanted(self):
         return True # only easy regexps possible
 
+
 class CategorySearch(TextSearch):
     """ Search the pages belonging to a category """
 
     _tag = 'category:'
+    costs = 5000 # cheaper than a TextSearch
 
     def __init__(self, pattern, use_re=False, case=True):
         super(CategorySearch, self).__init__(pattern, use_re, case=case)
@@ -631,9 +617,6 @@
                              r'(?m)(^-----*\s*\r?\n)(^##.*\r?\n)*^(?!##)(.*)\b%s\b' % pattern,
                              **kwargs)
 
-    def costs(self):
-        return 5000 # cheaper than a TextSearch
-
     def highlight_re(self):
         return u'(\\b%s\\b)' % self._pattern
 
@@ -656,6 +639,7 @@
 
     _tag = 'mimetype:'
     _field_to_search = 'mimetype'
+    costs = 5000 # cheaper than a TextSearch
 
     def __init__(self, pattern, use_re=False, case=False):
         """ Init a mimetype search
@@ -667,9 +651,6 @@
         # always lowercase and not case-sensitive
         super(MimetypeSearch, self).__init__(pattern.lower(), use_re, case=False)
 
-    def costs(self):
-        return 5000 # cheaper than a TextSearch
-
     def _get_matches(self, page):
 
         page_mimetype = u'text/%s' % page.pi['format']
@@ -688,6 +669,7 @@
 
     _tag = 'domain:'
     _field_to_search = 'domain'
+    costs = 5000 # cheaper than a TextSearch
 
     def __init__(self, pattern, use_re=False, case=False):
         """ Init a domain search
@@ -699,9 +681,6 @@
         # always lowercase and not case-sensitive
         super(DomainSearch, self).__init__(pattern.lower(), use_re, case=False)
 
-    def costs(self):
-        return 5000 # cheaper than a TextSearch
-
     def _get_matches(self, page):
         checks = {'underlay': page.isUnderlayPage,
                   'standard': page.isStandardPage,