changeset 2955:c8a3731ce61c

search: add tests, add debug logging, fix bugs (negative search terms failing, and others)
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Fri, 30 Nov 2007 23:41:48 +0100
parents 96319d867986
children a8809feee20c
files MoinMoin/search/_tests/test_search.py MoinMoin/search/builtin.py MoinMoin/search/queryparser.py
diffstat 3 files changed, 158 insertions(+), 52 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/_tests/test_search.py	Fri Nov 30 15:30:20 2007 +0100
+++ b/MoinMoin/search/_tests/test_search.py	Fri Nov 30 23:41:48 2007 +0100
@@ -2,10 +2,13 @@
 """
     MoinMoin - MoinMoin.search Tests
 
-    @copyright: 2005 by Nir Soffer <nirs@freeshell.org>
+    @copyright: 2005 by Nir Soffer <nirs@freeshell.org>,
+                2007 by MoinMoin:ThomasWaldmann
     @license: GNU GPL, see COPYING for details.
 """
 
+import py
+
 from MoinMoin import search
 
 
@@ -41,7 +44,7 @@
             ("a -b c", '["a" -"b" "c"]'),
             ("aaa bbb -ccc", '["aaa" "bbb" -"ccc"]'),
             ("title:aaa title:bbb -title:ccc", '[!"aaa" !"bbb" -!"ccc"]'),
-            ("aaa OR bbb", '["aaa" "OR" "bbb"]'),
+            ("aaa or bbb", '["aaa" or "bbb"]'),
             ]:
             result = parser.parse_query(query)
             assert str(result) == wanted
@@ -49,18 +52,44 @@
 
 class TestSearch:
     """ search: test search """
+    doesnotexist = u'jfhsdaASDLASKDJ'
 
     def testTitleSearchFrontPage(self):
         """ search: title search for FrontPage """
-        query, wanted  = "title:FrontPage", 1
-        result = search.searchPages(self.request, query)
-        assert len(result.hits) == wanted
+        result = search.searchPages(self.request, u"title:FrontPage")
+        assert len(result.hits) == 1
+
+    def testTitleSearchAND(self):
+        """ search: title search with AND expression """
+        result = search.searchPages(self.request, u"title:Help title:Index")
+        assert len(result.hits) == 1
+
+    def testTitleSearchOR(self):
+        """ search: title search with OR expression """
+        result = search.searchPages(self.request, u"title:FrontPage or title:RecentChanges")
+        assert len(result.hits) == 2
+
+    def testTitleSearchNegatedFindAll(self):
+        """ search: negated title search for some pagename that does not exist results in all pagenames """
+        result = search.searchPages(self.request, u"-title:%s" % self.doesnotexist)
+        assert len(result.hits) > 100 # XXX should be "all"
 
     def testTitleSearchNegativeTerm(self):
-        """ search: title search for FrontPage """
-        helpon_count = len(search.searchPages(self.request, "title:HelpOn").hits)
-        result = search.searchPages(self.request, "title:HelpOn -title:Acl")
-        assert len(result.hits) == helpon_count - 1
+        """ search: title search for an AND expression with a negative term """
+        helpon_count = len(search.searchPages(self.request, u"title:HelpOn").hits)
+        result = search.searchPages(self.request, u"title:HelpOn -title:Acl")
+        assert len(result.hits) == helpon_count - 1 # finds all HelpOn* except one
+
+    def testFullSearchNegatedFindAll(self):
+        """ search: negated full search for some string that does not exist results in all pages """
+        result = search.searchPages(self.request, u"-%s" % self.doesnotexist)
+        assert len(result.hits) > 100 # XXX should be "all"
+
+    def testFullSearchNegativeTerm(self):
+        """ search: full search for an AND expression with a negative term """
+        helpon_count = len(search.searchPages(self.request, u"HelpOn").hits)
+        result = search.searchPages(self.request, u"HelpOn -ACL")
+        assert 0 < len(result.hits) < helpon_count
 
 
 coverage_modules = ['MoinMoin.search']
--- a/MoinMoin/search/builtin.py	Fri Nov 30 15:30:20 2007 +0100
+++ b/MoinMoin/search/builtin.py	Fri Nov 30 23:41:48 2007 +0100
@@ -5,7 +5,7 @@
     @copyright: 2005 MoinMoin:FlorianFesti,
                 2005 MoinMoin:NirSoffer,
                 2005 MoinMoin:AlexanderSchremmer,
-                2006 MoinMoin:ThomasWaldmann,
+                2006-2007 MoinMoin:ThomasWaldmann,
                 2006 MoinMoin:FranzPletz
     @license: GNU GPL, see COPYING for details
 """
@@ -454,12 +454,15 @@
         start = time.time()
         if self.request.cfg.xapian_search:
             hits = self._xapianSearch()
+            logging.debug("search: _xapianSearch found %d hits" % len(hits))
         else:
             hits = self._moinSearch()
+            logging.debug("search: moinSearch found %d hits" % len(hits))
 
         # important - filter deleted pages or pages the user may not read!
         if not self.filtered:
             hits = self._filter(hits)
+            logging.debug("search: after filtering: %d hits" % len(hits))
 
         # when xapian was used, we can estimate the numer of matches
         # Note: hits can't be estimated by xapian with historysearch enabled
@@ -619,6 +622,7 @@
 
     def _getHits(self, pages, matchSearchFunction):
         """ Get the hit tuples in pages through matchSearchFunction """
+        logging.debug("search: _getHits searching in %d pages ..." % len(pages))
         hits = []
         revisionCache = {}
         fs_rootpage = self.fs_rootpage
@@ -633,6 +637,7 @@
             wikiname = valuedict['wikiname']
             pagename = valuedict['pagename']
             attachment = valuedict['attachment']
+            logging.debug("search: _getHits processing %r %r %r" % (wikiname, pagename, attachment))
 
             if 'revision' in valuedict and valuedict['revision']:
                 revision = int(valuedict['revision'])
@@ -653,6 +658,7 @@
                         hits.append((wikiname, page, attachment, matches))
                 else:
                     matches = matchSearchFunction(page=page, uid=uid)
+                    logging.debug("search: matchSearchFunction %r returned %r" % (matchSearchFunction, matches))
                     if matches:
                         if not self.historysearch and \
                                 pagename in revisionCache and \
--- a/MoinMoin/search/queryparser.py	Fri Nov 30 15:30:20 2007 +0100
+++ b/MoinMoin/search/queryparser.py	Fri Nov 30 23:41:48 2007 +0100
@@ -5,12 +5,14 @@
     @copyright: 2005 MoinMoin:FlorianFesti,
                 2005 MoinMoin:NirSoffer,
                 2005 MoinMoin:AlexanderSchremmer,
-                2006 MoinMoin:ThomasWaldmann,
+                2006-2007 MoinMoin:ThomasWaldmann,
                 2006 MoinMoin:FranzPletz
     @license: GNU GPL, see COPYING for details
 """
 
 import re
+import logging
+
 from MoinMoin import config, wikiutil
 from MoinMoin.search.results import Match, TitleMatch, TextMatch
 
@@ -139,12 +141,18 @@
         if terms:
             # Create and return a filter function
             def filter(name):
-                """ A function that return True if all terms filter name """
+                """ A function that returns True if all terms filter name """
+                result = None
                 for term in terms:
-                    filter = term.pageFilter()
-                    if not filter(name):
-                        return False
-                return True
+                    _filter = term.pageFilter()
+                    t = _filter(name)
+                    if t is False:
+                        result = False
+                        break
+                    elif t is True:
+                        result = True
+                logging.debug("search: pageFilter AND returns %r" % result)
+                return result
             return filter
 
         return None
@@ -221,6 +229,34 @@
 
     operator = ' or '
 
+    def pageFilter(self):
+        """ Return a page filtering function
+
+        This function is used to filter the page list before we search it.
+
+        Return a function that gets a page name and returns a bool, or None.
+        """
+        # Sort terms by cost, then get all title searches
+        self.sortByCost()
+        terms = [term for term in self._subterms if isinstance(term, TitleSearch)]
+        if terms:
+            # Create and return a filter function
+            def filter(name):
+                """ A function that returns True if any term filters name """
+                result = None
+                for term in terms:
+                    _filter = term.pageFilter()
+                    t = _filter(name)
+                    if t is True:
+                        return True
+                    elif t is False:
+                        result = False
+                logging.debug("search: pageFilter OR returns %r" % result)
+                return result
+            return filter
+
+        return None
+
     def search(self, page):
         """ Search page with terms
 
@@ -274,6 +310,7 @@
         return u"(%s)" % self._pattern
 
     def search(self, page):
+        logging.debug("search: TextSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern))
         matches = []
 
         # Search in page name
@@ -305,13 +342,18 @@
                 matches.append(TextMatch(re_match=match))
 
         # Decide what to do with the results.
-        if ((self.negated and matches) or
-            (not self.negated and not matches)):
-            return None
-        elif matches:
-            return matches
-        else:
-            return []
+        if self.negated:
+            if matches:
+                result = None
+            else:
+                result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show)
+        else: # not negated
+            if matches:
+                result = matches
+            else:
+                result = None
+        logging.debug("search: TextSearch returning %r" % result)
+        return result
 
     def xapian_wanted(self):
         # XXX: Add option for term-based matching
@@ -357,6 +399,7 @@
                 (self.titlesearch.xapian_term(request, allterms),
                     Query(Query.OP_AND, queries)))
 
+
 class TitleSearch(BaseExpression):
     """ Term searches in pattern in page title only """
 
@@ -387,11 +430,14 @@
         """ Page filter function for single title search """
         def filter(name):
             match = self.search_re.search(name)
-            return bool(self.negated) ^ bool(match)
+            result = bool(self.negated) ^ bool(match)
+            logging.debug("search: pageFilter title returns %r (%r)" % (result, self.pattern))
+            return result
         return filter
 
     def search(self, page):
-        # Get matches in page name
+        """ Get matches in page name """
+        logging.debug("search: TitleSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern))
         matches = []
         for match in self.search_re.finditer(page.page_name):
             if page.request.cfg.xapian_stemming:
@@ -414,13 +460,18 @@
             else:
                 matches.append(TitleMatch(re_match=match))
 
-        if ((self.negated and matches) or
-            (not self.negated and not matches)):
-            return None
-        elif matches:
-            return matches
-        else:
-            return []
+        if self.negated:
+            if matches:
+                result = None
+            else:
+                result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show)
+        else: # not negated
+            if matches:
+                result = matches
+            else:
+                result = None
+        logging.debug("search: TitleSearch returning %r" % result)
+        return result
 
     def xapian_wanted(self):
         return True # only easy regexps possible
@@ -522,7 +573,8 @@
         return u"(%s)" % self._textpattern
 
     def search(self, page):
-        # Get matches in page name
+        # Get matches in page links
+        logging.debug("search: LinkSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern))
         matches = []
         Found = True
 
@@ -542,13 +594,18 @@
                 matches.append(TextMatch(0, 0))
 
         # Decide what to do with the results.
-        if ((self.negated and matches) or
-            (not self.negated and not matches)):
-            return None
-        elif matches:
-            return matches
-        else:
-            return []
+        if self.negated:
+            if matches:
+                result = None
+            else:
+                result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show)
+        else: # not negated
+            if matches:
+                result = matches
+            else:
+                result = None
+        logging.debug("search: LinkSearch returning %r" % result)
+        return result
 
     def xapian_wanted(self):
         return True # only easy regexps possible
@@ -607,6 +664,7 @@
         return ""
 
     def search(self, page):
+        logging.debug("search: LanguageSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern))
         match = False
         body = page.getPageHeader()
 
@@ -614,12 +672,18 @@
             match = True
 
         # Decide what to do with the results.
-        if self.negated and match:
-            return None
-        elif match or (self.negated and not match):
-            return [Match()]
-        else:
-            return []
+        if self.negated:
+            if match:
+                result = None
+            else:
+                result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show)
+        else: # not negated
+            if match:
+                result = [Match()] # represents "matched" (but we have nothing to show)
+            else:
+                result = None
+        logging.debug("search: LanguageSearch returning %r" % result)
+        return result
 
     def xapian_wanted(self):
         return True # only easy regexps possible
@@ -791,6 +855,7 @@
         return ""
 
     def search(self, page):
+        logging.debug("search: DomainSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern))
         checks = {'underlay': page.isUnderlayPage,
                   'standard': page.isStandardPage,
                   'system': lambda page: wikiutil.isSystemPage(page.request, page.page_name),
@@ -802,12 +867,18 @@
             match = False
 
         # Decide what to do with the results.
-        if self.negated and match:
-            return None
-        elif match or (self.negated and not match):
-            return [Match()]
-        else:
-            return []
+        if self.negated:
+            if match:
+                result = None
+            else:
+                result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show)
+        else: # not negated
+            if match:
+                result = [Match()] # represents "matched" (but we have nothing to show)
+            else:
+                result = None
+        logging.debug("search: DomainSearch returning %r" % result)
+        return result
 
     def xapian_wanted(self):
         return True # only easy regexps possible