Mercurial > moin > 1.9
changeset 2955:c8a3731ce61c
search: add tests, add debug logging, fix bugs (negative search terms failing, and others)
author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
---|---|
date | Fri, 30 Nov 2007 23:41:48 +0100 |
parents | 96319d867986 |
children | a8809feee20c |
files | MoinMoin/search/_tests/test_search.py MoinMoin/search/builtin.py MoinMoin/search/queryparser.py |
diffstat | 3 files changed, 158 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/MoinMoin/search/_tests/test_search.py Fri Nov 30 15:30:20 2007 +0100 +++ b/MoinMoin/search/_tests/test_search.py Fri Nov 30 23:41:48 2007 +0100 @@ -2,10 +2,13 @@ """ MoinMoin - MoinMoin.search Tests - @copyright: 2005 by Nir Soffer <nirs@freeshell.org> + @copyright: 2005 by Nir Soffer <nirs@freeshell.org>, + 2007 by MoinMoin:ThomasWaldmann @license: GNU GPL, see COPYING for details. """ +import py + from MoinMoin import search @@ -41,7 +44,7 @@ ("a -b c", '["a" -"b" "c"]'), ("aaa bbb -ccc", '["aaa" "bbb" -"ccc"]'), ("title:aaa title:bbb -title:ccc", '[!"aaa" !"bbb" -!"ccc"]'), - ("aaa OR bbb", '["aaa" "OR" "bbb"]'), + ("aaa or bbb", '["aaa" or "bbb"]'), ]: result = parser.parse_query(query) assert str(result) == wanted @@ -49,18 +52,44 @@ class TestSearch: """ search: test search """ + doesnotexist = u'jfhsdaASDLASKDJ' def testTitleSearchFrontPage(self): """ search: title search for FrontPage """ - query, wanted = "title:FrontPage", 1 - result = search.searchPages(self.request, query) - assert len(result.hits) == wanted + result = search.searchPages(self.request, u"title:FrontPage") + assert len(result.hits) == 1 + + def testTitleSearchAND(self): + """ search: title search with AND expression """ + result = search.searchPages(self.request, u"title:Help title:Index") + assert len(result.hits) == 1 + + def testTitleSearchOR(self): + """ search: title search with OR expression """ + result = search.searchPages(self.request, u"title:FrontPage or title:RecentChanges") + assert len(result.hits) == 2 + + def testTitleSearchNegatedFindAll(self): + """ search: negated title search for some pagename that does not exist results in all pagenames """ + result = search.searchPages(self.request, u"-title:%s" % self.doesnotexist) + assert len(result.hits) > 100 # XXX should be "all" def testTitleSearchNegativeTerm(self): - """ search: title search for FrontPage """ - helpon_count = len(search.searchPages(self.request, "title:HelpOn").hits) - result = search.searchPages(self.request, "title:HelpOn -title:Acl") - assert len(result.hits) == helpon_count - 1 + """ search: title search for a AND expression with a negative term """ + helpon_count = len(search.searchPages(self.request, u"title:HelpOn").hits) + result = search.searchPages(self.request, u"title:HelpOn -title:Acl") + assert len(result.hits) == helpon_count - 1 # finds all HelpOn* except one + + def testFullSearchNegatedFindAll(self): + """ search: negated full search for some string that does not exist results in all pages """ + result = search.searchPages(self.request, u"-%s" % self.doesnotexist) + assert len(result.hits) > 100 # XXX should be "all" + + def testFullSearchNegativeTerm(self): + """ search: full search for a AND expression with a negative term """ + helpon_count = len(search.searchPages(self.request, u"HelpOn").hits) + result = search.searchPages(self.request, u"HelpOn -ACL") + assert 0 < len(result.hits) < helpon_count coverage_modules = ['MoinMoin.search']
--- a/MoinMoin/search/builtin.py Fri Nov 30 15:30:20 2007 +0100 +++ b/MoinMoin/search/builtin.py Fri Nov 30 23:41:48 2007 +0100 @@ -5,7 +5,7 @@ @copyright: 2005 MoinMoin:FlorianFesti, 2005 MoinMoin:NirSoffer, 2005 MoinMoin:AlexanderSchremmer, - 2006 MoinMoin:ThomasWaldmann, + 2006-2007 MoinMoin:ThomasWaldmann, 2006 MoinMoin:FranzPletz @license: GNU GPL, see COPYING for details """ @@ -454,12 +454,15 @@ start = time.time() if self.request.cfg.xapian_search: hits = self._xapianSearch() + logging.debug("search: _xapianSearch found %d hits" % len(hits)) else: hits = self._moinSearch() + logging.debug("search: moinSearch found %d hits" % len(hits)) # important - filter deleted pages or pages the user may not read! if not self.filtered: hits = self._filter(hits) + logging.debug("search: after filtering: %d hits" % len(hits)) # when xapian was used, we can estimate the numer of matches # Note: hits can't be estimated by xapian with historysearch enabled @@ -619,6 +622,7 @@ def _getHits(self, pages, matchSearchFunction): """ Get the hit tuples in pages through matchSearchFunction """ + logging.debug("search: _getHits searching in %d pages ..." % len(pages)) hits = [] revisionCache = {} fs_rootpage = self.fs_rootpage @@ -633,6 +637,7 @@ wikiname = valuedict['wikiname'] pagename = valuedict['pagename'] attachment = valuedict['attachment'] + logging.debug("search: _getHits processing %r %r %r" % (wikiname, pagename, attachment)) if 'revision' in valuedict and valuedict['revision']: revision = int(valuedict['revision']) @@ -653,6 +658,7 @@ hits.append((wikiname, page, attachment, matches)) else: matches = matchSearchFunction(page=page, uid=uid) + logging.debug("search: matchSearchFunction %r returned %r" % (matchSearchFunction, matches)) if matches: if not self.historysearch and \ pagename in revisionCache and \
--- a/MoinMoin/search/queryparser.py Fri Nov 30 15:30:20 2007 +0100 +++ b/MoinMoin/search/queryparser.py Fri Nov 30 23:41:48 2007 +0100 @@ -5,12 +5,14 @@ @copyright: 2005 MoinMoin:FlorianFesti, 2005 MoinMoin:NirSoffer, 2005 MoinMoin:AlexanderSchremmer, - 2006 MoinMoin:ThomasWaldmann, + 2006-2007 MoinMoin:ThomasWaldmann, 2006 MoinMoin:FranzPletz @license: GNU GPL, see COPYING for details """ import re +import logging + from MoinMoin import config, wikiutil from MoinMoin.search.results import Match, TitleMatch, TextMatch @@ -139,12 +141,18 @@ if terms: # Create and return a filter function def filter(name): - """ A function that return True if all terms filter name """ + """ A function that returns True if all terms filter name """ + result = None for term in terms: - filter = term.pageFilter() - if not filter(name): - return False - return True + _filter = term.pageFilter() + t = _filter(name) + if t is False: + result = False + break + elif t is True: + result = True + logging.debug("search: pageFilter AND returns %r" % result) + return result return filter return None @@ -221,6 +229,34 @@ operator = ' or ' + def pageFilter(self): + """ Return a page filtering function + + This function is used to filter page list before we search it. + + Return a function that gets a page name, and return bool, or None. + """ + # Sort terms by cost, then get all title searches + self.sortByCost() + terms = [term for term in self._subterms if isinstance(term, TitleSearch)] + if terms: + # Create and return a filter function + def filter(name): + """ A function that returns True if any term filters name """ + result = None + for term in terms: + _filter = term.pageFilter() + t = _filter(name) + if t is True: + return True + elif t is False: + result = False + logging.debug("search: pageFilter OR returns %r" % result) + return result + return filter + + return None + def search(self, page): """ Search page with terms @@ -274,6 +310,7 @@ return u"(%s)" % self._pattern def search(self, page): + logging.debug("search: TextSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern)) matches = [] # Search in page name @@ -305,13 +342,18 @@ matches.append(TextMatch(re_match=match)) # Decide what to do with the results. - if ((self.negated and matches) or - (not self.negated and not matches)): - return None - elif matches: - return matches - else: - return [] + if self.negated: + if matches: + result = None + else: + result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show) + else: # not negated + if matches: + result = matches + else: + result = None + logging.debug("search: TextSearch returning %r" % result) + return result def xapian_wanted(self): # XXX: Add option for term-based matching @@ -357,6 +399,7 @@ (self.titlesearch.xapian_term(request, allterms), Query(Query.OP_AND, queries))) + class TitleSearch(BaseExpression): """ Term searches in pattern in page title only """ @@ -387,11 +430,14 @@ """ Page filter function for single title search """ def filter(name): match = self.search_re.search(name) - return bool(self.negated) ^ bool(match) + result = bool(self.negated) ^ bool(match) + logging.debug("search: pageFilter title returns %r (%r)" % (result, self.pattern)) + return result return filter def search(self, page): - # Get matches in page name + """ Get matches in page name """ + logging.debug("search: TitleSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern)) matches = [] for match in self.search_re.finditer(page.page_name): if page.request.cfg.xapian_stemming: @@ -414,13 +460,18 @@ else: matches.append(TitleMatch(re_match=match)) - if ((self.negated and matches) or - (not self.negated and not matches)): - return None - elif matches: - return matches - else: - return [] + if self.negated: + if matches: + result = None + else: + result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show) + else: # not negated + if matches: + result = matches + else: + result = None + logging.debug("search: TitleSearch returning %r" % result) + return result def xapian_wanted(self): return True # only easy regexps possible @@ -522,7 +573,8 @@ return u"(%s)" % self._textpattern def search(self, page): - # Get matches in page name + # Get matches in page links + logging.debug("search: LinkSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern)) matches = [] Found = True @@ -542,13 +594,18 @@ matches.append(TextMatch(0, 0)) # Decide what to do with the results. - if ((self.negated and matches) or - (not self.negated and not matches)): - return None - elif matches: - return matches - else: - return [] + if self.negated: + if matches: + result = None + else: + result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show) + else: # not negated + if matches: + result = matches + else: + result = None + logging.debug("search: LinkSearch returning %r" % result) + return result def xapian_wanted(self): return True # only easy regexps possible @@ -607,6 +664,7 @@ return "" def search(self, page): + logging.debug("search: LanguageSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern)) match = False body = page.getPageHeader() @@ -614,12 +672,18 @@ match = True # Decide what to do with the results. - if self.negated and match: - return None - elif match or (self.negated and not match): - return [Match()] - else: - return [] + if self.negated: + if match: + result = None + else: + result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show) + else: # not negated + if match: + result = [Match()] # represents "matched" (but we have nothing to show) + else: + result = None + logging.debug("search: LanguageSearch returning %r" % result) + return result def xapian_wanted(self): return True # only easy regexps possible @@ -791,6 +855,7 @@ return "" def search(self, page): + logging.debug("search: DomainSearch searching page %r for (negated = %r) %r" % (page.page_name, self.negated, self._pattern)) checks = {'underlay': page.isUnderlayPage, 'standard': page.isStandardPage, 'system': lambda page: wikiutil.isSystemPage(page.request, page.page_name), @@ -802,12 +867,18 @@ match = False # Decide what to do with the results. - if self.negated and match: - return None - elif match or (self.negated and not match): - return [Match()] - else: - return [] + if self.negated: + if match: + result = None + else: + result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show) + else: # not negated + if match: + result = [Match()] # represents "matched" (but we have nothing to show) + else: + result = None + logging.debug("search: DomainSearch returning %r" % result) + return result def xapian_wanted(self): return True # only easy regexps possible