changeset 1376:649b60a4064a

domain-specific search (for system pages)
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Sat, 19 Aug 2006 13:35:49 +0200
parents d5741f2b8292
children bb37beca7545
files MoinMoin/action/fullsearch.py MoinMoin/macro/AdvancedSearch.py MoinMoin/search/queryparser.py docs/CHANGES.fpletz
diffstat 4 files changed, 81 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/action/fullsearch.py	Sat Aug 19 13:17:44 2006 +0200
+++ b/MoinMoin/action/fullsearch.py	Sat Aug 19 13:35:49 2006 +0200
@@ -67,6 +67,7 @@
         language = request.form.get('language',
                 [request.cfg.language_default])[0]
         mimetype = request.form.get('mimetype', [0])[0]
+        underlay = request.form.get('underlay', [0])[0]
         
         word_re = re.compile(r'(\"[\w\s]+"|\w+)')
         needle = ''
@@ -74,6 +75,8 @@
             needle += 'language:%s ' % language
         if mimetype:
             needle += 'mimetype:%s ' % mimetype
+        if underlay:
+            needle += 'domain:underlay '
         if categories:
             needle += '(%s) ' % ' or '.join(['category:%s' % cat
                 for cat in word_re.findall(categories)])
--- a/MoinMoin/macro/AdvancedSearch.py	Sat Aug 19 13:17:44 2006 +0200
+++ b/MoinMoin/macro/AdvancedSearch.py	Sat Aug 19 13:35:49 2006 +0200
@@ -94,7 +94,10 @@
                 ('', '<input type="checkbox" name="titlesearch" value="1">%s</input>' %
                 _('Search only in titles')),
                 ('', '<input type="checkbox" name="case" value="1">%s</input>' %
-                _('Case-sensitive search')))
+                _('Case-sensitive search')),
+                ('', '<input type="checkbox" name="underlay" value="1">%s'
+                    '</input>' % _('Only system pages')),
+            )
     ])
     
     html = [
--- a/MoinMoin/search/queryparser.py	Sat Aug 19 13:17:44 2006 +0200
+++ b/MoinMoin/search/queryparser.py	Sat Aug 19 13:35:49 2006 +0200
@@ -697,7 +697,7 @@
 
 
 class MimetypeSearch(BaseExpression):
-    """ Search for files beloging to a specific mimetype """
+    """ Search for files belonging to a specific mimetype """
 
     def __init__(self, pattern, use_re=False, case=True):
         """ Init a mimetype search
@@ -760,6 +760,72 @@
             return UnicodeQuery('%s%s' % (prefix, pattern))
 
 
+class DomainSearch(BaseExpression):
+    """ Search for pages belonging to a specific domain """
+
+    def __init__(self, pattern, use_re=False, case=True):
+        """ Init a domain search
+
+        @param pattern: pattern to search for, ascii string or unicode
+        @param use_re: treat pattern as re or plain text, bool
+        @param case: only passed on to _build_re; self.case is always False
+        """
+        self._pattern = pattern.lower()
+        self.negated = 0
+        self.use_re = use_re
+        self.case = False       # not case-sensitive!
+        self.xapian_called = False  # set to True by xapian_term()
+        self._build_re(self._pattern, use_re=use_re, case=case)
+
+    def costs(self):
+        return 5000 # cheaper than a TextSearch
+
+    def __unicode__(self):
+        neg = self.negated and '-' or ''
+        return u'%s!"%s"' % (neg, unicode(self._pattern))
+
+    def highlight_re(self):
+        return ""
+
+    def search(self, page):
+        # No matching is done here: report a match only if xapian_term() ran
+        if not self.xapian_called:
+            return []
+        else:
+            return [Match()]
+
+    def xapian_wanted(self):
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return False            # case-sensitivity would make no sense
+
+    def xapian_term(self, request, allterms):
+        self.xapian_called = True
+        prefix = Xapian.Index.prefixMap['domain']
+        if self.use_re:
+            # collect every prefixed term whose remainder matches search_re
+            terms = []
+            found = None
+            n = len(prefix)
+            for term in allterms():
+                if prefix == term[:n]:
+                    found = True
+                    if self.search_re.match(term[n+1:]):  # presumably skips ':'
+                        terms.append(term)
+                elif found:
+                    continue  # NOTE(review): no-op here; was 'break' intended?
+
+            if not terms:
+                return Query()
+            return Query(Query.OP_OR, terms)
+        else:
+            pattern = self._pattern
+            return UnicodeQuery('%s:%s' % (prefix, pattern))
+
+
+
+
 ##############################################################################
 ### Parse Query
 ##############################################################################
@@ -847,6 +913,7 @@
         lang = False
         category = False
         mimetype = False
+        domain = False
 
         for m in modifiers:
             if "title".startswith(m):
@@ -863,6 +930,8 @@
                 category = True
             elif "mimetype".startswith(m):
                 mimetype = True
+            elif "domain".startswith(m):
+                domain = True
 
         # oh, let's better call xapian if we encouter this nasty regexp ;)
         if not category:
@@ -881,6 +950,8 @@
             obj = LanguageSearch(text, use_re=regex, case=False)
         elif linkto:
             obj = LinkSearch(text, use_re=regex, case=case)
+        elif domain:
+            obj = DomainSearch(text, use_re=regex, case=False)
         elif title_search:
             obj = TitleSearch(text, use_re=regex, case=case)
         else:
--- a/docs/CHANGES.fpletz	Sat Aug 19 13:17:44 2006 +0200
+++ b/docs/CHANGES.fpletz	Sat Aug 19 13:35:49 2006 +0200
@@ -28,6 +28,7 @@
         - LanguageSearch: language:de
         - CategorySearch: category:Homepage
         - MimetypeSearch: mimetype:image/png (for attachments/files)
+        - DomainSearch: domain:underlay
       Note: Currently only available when Xapian is used
     * New config options:
         xapian_search        0      enables xapian-powered search
@@ -254,4 +255,5 @@
 2006-08-19
     * mimetype search works (more or less)
     * minor bugfixes (i18n etc.)
+    * domain-specific search (underlay -> system pages)