changeset 1374:aa33bb2b40d6

mimetype-search is live, i18n fixes
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Sat, 19 Aug 2006 13:10:44 +0200
parents 90cb8fe71cdf
children d5741f2b8292
files MoinMoin/action/fullsearch.py MoinMoin/macro/AdvancedSearch.py MoinMoin/search/Xapian.py MoinMoin/search/queryparser.py
diffstat 4 files changed, 89 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/action/fullsearch.py	Fri Aug 18 18:06:42 2006 +0200
+++ b/MoinMoin/action/fullsearch.py	Sat Aug 19 13:10:44 2006 +0200
@@ -66,11 +66,14 @@
         timeframe = request.form.get('time', [''])[0].strip()
         language = request.form.get('language',
                 [request.cfg.language_default])[0]
+        mimetype = request.form.get('mimetype', [0])[0]
         
         word_re = re.compile(r'(\"[\w\s]+"|\w+)')
         needle = ''
         if language:
             needle += 'language:%s ' % language
+        if mimetype:
+            needle += 'mimetype:%s ' % mimetype
         if categories:
             needle += '(%s) ' % ' or '.join(['category:%s' % cat
                 for cat in word_re.findall(categories)])
--- a/MoinMoin/macro/AdvancedSearch.py	Fri Aug 18 18:06:42 2006 +0200
+++ b/MoinMoin/macro/AdvancedSearch.py	Sat Aug 19 13:10:44 2006 +0200
@@ -13,6 +13,8 @@
 from MoinMoin import config, wikiutil, search
 from MoinMoin.i18n import languages
 
+import mimetypes
+
 Dependencies = ['pages']
 
 try:
@@ -42,18 +44,18 @@
             f.table_cell(0),
             f.table_row(0),
         ]) for txt, input_field in (
-            ('containing all the following terms',
+            (_('containing all the following terms'),
                 '<input type="text" name="and_terms" size="30">'),
-            ('containing one or more of the following '
-                'terms', '<input type="text" name="or_terms" size="30">'),
-            ('not containing the following terms',
+            (_('containing one or more of the following terms'),
+                '<input type="text" name="or_terms" size="30">'),
+            (_('not containing the following terms'),
                 '<input type="text" name="not_terms" size="30">'),
             #('containing only one of the following terms',
             #    '<input type="text" name="xor_terms" size="30">'),
             # TODO: dropdown-box?
-            ('belonging to one of the following categories',
+            (_('belonging to one of the following categories'),
                 '<input type="text" name="categories" size="30">'),
-            ('edited in the the following timeframe (XXX)',
+            (_('edited in the the following timeframe (XXX)'),
                 '<input type="text" name="time" size="30" value="until now">'),
         )])
     ])
@@ -68,7 +70,6 @@
         u'</select>',
     ])
 
-    import mimetypes
     ft_dropdown = ''.join([
         u'<select name="mimetype" size="1">',
         u'<option value="" selected>%s</option>' % _('any type'),
@@ -88,8 +89,8 @@
             f.table_cell(0),
             f.table_row(0),
             ]) for txt in (
-                ('Language', lang_dropdown),
-                ('File Type (XXX)', ft_dropdown),
+                (_('Language'), lang_dropdown),
+                (_('File Type'), ft_dropdown),
                 ('', '<input type="checkbox" name="titlesearch" value="1">%s</input>' %
                 _('Search only in titles')),
                 ('', '<input type="checkbox" name="case" value="1">%s</input>' %
--- a/MoinMoin/search/Xapian.py	Fri Aug 18 18:06:42 2006 +0200
+++ b/MoinMoin/search/Xapian.py	Sat Aug 19 13:10:44 2006 +0200
@@ -457,11 +457,14 @@
                 xlanguage = xapdoc.Keyword('lang', language)
                 xstem_language = xapdoc.Keyword('stem_lang', stem_language)
                 mimetype, att_content = self.contentfilter(filename)
-                xmimetype = xapdoc.TextField('mimetype', mimetype, True)
+                xmimetype = xapdoc.Keyword('mimetype', mimetype)
                 xcontent = xapdoc.TextField('content', att_content)
-                doc = xapdoc.Document(textFields=(xcontent, xmimetype, ),
-                                      keywords=(xatt_itemid, xtitle, xlanguage, xstem_language, ),
-                                      sortFields=(xpname, xattachment, xmtime, xwname, ),
+                doc = xapdoc.Document(textFields=(xcontent, ),
+                                      keywords=(xatt_itemid, xtitle,
+                                          xlanguage, xstem_language,
+                                          xmimetype),
+                                      sortFields=(xpname, xattachment, xmtime,
+                                          xwname, ),
                                      )
                 doc.analyzerFactory = getWikiAnalyzerFactory(request,
                         stem_language)
--- a/MoinMoin/search/queryparser.py	Fri Aug 18 18:06:42 2006 +0200
+++ b/MoinMoin/search/queryparser.py	Sat Aug 19 13:10:44 2006 +0200
@@ -696,6 +696,70 @@
             return UnicodeQuery('%s:%s' % (prefix, pattern))
 
 
+class MimetypeSearch(BaseExpression):
+    """ Search for files belonging to a specific mimetype """
+
+    def __init__(self, pattern, use_re=False, case=True):
+        """ Init a mimetype search
+
+        @param pattern: pattern to search for, ascii string or unicode
+        @param use_re: treat pattern as re or plain text, bool
+        @param case: do case sensitive search, bool
+        """
+        self._pattern = pattern.lower()
+        self.negated = 0
+        self.use_re = use_re
+        self.case = False       # not case-sensitive!
+        self.xapian_called = False
+        self._build_re(self._pattern, use_re=use_re, case=case)
+
+    def costs(self):
+        return 5000 # cheaper than a TextSearch
+
+    def __unicode__(self):
+        neg = self.negated and '-' or ''
+        return u'%s!"%s"' % (neg, unicode(self._pattern))
+
+    def highlight_re(self):
+        return ""
+
+    def search(self, page):
+        # We just use (and trust ;)) xapian for this.. deactivated for _moinSearch
+        if not self.xapian_called:
+            return []
+        else:
+            return [Match()]
+
+    def xapian_wanted(self):
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return False            # case-sensitivity would make no sense
+
+    def xapian_term(self, request, allterms):
+        self.xapian_called = True
+        prefix = Xapian.Index.prefixMap['mimetype']
+        if self.use_re:
+            # basic regex matching per term
+            terms = []
+            found = None
+            n = len(prefix)
+            for term in allterms():
+                if prefix == term[:n]:
+                    found = True
+                    if self.search_re.match(term[n:]):
+                        terms.append(term)
+                elif found:
+                    continue
+
+            if not terms:
+                return Query()
+            return Query(Query.OP_OR, terms)
+        else:
+            pattern = self._pattern
+            return UnicodeQuery('%s%s' % (prefix, pattern))
+
+
 ##############################################################################
 ### Parse Query
 ##############################################################################
@@ -782,6 +846,7 @@
         linkto = False
         lang = False
         category = False
+        mimetype = False
 
         for m in modifiers:
             if "title".startswith(m):
@@ -796,6 +861,8 @@
                 lang = True
             elif "category".startswith(m):
                 category = True
+            elif "mimetype".startswith(m):
+                mimetype = True
 
         # oh, let's better call xapian if we encouter this nasty regexp ;)
         if not category:
@@ -808,6 +875,8 @@
 
         if category:
             obj = CategorySearch(text, use_re=regex, case=case)
+        elif mimetype:
+            obj = MimetypeSearch(text, use_re=regex, case=False)
         elif lang:
             obj = LanguageSearch(text, use_re=regex, case=False)
         elif linkto: