changeset 851:4d1bc2e51184

indexing & searching without stemmer installed
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Sat, 17 Jun 2006 20:53:12 +0200
parents 71875396f812
children 0ccd65be5656
files MoinMoin/Xapian.py MoinMoin/search.py docs/CHANGES.fpletz
diffstat 3 files changed, 22 insertions(+), 20 deletions(-)
--- a/MoinMoin/Xapian.py	Fri Jun 16 14:20:17 2006 +0200
+++ b/MoinMoin/Xapian.py	Sat Jun 17 20:53:12 2006 +0200
@@ -523,19 +523,21 @@
         default_lang = page.request.cfg.language_default
 
         lang = ''
-        for line in body.split('\n'):
-            if line.startswith('#language'):
-                lang = line.split(' ')[1]
-                try:
-                    Stemmer(lang)
-                except KeyError:
-                    # lang is not stemmable
+
+        if use_stemming:
+            for line in body.split('\n'):
+                if line.startswith('#language'):
+                    lang = line.split(' ')[1]
+                    try:
+                        Stemmer(lang)
+                    except KeyError:
+                        # lang is not stemmable
+                        break
+                    else:
+                        # lang is stemmable
+                        return (lang, lang)
+                elif not line.startswith('#'):
                     break
-                else:
-                    # lang is stemmable
-                    return (lang, lang)
-            elif not line.startswith('#'):
-                break
         
         if not lang:
             # no lang found at all.. fallback to default language
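The Xapian.py hunk wraps the per-page language detection in a use_stemming check, so the Stemmer lookup is only attempted when stemming is actually enabled and indexing no longer fails on wikis without PyStemmer installed. A minimal standalone sketch of the guarded logic; get_language and stemmer_factory are illustrative names, not the real MoinMoin signatures, and the fallback return value is an assumption:

    def get_language(body, default_lang, use_stemming, stemmer_factory):
        """Detect the page language from a #language processing instruction.

        Sketch of the guarded logic in the hunk above; stemmer_factory stands
        in for PyStemmer's Stemmer() and should raise KeyError for languages
        it cannot stem.  The exact fallback return value is an assumption.
        """
        lang = ''
        if use_stemming:
            for line in body.split('\n'):
                if line.startswith('#language'):
                    lang = line.split(' ')[1]
                    try:
                        stemmer_factory(lang)
                    except KeyError:
                        # lang is not stemmable
                        break
                    else:
                        # lang is stemmable: index and stem in that language
                        return (lang, lang)
                elif not line.startswith('#'):
                    # processing instructions only appear at the top of the page
                    break
        if not lang:
            # no (stemmable) language found, fall back to the wiki default
            lang = default_lang
        return (lang, default_lang)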
--- a/MoinMoin/search.py	Fri Jun 16 14:20:17 2006 +0200
+++ b/MoinMoin/search.py	Sat Jun 17 20:53:12 2006 +0200
@@ -308,7 +308,7 @@
                     t = tmp
                 else:
                     # just not stemmed
-                    t = [Query(i) for i in analyzer.tokenize(t)]
+                    t = [UnicodeQuery(i) for i in analyzer.tokenize(t)]
                 queries.append(Query(Query.OP_AND, t))
 
             # TODO: hilight and sort stemmed words correctly (also in TitleSearch)
@@ -394,7 +394,7 @@
                             for i in analyzer.tokenize(t, flat_stemming=False)]
                 else:
                     # just not stemmed
-                    t = [UnicodeQuery('%s%s' % (Xapian.Index.prefixMap['title'], j))
+                    t = [UnicodeQuery('%s%s' % (Xapian.Index.prefixMap['title'], i))
                         for i in analyzer.tokenize(t)]
 
                 queries.append(Query(Query.OP_AND, t))
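The search.py hunks fix two small things: plain (non-stemmed) tokens are now wrapped in UnicodeQuery instead of Query, so unicode terms are encoded before they reach Xapian, and the title-search branch referenced a stale loop variable (j instead of i) when prefixing tokens. A rough sketch of the corrected title-term construction, assuming the python-xapian bindings are installed; unicode_query and SimpleAnalyzer stand in for MoinMoin's UnicodeQuery wrapper and WikiAnalyzer and are not the real API:

    import xapian

    TITLE_PREFIX = 'S'   # title term prefix ("title -> S" in the changelog below)

    class SimpleAnalyzer:
        # trivial stand-in for MoinMoin's WikiAnalyzer, for illustration only
        def tokenize(self, text):
            return text.lower().split()

    def unicode_query(term):
        # stand-in for MoinMoin's UnicodeQuery: encode unicode terms as UTF-8
        # before building the xapian.Query (assumption about the wrapper's job)
        if isinstance(term, unicode):
            term = term.encode('utf-8')
        return xapian.Query(term)

    def title_word_query(analyzer, word):
        # corrected title-search branch: prefix every token i (the old code
        # used a stale j here) and AND the per-token queries together
        terms = [unicode_query('%s%s' % (TITLE_PREFIX, i))
                 for i in analyzer.tokenize(word)]
        return xapian.Query(xapian.Query.OP_AND, terms)

For example, title_word_query(SimpleAnalyzer(), u'WikiName') yields an AND query over the single prefixed term 'Swikiname' in this sketch.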
--- a/docs/CHANGES.fpletz	Fri Jun 16 14:20:17 2006 +0200
+++ b/docs/CHANGES.fpletz	Sat Jun 17 20:53:12 2006 +0200
@@ -2,8 +2,6 @@
 =============================
 
   Known main issues:
-    * Stemming in English only for now because we would have to stem
-      every word in a query for every language. Suggestions? ;-)
     * Somethings' wrong with the matching of stemmed terms, i.e. matches
       beyond single WikiWord borders although matching lower-case only
       (see MoinMoin/search.py:92)
@@ -21,8 +19,6 @@
     * Wikifarms support (multiple indexes)
     * Indexing and searching of Categories (new term prefix)
     * Finish the stemming/matching stuff
-    * Test if indexing/searching works realiably without a stemmer
-      installed
 
   New Features:
     * Faster search thanks to Xapian
@@ -48,8 +44,8 @@
 tweaking to use AND_NOT because Xapian doesn't provide a pure NOT. Should
 be no issue with OrExpression as _moinSearch handles this correctly.
 
-2006-06-11 Now handling prefixes correctly (title -> S, XLINKTO always
-with ':')
+2006-06-11
+    * Now handling prefixes correctly (title -> S, XLINKTO always with ':')
 
 2006-06-15
     * Integrated basic stemming, english only for now (see issues).
@@ -58,4 +54,8 @@
       to usage of _moinSearch
 
 2006-06-16
+    * Indexing & searching now works without a stemmer installed (small
+      bugfixes)
 
+2006-06-17
+