changeset 3676:8dc2c2fc64ef

Removed the PyStemmer dependency by using xapian.Stem and requiring xapian >= 1.0.0; also removed the code handling older xapian versions
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 07 Jun 2008 20:42:57 +0200
parents f048227cabc3
children aba86e157059
files MoinMoin/macro/SystemInfo.py MoinMoin/search/Xapian.py MoinMoin/search/builtin.py docs/CHANGES
diffstat 4 files changed, 36 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/macro/SystemInfo.py	Sat Jun 07 18:47:58 2008 +0200
+++ b/MoinMoin/macro/SystemInfo.py	Sat Jun 07 20:42:57 2008 +0200
@@ -130,9 +130,18 @@
         from MoinMoin.search.builtin import Search
         xapState = (_('Disabled'), _('Enabled'))
         idxState = (_('index available'), _('index unavailable'))
-        xapRow = xapState[request.cfg.xapian_search]
 
-        if request.cfg.xapian_search:
+        xapian_enabled = request.cfg.xapian_search
+        xapRow = xapState[xapian_enabled]
+        try:
+            import xapian
+            xapVersion = 'Xapian %s' % xapian.version_string()
+        except ImportError:
+            xapian = None
+            xapVersion = _('Xapian and/or Python Xapian bindings not installed')
+        xapRow += ', %s' % xapVersion
+
+        if xapian and xapian_enabled:
             idx = Search._xapianIndex(request)
             available = idx and idxState[0] or idxState[1]
             mtime = _('last modified: %s') % (idx and
@@ -141,32 +150,12 @@
                     _('N/A'))
             xapRow += ', %s, %s' % (available, mtime)
 
-        try:
-            import xapian
-            try:
-                xapVersion = xapian.version_string()
-            except AttributeError:
-                xapVersion = xapian.xapian_version_string() # deprecated since xapian 0.9.6, removal in 1.1.0
-        except ImportError:
-            xapVersion = _('Xapian and/or Python Xapian bindings not installed')
-
         row(_('Xapian search'), xapRow)
-        row(_('Xapian Version'), xapVersion)
 
-        stems = [nonestr]
-        try:
-            import Stemmer
-            try:
-                stems = Stemmer.algorithms()
-                stemVersion = Stemmer.version()
-            except:
-                stemVersion = _('PyStemmer not installed')
-        except ImportError:
-            stemVersion = _('PyStemmer not installed')
-
-        row(_('Stemming for Xapian'), xapState[request.cfg.xapian_stemming])
-        row(_('PyStemmer Version'), stemVersion)
-        row(_('PyStemmer stems'), ', '.join(stems) or nonestr)
+        if xapian and xapian_enabled:
+            stems = xapian.Stem.get_available_languages()
+            row(_('Stemming for Xapian'), xapState[request.cfg.xapian_stemming] +
+                " (%s)" % (stems or nonestr))
 
         try:
             from threading import activeCount
--- a/MoinMoin/search/Xapian.py	Sat Jun 07 18:47:58 2008 +0200
+++ b/MoinMoin/search/Xapian.py	Sat Jun 07 20:42:57 2008 +0200
@@ -23,12 +23,6 @@
 from MoinMoin import config, wikiutil
 from MoinMoin.search.builtin import BaseIndex
 
-try:
-    # PyStemmer, snowball python bindings from http://snowball.tartarus.org/
-    from Stemmer import Stemmer
-except ImportError:
-    Stemmer = None
-
 
 class UnicodeQuery(Query):
     """ Xapian query object which automatically encodes unicode strings """
@@ -98,10 +92,13 @@
         @param language: if given, the language in which to stem words
         """
         self.stemmer = None
-        if request and request.cfg.xapian_stemming and language and Stemmer:
+        if request and request.cfg.xapian_stemming and language:
             try:
-                self.stemmer = Stemmer(language)
-            except (KeyError, TypeError):
+                stemmer = xapian.Stem(language)
+                # we need this wrapper because the stemmer returns a utf-8
+                # encoded string even when it gets fed with unicode objects:
+                self.stemmer = lambda word: stemmer(word).decode('utf-8')
+            except xapian.InvalidArgumentError:
                 # lang is not stemmable or not available
                 pass
 
@@ -163,9 +160,9 @@
             if flat_stemming:
                 yield (word, pos)
                 if self.stemmer:
-                    yield (self.stemmer.stemWord(word), pos)
+                    yield (self.stemmer(word), pos)
             else:
-                yield (word, self.stemmer.stemWord(word), pos)
+                yield (word, self.stemmer(word), pos)
 
 
 #############################################################################
@@ -217,18 +214,11 @@
         self._check_version()
         BaseIndex.__init__(self, request)
 
-        # Check if we should and can stem words
-        if request.cfg.xapian_stemming and not Stemmer:
-            request.cfg.xapian_stemming = False
-
     def _check_version(self):
         """ Checks if the correct version of Xapian is installed """
         # every version greater than or equal to XAPIAN_MIN_VERSION is allowed
-        XAPIAN_MIN_VERSION = (0, 9, 6)
-        try:
-            major, minor, revision = xapian.major_version(), xapian.minor_version(), xapian.revision()
-        except AttributeError:
-            major, minor, revision = xapian.xapian_major_version(), xapian.xapian_minor_version(), xapian.xapian_revision() # deprecated since xapian 0.9.6, removal in 1.1.0
+        XAPIAN_MIN_VERSION = (1, 0, 0)
+        major, minor, revision = xapian.major_version(), xapian.minor_version(), xapian.revision()
         if (major, minor, revision) >= XAPIAN_MIN_VERSION:
             return
 
@@ -393,23 +383,16 @@
         lang = None
         default_lang = page.request.cfg.language_default
 
-        # if we should stem, we check if we have stemmer for the
-        # language available
+        # if we should stem, we check if we have stemmer for the language available
         if page.request.cfg.xapian_stemming:
             lang = page.pi['language']
-            # Stemmer(lang) has an exception bug if the language is not available
-            # TypeError: exceptions must be strings, classes, or instances, not exceptions.KeyError
             try:
-                Stemmer(lang)
+                xapian.Stem(lang)
                 # if there is no exception, lang is stemmable
                 return (lang, lang)
-            except KeyError:
+            except xapian.InvalidArgumentError:
                 # lang is not stemmable
                 pass
-            except TypeError:
-                # Stemmer(lang) has an exception bug if the language is not available
-                # TypeError: exceptions must be strings, classes, or instances,
-                pass
 
         if not lang:
             # no lang found at all.. fallback to default language
--- a/MoinMoin/search/builtin.py	Sat Jun 07 18:47:58 2008 +0200
+++ b/MoinMoin/search/builtin.py	Sat Jun 07 20:42:57 2008 +0200
@@ -519,10 +519,7 @@
 
                 clock.start('_xapianQuery')
                 query = self.query.xapian_term(self.request, index.allterms)
-                try:
-                    description = query.get_description() # deprecated since xapian 1.0, removal in 1.1
-                except AttributeError:
-                    description = str(query)
+                description = str(query)
                 logging.debug("_xapianSearch: query = %r" % description)
                 query = xapwrap.index.QObjQuery(query)
                 enq, mset, hits = index.search(query, sort=self.sort,
--- a/docs/CHANGES	Sat Jun 07 18:47:58 2008 +0200
+++ b/docs/CHANGES	Sat Jun 07 20:42:57 2008 +0200
@@ -33,6 +33,13 @@
     * Fix xmlrpc request.read() call to use content-length header, if available.
     * Fix traceback in filesys.py on Mac OS X when import Carbon fails.
 
+  Other changes:
+    * HINT: If you want to use Xapian-based indexed search, you need to have
+            Xapian >= 1.0.0.
+    * Removed dependency on PyStemmer: we now just use the stemmer built into
+      Xapian (it uses utf-8 since 1.0.0, so we can use it).
+
+
 Version 1.7.0rc2:
 
   Bug Fixes: