changeset 77:ce42dfe12227

remove non-working search code, see moin/1.9 or moin/2.0-dev for reference. Left some search-related CSS in place, maybe it can be useful. The search box itself and the JS related to it are also still there.
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 06 Mar 2011 17:32:39 +0100
parents dab51965d605
children 6f39926c7d7c
files MoinMoin/_tests/__init__.py MoinMoin/config/default.py MoinMoin/script/index/__init__.py MoinMoin/script/index/build.py MoinMoin/search/Xapian/__init__.py MoinMoin/search/Xapian/_tests/__init__.py MoinMoin/search/Xapian/indexing.py MoinMoin/search/Xapian/search.py MoinMoin/search/Xapian/tokenizer.py MoinMoin/search/__init__.py MoinMoin/search/_tests/test_search.py MoinMoin/search/_tests/test_wiki_analyzer.py MoinMoin/search/builtin.py MoinMoin/search/queryparser/__init__.py MoinMoin/search/queryparser/expressions.py MoinMoin/search/results.py docs/examples/config/snippets/xapian_wikiconfig_snippet setup.py
diffstat 17 files changed, 0 insertions(+), 3554 deletions(-)
--- a/MoinMoin/_tests/__init__.py	Sun Mar 06 16:52:18 2011 +0100
+++ b/MoinMoin/_tests/__init__.py	Sun Mar 06 17:32:39 2011 +0100
@@ -80,12 +80,6 @@
     chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
     return [u"%s" % random_string(length, chars) for counter in range(count)]
 
-def nuke_xapian_index():
-    """ completely delete everything in xapian index dir """
-    fpath = app.cfg.xapian_index_dir
-    if os.path.exists(fpath):
-        shutil.rmtree(fpath, True)
-
 def nuke_item(name):
     """ complete destroys an item """
     item = Item.create(name)
--- a/MoinMoin/config/default.py	Sun Mar 06 16:52:18 2011 +0100
+++ b/MoinMoin/config/default.py	Sun Mar 06 17:32:39 2011 +0100
@@ -46,7 +46,6 @@
     auth_have_login = None
     auth_login_inputs = None
     _site_plugin_lists = None
-    xapian_searchers = None
 
     def __init__(self):
         """ Init Config instance """
@@ -60,8 +59,6 @@
         self.data_dir = data_dir
         if not getattr(self, 'plugin_dir', None):
             setattr(self, 'plugin_dir', os.path.abspath(os.path.join(data_dir, 'plugin')))
-        if not getattr(self, 'xapian_index_dir', None):
-            setattr(self, 'xapian_index_dir', os.path.abspath(os.path.join(data_dir, 'xapian')))
 
         # Try to decode certain names which allow unicode
         self._decode()
@@ -118,17 +115,6 @@
         # e.g u'%(item_root)s' % self
         self.navi_bar = [elem % self for elem in self.navi_bar]
 
-        # check if python-xapian is installed
-        if self.xapian_search:
-            try:
-                import xapian
-            except ImportError, err:
-                self.xapian_search = False
-                logging.error("xapian_search was auto-disabled because python-xapian is not installed [%s]." % str(err))
-
-        # list to cache xapian searcher objects
-        self.xapian_searchers = []
-
         # check if mail is possible and set flag:
         self.mail_enabled = (self.mail_smarthost is not None or self.mail_sendmail is not None) and self.mail_from
         self.mail_enabled = self.mail_enabled and True or False
@@ -505,8 +491,6 @@
     ('refresh', None,
      "refresh = (minimum_delay_s, targets_allowed) enables use of `#refresh 5 PageName` processing instruction, targets_allowed must be either `'internal'` or `'external'`"),
 
-    ('search_results_per_page', 25, "Number of hits shown per page in the search results"),
-
     ('siteid', 'MoinMoin', None), # XXX just default to some existing module name to
                                   # make plugin loader etc. work for now
   )),
@@ -553,17 +537,6 @@
       ('trash', 'Trash/', 'This is the namespace in which an item ends up when it is deleted.')
     )),
 
-    'xapian': ('Xapian search', "Configuration of the Xapian based indexed search, see HelpOnXapian.", (
-      ('search', False,
-       "True to enable the fast, indexed search (based on the Xapian search library)"),
-      ('index_dir', None,
-       "Directory where the Xapian search index is stored (None = auto-configure wiki local storage)"),
-      ('stemming', False,
-       "True to enable Xapian word stemmer usage for indexing / searching."),
-      ('index_history', False,
-       "True to enable indexing of non-current page revisions."),
-    )),
-
     'user': ('Users / User settings', None, (
       ('email_unique', True,
        "if True, check email addresses for uniqueness and don't accept duplicates."),
--- a/MoinMoin/script/index/__init__.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-# Copyright: 2006 MoinMoin:ThomasWaldmann
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - Fullsearch Index Script Package
-
-    TODO: rename this module back to xapian when script framework is
-    fixed to not confuse it with the xapian.org "xapian" module.
-"""
-
-
-from MoinMoin.util import pysupport
-
-# create a list of extension scripts from the subpackage directory
-index_scripts = pysupport.getPackageModules(__file__)
-modules = index_scripts
-
--- a/MoinMoin/script/index/build.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,121 +0,0 @@
-# Copyright: 2006-2009 MoinMoin:ThomasWaldmann
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-MoinMoin - build xapian search engine's index
-"""
-
-
-import os
-import errno
-import shutil
-
-from MoinMoin.script import MoinScript
-
-class IndexScript(MoinScript):
-    """\
-Purpose:
-========
-This tool allows you to control xapian's index of Moin.
-
-Detailed Instructions:
-======================
-General syntax: moin [options] index build [build-options]
-
-[options] usually should be:
-    --config-dir=/path/to/my/cfg/ --wiki-url=http://wiki.example.org/
-
-[build-options] see below:
-    Please note:
-    * You must run this script as the owner of the wiki files,
-      usually this is the web server user.
-    * You may add the build-option --files=files.lst to let the indexer
-      also consider the filesystem filenames contained in that file (one
-      filename per line). Search results from these files will be "found"
-      under a special pseudo page called FS (like File System).
-      Without this option, the indexer will just consider wiki items.
-
-    1. Conditionally (considering modification time) update the index:
-       moin ... index build --mode=update
-
-    2. Unconditionally add to the index:
-       moin ... index build --mode=add
-
-    3. Completely rebuild the index (1-stage):
-       moin ... index build --mode=rebuild
-
-       Note: until it has completely built the new index, the wiki will still
-       use the old index. After rebuild has completed, it kills the old index
-       and moves the new index into its place.
-       If the wiki uses the index at that moment, that might have unwanted side
-       effects. If you want to avoid that and you can accept a short downtime,
-       consider using this safer method:
-
-       Completely rebuild the index (2-stage):
-       # takes long, does not interfere with wiki searches:
-       moin ... index build --mode=buildnewindex
-       stop this moin wiki process(es)
-       # quick, replaces the old index with the new one:
-       moin ... index build --mode=usenewindex
-       start this moin wiki process(es)
-"""
-
-    def __init__(self, argv, def_values):
-        MoinScript.__init__(self, argv, def_values)
-        self.parser.add_option(
-            "--files", metavar="FILES", dest="file_list",
-            help="filename of file list, e.g. files.lst (one file per line)"
-        )
-        self.parser.add_option(
-            "--mode", metavar="MODE", dest="mode",
-            help="either add (unconditionally add), update (conditional update), rebuild (complete 1-stage index rebuild)"
-                 " or buildnewindex and usenewindex (complete 2-stage index rebuild)"
-        )
-
-    def mainloop(self):
-        self.init_request()
-        # Do we have additional files to index?
-        if self.options.file_list:
-            self.files = file(self.options.file_list)
-        else:
-            self.files = None
-        self.command()
-
-class PluginScript(IndexScript):
-    """ Xapian index build script class """
-
-    def command(self):
-        from MoinMoin.search.Xapian import XapianIndex
-        mode = self.options.mode
-        if mode in ('rebuild', 'buildnewindex'):
-            # rebuilding the DB into a new index directory, so the rebuild
-            # process does not interfere with the currently in-use DB
-            idx_mode, idx_name = 'add', 'index.new'
-        elif mode in ('add', 'update'):
-            # update/add in-place
-            idx_mode, idx_name = mode, 'index'
-        elif mode == 'usenewindex':
-            pass # nothing to do
-        else:
-            pass # XXX give error msg about invalid mode
-
-        if mode != 'usenewindex':
-            idx = XapianIndex(self.request, name=idx_name)
-            idx.indexPages(self.files, idx_mode)
-
-        if mode in ('rebuild', 'usenewindex'):
-            # 'rebuild' is still a bit dirty, because just killing old index will
-            # fail currently running searches. Thus, maybe do this in a time
-            # with little wiki activity or better use 'buildnewindex' and
-            # 'usenewindex' (see above).
-            # XXX code here assumes that idx.db is a directory
-            # TODO improve this with xapian stub DBs
-            idx_old = XapianIndex(self.request, name='index').db
-            idx_new = XapianIndex(self.request, name='index.new').db
-            try:
-                shutil.rmtree(idx_old)
-            except OSError, err:
-                if err.errno != errno.ENOENT: # ignore it if we have no current index
-                    raise
-            os.rename(idx_new, idx_old)
-
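
The removed PluginScript.command() above maps the --mode option to an indexing mode plus an index directory name; the branch for an unknown mode was still an open "XXX give error msg" in the removed code. A minimal standalone sketch of that dispatch, with the missing error branch filled in for illustration (the removed code silently passed instead):

    # sketch of the --mode dispatch from the removed build script;
    # raising ValueError is hypothetical, the removed code only had "pass # XXX"
    def dispatch_mode(mode):
        if mode in ('rebuild', 'buildnewindex'):
            return 'add', 'index.new'     # build into a fresh directory
        elif mode in ('add', 'update'):
            return mode, 'index'          # add to / update the live index in place
        elif mode == 'usenewindex':
            return None, None             # only the final index swap step runs
        raise ValueError("invalid --mode: %r" % (mode, ))
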
--- a/MoinMoin/search/Xapian/__init__.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-# Copyright: 2006-2009 MoinMoin:ThomasWaldmann
-# Copyright: 2006 MoinMoin:FranzPletz
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - xapian search engine
-"""
-
-
-from MoinMoin.search.Xapian.indexing import XapianIndex, Query, MoinSearchConnection, MoinIndexerConnection, XapianDatabaseLockError
-from MoinMoin.search.Xapian.tokenizer import WikiAnalyzer
-
--- a/MoinMoin/search/Xapian/indexing.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,537 +0,0 @@
-# Copyright: 2006-2009 MoinMoin:ThomasWaldmann
-# Copyright: 2006 MoinMoin:FranzPletz
-# Copyright: 2009 MoinMoin:DmitrijsMilajevs
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - xapian search engine indexing
-"""
-
-
-import os, re
-import xapian
-import xappy
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-from flask import current_app as app
-
-from MoinMoin.search.builtin import BaseIndex
-from MoinMoin.search.Xapian.tokenizer import WikiAnalyzer
-from MoinMoin.util import filesys
-
-from MoinMoin.Page import Page
-from MoinMoin import config, wikiutil
-
-
-class Query(xapian.Query):
-    pass
-
-
-class UnicodeQuery(xapian.Query):
-    """ Xapian query object which automatically encodes unicode strings """
-
-    def __init__(self, *args, **kwargs):
-        """
-        :keyword encoding: specify the encoding manually (default: value of config.charset)
-        """
-        self.encoding = kwargs.get('encoding', config.charset)
-
-        nargs = []
-        for term in args:
-            if isinstance(term, unicode):
-                term = term.encode(self.encoding)
-            elif isinstance(term, list) or isinstance(term, tuple):
-                term = [t.encode(self.encoding) for t in term]
-            nargs.append(term)
-
-        Query.__init__(self, *nargs, **kwargs)
-
-
-class MoinSearchConnection(xappy.SearchConnection):
-
-    def get_all_documents(self, query=None):
-        """
-        Return all the documents in the index (that match query, if given).
-        """
-        document_count = self.get_doccount()
-        query = query or self.query_all()
-        hits = self.search(query, 0, document_count)
-        return hits
-
-    def get_all_documents_with_fields(self, **fields):
-        """
-        Return all the documents in the index (that match the field=value kwargs given).
-        """
-        field_queries = [self.query_field(field, value) for field, value in fields.iteritems()]
-        query = self.query_composite(self.OP_AND, field_queries)
-        return self.get_all_documents(query)
-
-
-XapianDatabaseLockError = xappy.XapianDatabaseLockError
-
-class MoinIndexerConnection(xappy.IndexerConnection):
-
-    def __init__(self, *args, **kwargs):
-        super(MoinIndexerConnection, self).__init__(*args, **kwargs)
-        self._define_fields_actions()
-
-    def _define_fields_actions(self):
-        SORTABLE = xappy.FieldActions.SORTABLE
-        INDEX_EXACT = xappy.FieldActions.INDEX_EXACT
-        INDEX_FREETEXT = xappy.FieldActions.INDEX_FREETEXT
-        STORE_CONTENT = xappy.FieldActions.STORE_CONTENT
-
-        self.add_field_action('wikiname', INDEX_EXACT)
-        self.add_field_action('wikiname', STORE_CONTENT)
-        self.add_field_action('pagename', INDEX_EXACT)
-        self.add_field_action('pagename', STORE_CONTENT)
-        self.add_field_action('pagename', SORTABLE)
-        self.add_field_action('attachment', INDEX_EXACT)
-        self.add_field_action('attachment', STORE_CONTENT)
-        self.add_field_action('mtime', INDEX_EXACT)
-        self.add_field_action('mtime', STORE_CONTENT)
-        self.add_field_action('revision', STORE_CONTENT)
-        self.add_field_action('revision', INDEX_EXACT)
-        self.add_field_action('mimetype', INDEX_EXACT)
-        self.add_field_action('mimetype', STORE_CONTENT)
-        self.add_field_action('title', INDEX_FREETEXT, weight=100)
-        self.add_field_action('title', STORE_CONTENT)
-        self.add_field_action('content', INDEX_FREETEXT, spell=True)
-        self.add_field_action('domain', INDEX_EXACT)
-        self.add_field_action('domain', STORE_CONTENT)
-        self.add_field_action('lang', INDEX_EXACT)
-        self.add_field_action('lang', STORE_CONTENT)
-        self.add_field_action('stem_lang', INDEX_EXACT)
-        self.add_field_action('author', INDEX_EXACT)
-        self.add_field_action('linkto', INDEX_EXACT)
-        self.add_field_action('linkto', STORE_CONTENT)
-
-
-class StemmedField(xappy.Field):
-
-    def __init__(self, name, value, request):
-        analyzer = WikiAnalyzer(language=app.cfg.language_default)
-        value = ' '.join(unicode('%s %s' % (word, stemmed)).strip() for word, stemmed in analyzer.tokenize(value))
-        super(StemmedField, self).__init__(name, value)
-
-
-class XapianIndex(BaseIndex):
-
-    def __init__(self, request, name='index'):
-        super(XapianIndex, self).__init__(request)
-        self.db = os.path.join(self.main_dir, name)
-
-    def _main_dir(self):
-        """ Get the directory of the xapian index """
-        return os.path.join(app.cfg.xapian_index_dir, app.cfg.siteid)
-
-    def exists(self):
-        """ Check if index exists """
-        return os.path.exists(self.db)
-
-    def mtime(self):
-        """ Modification time of the index """
-        return os.path.getmtime(self.db)
-
-    def touch(self):
-        """ Touch the index """
-        filesys.touch(self.db)
-
-    def get_search_connection(self):
-        return MoinSearchConnection(self.db)
-
-    def get_indexer_connection(self):
-        return MoinIndexerConnection(self.db)
-
-    def _search(self, query, sort='weight', historysearch=0):
-        """
-        Perform the search using xapian
-
-        :param query: the search query objects
-        :param sort: the sorting of the results (default: 'weight')
-        :param historysearch: whether to search in all page revisions (default: 0) TODO: use/implement this
-        """
-        while True:
-            try:
-                searcher, timestamp = app.cfg.xapian_searchers.pop()
-                if timestamp != self.mtime():
-                    searcher.close()
-                else:
-                    break
-            except IndexError:
-                searcher = self.get_search_connection()
-                timestamp = self.mtime()
-                break
-
-        # Refresh connection, since it may be outdated.
-        searcher.reopen()
-        query = query.xapian_term(self.request, searcher)
-
-        # Get maximum possible amount of hits from xappy, which is number of documents in the index.
-        document_count = searcher.get_doccount()
-
-        kw = {}
-        if sort == 'page_name':
-            kw['sortby'] = 'pagename'
-
-        hits = searcher.search(query, 0, document_count, **kw)
-
-        app.cfg.xapian_searchers.append((searcher, timestamp))
-        return hits
-
-    def do_queued_updates(self, amount=-1):
-        """ Index <amount> entries from the indexer queue.
-
-            :param amount: amount of queue entries to process (default: -1 == all)
-        """
-        try:
-            request = self._indexingRequest(self.request)
-            connection = self.get_indexer_connection()
-            self.touch()
-            try:
-                done_count = 0
-                while amount:
-                    # trick: if amount starts from -1, it will never get 0
-                    amount -= 1
-                    try:
-                        pagename, attachmentname, revno = self.update_queue.get()
-                    except IndexError:
-                        # queue empty
-                        break
-                    else:
-                        logging.debug("got from indexer queue: %r %r %r" % (pagename, attachmentname, revno))
-                        if not attachmentname:
-                            if revno is None:
-                                # generic "index this page completely, with attachments" request
-                                self._index_page(request, connection, pagename, mode='update')
-                            else:
-                                # "index this page revision" request
-                                self._index_page_rev(request, connection, pagename, revno, mode='update')
-                        else:
-                            # "index this attachment" request
-                            self._index_attachment(request, connection, pagename, attachmentname, mode='update')
-                        done_count += 1
-            finally:
-                logging.debug("updated xapian index with %d queued updates" % done_count)
-                connection.close()
-        except XapianDatabaseLockError:
-            # another indexer has locked the index, we can retry it later...
-            logging.debug("can't lock xapian index, not doing queued updates now")
-
-    def _get_document(self, connection, doc_id, mtime, mode):
-        do_index = False
-
-        if mode == 'update':
-            try:
-                doc = connection.get_document(doc_id)
-                docmtime = long(doc.data['mtime'][0])
-            except KeyError:
-                do_index = True
-            else:
-                do_index = mtime > docmtime
-        elif mode == 'add':
-            do_index = True
-        else:
-            raise ValueError("mode must be 'update' or 'add'")
-
-        if do_index:
-            document = xappy.UnprocessedDocument()
-            document.id = doc_id
-        else:
-            document = None
-        return document
-
-    def _add_fields_to_document(self, request, document, fields=None, multivalued_fields=None):
-
-        fields_to_stem = ['title', 'content']
-
-        if fields is None:
-            fields = {}
-        if multivalued_fields is None:
-            multivalued_fields = {}
-
-        for field, value in fields.iteritems():
-            document.fields.append(xappy.Field(field, value))
-            if field in fields_to_stem:
-                document.fields.append(StemmedField(field, value, request))
-
-        for field, values in multivalued_fields.iteritems():
-            for value in values:
-                document.fields.append(xappy.Field(field, value))
-
-    def _get_languages(self, page):
-        """ Get language of a page and the language to stem it in
-
-        :param page: the page instance
-        """
-        lang = None
-        default_lang = app.cfg.language_default
-
-        # if we should stem, we check if we have a stemmer for the language available
-        if app.cfg.xapian_stemming:
-            lang = page.pi['language']
-            try:
-                xapian.Stem(lang)
-                # if there is no exception, lang is stemmable
-                return (lang, lang)
-            except xapian.InvalidArgumentError:
-                # lang is not stemmable
-                pass
-
-        if not lang:
-            # no lang found at all.. fallback to default language
-            lang = default_lang
-
-        # return actual lang and lang to stem in
-        return (lang, default_lang)
-
-    def _get_domains(self, page):
-        """ Returns a generator with all the domains the page belongs to
-
-        :param page: page
-        """
-        if page.isStandardPage():
-            yield 'standard'
-        if wikiutil.isSystemItem(page.page_name):
-            yield 'system'
-
-    def _index_page(self, request, connection, pagename, mode='update'):
-        """ Index a page.
-
-        Index all revisions (if wanted by configuration) and all attachments.
-
-        :param request: request suitable for indexing
-        :param connection: the Indexer connection object
-        :param pagename: a page name
-        :param mode: 'add' = just add, no checks
-                     'update' = check if already in index and update if needed (mtime)
-        """
-        page = Page(request, pagename)
-        revlist = page.getRevList() # recent revs first, does not include deleted revs
-        logging.debug("indexing page %r, %d revs found" % (pagename, len(revlist)))
-
-        if not revlist:
-            # we have an empty revision list, that means the page is not there any more,
-            # likely it (== all of its revisions, all of its attachments) got either renamed or nuked
-            wikiname = app.cfg.interwikiname or u'Self'
-
-            sc = self.get_search_connection()
-            docs_to_delete = sc.get_all_documents_with_fields(wikiname=wikiname, pagename=pagename)
-                                                              # any page rev, any attachment
-            sc.close()
-
-            for doc in docs_to_delete:
-                connection.delete(doc.id)
-            logging.debug('page %s (all revs, all attachments) removed from xapian index' % pagename)
-
-        else:
-            if app.cfg.xapian_index_history:
-                index_revs, remove_revs = revlist, []
-            else:
-                if page.exists(): # is current rev not deleted?
-                    index_revs, remove_revs = revlist[:1], revlist[1:]
-                else:
-                    index_revs, remove_revs = [], revlist
-
-            for revno in index_revs:
-                updated = self._index_page_rev(request, connection, pagename, revno, mode=mode)
-                logging.debug("updated page %r rev %d (updated==%r)" % (pagename, revno, updated))
-                if not updated:
-                    # we reached the revisions that are already present in the index
-                    break
-
-            for revno in remove_revs:
-                # XXX remove_revs can be rather long for pages with many revs and
-                # XXX most page revs usually will be already deleted. optimize?
-                self._remove_page_rev(request, connection, pagename, revno)
-                logging.debug("removed page %r rev %d" % (pagename, revno))
-
-            from MoinMoin.action import AttachFile
-            for attachmentname in AttachFile._get_files(request, pagename):
-                self._index_attachment(request, connection, pagename, attachmentname, mode)
-
-    def _index_page_rev(self, request, connection, pagename, revno, mode='update'):
-        """ Index a page revision.
-
-        :param request: request suitable for indexing
-        :param connection: the Indexer connection object
-        :param pagename: the page name
-        :param revno: page revision number (int)
-        :param mode: 'add' = just add, no checks
-                     'update' = check if already in index and update if needed (mtime)
-        """
-        page = Page(request, pagename, rev=revno)
-
-        wikiname = app.cfg.interwikiname or u"Self"
-        revision = str(page.get_real_rev())
-        itemid = "%s:%s:%s" % (wikiname, pagename, revision)
-        #mtime = wikiutil.timestamp2version(page.mtime())
-        mtime = page.mtime_usecs()
-
-        doc = self._get_document(connection, itemid, mtime, mode)
-        logging.debug("%s %s %r" % (pagename, revision, doc))
-        if doc:
-            mimetype = 'text/%s' % page.pi['format']  # XXX improve this
-
-            fields = {}
-            fields['wikiname'] = wikiname
-            fields['pagename'] = pagename
-            fields['attachment'] = '' # this is a real page, not an attachment
-            fields['mtime'] = str(mtime)
-            fields['revision'] = revision
-            fields['title'] = pagename
-            fields['content'] = page.get_raw_body()
-            fields['lang'], fields['stem_lang'] = self._get_languages(page)
-            fields['author'] = page.edit_info().get('editor', '?')
-
-            multivalued_fields = {}
-            multivalued_fields['mimetype'] = [mt for mt in [mimetype] + mimetype.split('/')]
-            multivalued_fields['domain'] = self._get_domains(page)
-            multivalued_fields['linkto'] = page.getPageLinks(request)
-
-            self._add_fields_to_document(request, doc, fields, multivalued_fields)
-
-            try:
-                connection.replace(doc)
-            except xappy.IndexerError, err:
-                logging.warning("IndexerError at %r %r %r (%s)" % (
-                    wikiname, pagename, revision, str(err)))
-
-        return bool(doc)
-
-    def _remove_page_rev(self, request, connection, pagename, revno):
-        """ Remove a page revision from the index.
-
-        :param request: request suitable for indexing
-        :param connection: the Indexer connection object
-        :param pagename: the page name
-        :param revno: a real revision number (int), > 0
-        """
-        wikiname = app.cfg.interwikiname or u"Self"
-        revision = str(revno)
-        itemid = "%s:%s:%s" % (wikiname, pagename, revision)
-        connection.delete(itemid)
-        logging.debug('page %s, revision %d removed from index' % (pagename, revno))
-
-    def _index_attachment(self, request, connection, pagename, attachmentname, mode='update'):
-        """ Index an attachment
-
-        :param request: request suitable for indexing
-        :param connection: the Indexer connection object
-        :param pagename: the page name
-        :param attachmentname: the attachment's name
-        :param mode: 'add' = just add, no checks
-                     'update' = check if already in index and update if needed (mtime)
-        """
-        from MoinMoin.action import AttachFile
-        wikiname = app.cfg.interwikiname or u"Self"
-        itemid = "%s:%s//%s" % (wikiname, pagename, attachmentname)
-
-        filename = AttachFile.getFilename(request, pagename, attachmentname)
-        # check if the file is still there. as we might be doing queued index updates,
-        # the file could be gone meanwhile...
-        if os.path.exists(filename):
-            mtime = wikiutil.timestamp2version(os.path.getmtime(filename))
-            doc = self._get_document(connection, itemid, mtime, mode)
-            logging.debug("%s %s %r" % (pagename, attachmentname, doc))
-            if doc:
-                page = Page(request, pagename)
-                mimetype, att_content = self.contentfilter(filename)
-
-                fields = {}
-                fields['wikiname'] = wikiname
-                fields['pagename'] = pagename
-                fields['attachment'] = attachmentname
-                fields['mtime'] = str(mtime)
-                fields['revision'] = '0'
-                fields['title'] = '%s/%s' % (pagename, attachmentname)
-                fields['content'] = att_content
-                fields['lang'], fields['stem_lang'] = self._get_languages(page)
-
-                multivalued_fields = {}
-                multivalued_fields['mimetype'] = [mt for mt in [mimetype] + mimetype.split('/')]
-                multivalued_fields['domain'] = self._get_domains(page)
-
-                self._add_fields_to_document(request, doc, fields, multivalued_fields)
-
-                connection.replace(doc)
-                logging.debug('attachment %s (page %s) updated in index' % (attachmentname, pagename))
-        else:
-            # attachment file was deleted, remove it from index also
-            connection.delete(itemid)
-            logging.debug('attachment %s (page %s) removed from index' % (attachmentname, pagename))
-
-    def _index_file(self, request, connection, filename, mode='update'):
-        """ index files (that are NOT attachments, just arbitrary files)
-
-        :param request: request suitable for indexing
-        :param connection: the Indexer connection object
-        :param filename: a filesystem file name
-        :param mode: 'add' = just add, no checks
-                     'update' = check if already in index and update if needed (mtime)
-        """
-        wikiname = app.cfg.interwikiname or u"Self"
-        fs_rootpage = 'FS' # XXX FS hardcoded
-
-        try:
-            itemid = "%s:%s" % (wikiname, os.path.join(fs_rootpage, filename))
-            mtime = wikiutil.timestamp2version(os.path.getmtime(filename))
-
-            doc = self._get_document(connection, itemid, mtime, mode)
-            logging.debug("%s %r" % (filename, doc))
-            if doc:
-                mimetype, file_content = self.contentfilter(filename)
-
-                fields = {}
-                fields['wikiname'] = wikiname
-                fields['pagename'] = fs_rootpage
-                fields['attachment'] = filename # XXX we should treat files like real pages, not attachments
-                fields['mtime'] = str(mtime)
-                fields['revision'] = '0'
-                fields['title'] = " ".join(os.path.join(fs_rootpage, filename).split("/"))
-                fields['content'] = file_content
-
-                multivalued_fields = {}
-                multivalued_fields['mimetype'] = [mt for mt in [mimetype] + mimetype.split('/')]
-
-                self._add_fields_to_document(request, doc, fields, multivalued_fields)
-
-                connection.replace(doc)
-
-        except (OSError, IOError, UnicodeError):
-            logging.exception("_index_file crashed:")
-
-    def _index_pages(self, request, files=None, mode='update', pages=None):
-        """ Index all (given) pages (and all given files)
-
-        This should be called from indexPages only!
-
-        :param request: request suitable for indexing
-        :param files: an optional list of files to index
-        :param mode: 'add' = just add, no checks
-                     'update' = check if already in index and update if needed (mtime)
-        :param pages: list of pages to index, if not given, all pages are indexed
-        """
-        if pages is None:
-            # Index all pages
-            pages = request.rootpage.getPageList(user='', exists=1)
-
-        try:
-            connection = self.get_indexer_connection()
-            self.touch()
-            try:
-                logging.info("indexing %d pages..." % len(pages))
-                for pagename in pages:
-                    self._index_page(request, connection, pagename, mode=mode)
-                if files:
-                    logging.info("indexing all files...")
-                    for fname in files:
-                        fname = fname.strip()
-                        self._index_file(request, connection, fname, mode)
-            finally:
-                connection.close()
-        except XapianDatabaseLockError:
-            logging.warning("xapian index is locked, can't index.")
-
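
The indexing module removed above drove xappy rather than raw xapian for most of the work: MoinIndexerConnection registers the field actions, each page revision becomes an xappy.UnprocessedDocument keyed by an itemid of the form wikiname:pagename:revision, and connection.replace() adds or updates it. A minimal sketch of that flow, using only calls that appear in the removed code; the path, page name and field values are illustrative, and it assumes the removed module is still importable:

    # minimal sketch of how the removed indexing API was driven
    import xappy
    from MoinMoin.search.Xapian.indexing import MoinIndexerConnection

    connection = MoinIndexerConnection('/path/to/xapian/index')   # path is illustrative
    doc = xappy.UnprocessedDocument()
    doc.id = 'Self:SomePage:1'                # itemid: wikiname:pagename:revision
    doc.fields.append(xappy.Field('pagename', u'SomePage'))
    doc.fields.append(xappy.Field('content', u'some page text'))
    connection.replace(doc)                   # add or update, keyed by doc.id
    connection.close()
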
--- a/MoinMoin/search/Xapian/search.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,69 +0,0 @@
-# Copyright: 2005 MoinMoin:FlorianFesti
-# Copyright: 2005 MoinMoin:NirSoffer
-# Copyright: 2005 MoinMoin:AlexanderSchremmer
-# Copyright: 2006-2009 MoinMoin:ThomasWaldmann
-# Copyright: 2006 MoinMoin:FranzPletz
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - search engine internals
-"""
-
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-from MoinMoin.i18n import _, L_, N_
-from MoinMoin.search.builtin import BaseSearch, MoinSearch, BaseIndex
-from MoinMoin.search.Xapian.indexing import XapianIndex
-
-class IndexDoesNotExistError(Exception):
-    pass
-
-class XapianSearch(BaseSearch):
-
-    def __init__(self, request, query, sort='weight', mtime=None, historysearch=0):
-        super(XapianSearch, self).__init__(request, query, sort, mtime, historysearch)
-
-        self.index = self._xapian_index()
-
-    def _xapian_index(self):
-        """ Get the xapian index if possible
-
-        :param request: current request
-        """
-        index = XapianIndex(self.request)
-
-        if not index.exists():
-            raise IndexDoesNotExistError
-
-        return index
-
-    def _search(self):
-        """ Search using Xapian
-
-        Get a list of pages using fast xapian search and
-        return moin search in those pages if needed.
-        """
-        index = self.index
-
-        search_results = index.search(self.query, sort=self.sort, historysearch=self.historysearch)
-        logging.debug("_xapianSearch: finds: %r" % search_results)
-
-        # Note: .data is (un)pickled inside xappy, so we get back exactly what
-        #       we had put into it at indexing time (including unicode objects).
-        pages = [{'uid': r.id,
-                  'wikiname': r.data['wikiname'][0],
-                  'pagename': r.data['pagename'][0],
-                  'attachment': r.data['attachment'][0],
-                  'revision': r.data.get('revision', [0])[0]}
-                 for r in search_results]
-        if not self.query.xapian_need_postproc():
-            # xapian handled the full query
-
-            return self._getHits(pages), (search_results.estimate_is_exact and '' or _('about'), search_results.matches_estimated)
-
-        # some postprocessing by MoinSearch is required
-        return MoinSearch(self.request, self.query, self.sort, self.mtime, self.historysearch, pages=pages)._search()
-
-
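
When the removed XapianSearch could not let Xapian answer the whole query, it handed the candidate pages to MoinSearch for post-processing as a list of plain dicts. The field names below are the ones built in the removed _search(); the values are made up for illustration:

    # shape of the candidate list the removed XapianSearch passed to MoinSearch
    pages = [{'uid': '42',
              'wikiname': u'Self',
              'pagename': u'FrontPage',
              'attachment': u'',
              'revision': '1'}]
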
--- a/MoinMoin/search/Xapian/tokenizer.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,125 +0,0 @@
-# Copyright: 2006-2008 MoinMoin:ThomasWaldmann
-# Copyright: 2006 MoinMoin:FranzPletz
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - A text analyzer for wiki syntax
-"""
-
-
-import re
-import xapian
-
-from flask import current_app as app
-
-from MoinMoin.parser.text_moin_wiki import Parser as WikiParser
-from MoinMoin import config
-
-
-class WikiAnalyzer(object):
-    """ A text analyzer for wiki syntax
-
-    The purpose of this class is to analyze texts/pages in wiki syntax
-    and yield single terms to feed into the xapian database.
-    """
-
-    singleword = r"[%(u)s][%(l)s]+" % {
-                     'u': config.chars_upper,
-                     'l': config.chars_lower,
-                 }
-
-    singleword_re = re.compile(singleword, re.U)
-    wikiword_re = re.compile(WikiParser.word_rule, re.UNICODE|re.VERBOSE)
-
-    token_re = re.compile(
-        r"(?P<company>\w+[&@]\w+)|" + # company names like AT&T and Excite@Home.
-        r"(?P<email>\w+([.-]\w+)*@\w+([.-]\w+)*)|" +    # email addresses
-        r"(?P<acronym>(\w\.)+)|" +          # acronyms: U.S.A., I.B.M., etc.
-        r"(?P<word>\w+)",                   # words (including WikiWords)
-        re.U)
-
-    dot_re = re.compile(r"[-_/,.]")
-    mail_re = re.compile(r"[-_/,.]|(@)")
-    alpha_num_re = re.compile(r"\d+|\D+")
-
-    def __init__(self, language=None):
-        """
-        :param language: if given, the language in which to stem words
-        """
-        self.stemmer = None
-        if app.cfg.xapian_stemming and language:
-            try:
-                stemmer = xapian.Stem(language)
-                # we need this wrapper because the stemmer returns a utf-8
-                # encoded string even when it gets fed with unicode objects:
-                self.stemmer = lambda word: stemmer(word).decode('utf-8')
-            except xapian.InvalidArgumentError:
-                # lang is not stemmable or not available
-                pass
-
-    def raw_tokenize_word(self, word, pos):
-        """ try to further tokenize some word starting at pos """
-        yield (word, pos)
-        if self.wikiword_re.match(word):
-            # if it is a CamelCaseWord, we additionally try to tokenize Camel, Case and Word
-            for m in re.finditer(self.singleword_re, word):
-                mw, mp = m.group(), pos + m.start()
-                for w, p in self.raw_tokenize_word(mw, mp):
-                    yield (w, p)
-        else:
-            # if we have Foo42, yield Foo and 42
-            for m in re.finditer(self.alpha_num_re, word):
-                mw, mp = m.group(), pos + m.start()
-                if mw != word:
-                    for w, p in self.raw_tokenize_word(mw, mp):
-                        yield (w, p)
-
-    def raw_tokenize(self, value):
-        """ Yield a stream of words from a string.
-
-        :param value: string to split, must be a unicode object or a list of
-                      unicode objects
-        """
-        if isinstance(value, list): # used for page links
-            for v in value:
-                yield (v, 0)
-        else:
-            tokenstream = re.finditer(self.token_re, value)
-            for m in tokenstream:
-                if m.group("acronym"):
-                    yield (m.group("acronym").replace('.', ''), m.start())
-                elif m.group("company"):
-                    yield (m.group("company"), m.start())
-                elif m.group("email"):
-                    displ = 0
-                    for word in self.mail_re.split(m.group("email")):
-                        if word:
-                            yield (word, m.start() + displ)
-                            displ += len(word) + 1
-                elif m.group("word"):
-                    for word, pos in self.raw_tokenize_word(m.group("word"), m.start()):
-                        yield word, pos
-
-    def tokenize(self, value):
-        """
-        Yield a stream of raw lower cased and stemmed words from a string.
-
-        :param value: string to split, must be a unicode object or a list of
-                      unicode objects
-        """
-        if self.stemmer:
-
-            def stemmer(value):
-                stemmed = self.stemmer(value)
-                if stemmed != value:
-                    return stemmed
-                else:
-                    return ''
-        else:
-            stemmer = lambda v: ''
-
-        for word, pos in self.raw_tokenize(value):
-            # Xapian stemmer expects lowercase input
-            word = word.lower()
-            yield word, stemmer(word)
-
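
Besides the full word, the removed WikiAnalyzer also yielded sub-tokens: CamelCase wiki words were split into their capitalised parts and mixed words like Foo42 into alphabetic and numeric runs. A self-contained sketch of that splitting idea, with deliberately simplified regexes (the original used config.chars_upper/chars_lower and the wiki word rule, so its output only roughly matches this):

    # standalone sketch of the sub-token splitting done by the removed
    # WikiAnalyzer.raw_tokenize_word(); regexes are simplified approximations
    import re

    camel_part = re.compile(r'[A-Z][a-z]+')
    alpha_num = re.compile(r'\d+|\D+')

    def subtokens(word):
        yield word
        parts = camel_part.findall(word)
        if len(parts) > 1 and ''.join(parts) == word:
            for part in parts:            # 'CamelCaseWord' -> 'Camel', 'Case', 'Word'
                yield part
        else:
            for part in alpha_num.findall(word):
                if part != word:          # 'Foo42' -> 'Foo', '42'
                    yield part

    # list(subtokens(u'CamelCaseWord')) -> [u'CamelCaseWord', u'Camel', u'Case', u'Word']
    # list(subtokens(u'Foo42'))         -> [u'Foo42', u'Foo', u'42']
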
--- a/MoinMoin/search/__init__.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,66 +0,0 @@
-# Copyright: 2005 MoinMoin:FlorianFesti
-# Copyright: 2005 MoinMoin:NirSoffer
-# Copyright: 2005 MoinMoin:AlexanderSchremmer
-# Copyright: 2006 MoinMoin:ThomasWaldmann
-# Copyright: 2006 MoinMoin:FranzPletz
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - search engine
-"""
-
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-from flask import current_app as app
-
-from MoinMoin.search.queryparser import QueryParser, QueryError
-from MoinMoin.search.builtin import MoinSearch
-
-
-def searchPages(request, query, sort='weight', mtime=None, historysearch=None, **kw):
-    """
-    Search the text of all pages for query.
-
-    :param request: current request
-    :param query: the expression (string or query objects) we want to search for
-    :keyword sort: sorting of the search results, either 'weight' or 'page_name'
-    :keyword mtime: only items modified since mtime
-    :keyword historysearch: include older revisions of items in search
-    :keyword titlesearch: treat all terms as title searches (passed to qp)
-    :keyword case: do case sensitive search (passed to qp)
-    :keyword regex: treat all terms as regular expression (passed to qp)
-    :rtype: SearchResults instance
-    :returns: search results
-    """
-    return _get_searcher(request, query, sort, mtime, historysearch, **kw).run()
-
-
-def _get_searcher(request, query, sort='weight', mtime=None, historysearch=None, **kw):
-    """
-    Return a searcher object according to the configuration.
-    """
-    query = _parse_query(query, **kw)
-    searcher = None
-
-    if app.cfg.xapian_search:
-        try:
-            from MoinMoin.search.Xapian.search import XapianSearch, IndexDoesNotExistError
-            searcher = XapianSearch(request, query, sort, mtime=mtime, historysearch=historysearch)
-        except ImportError, error:
-            logging.warning("%s. You should either set xapian_search = False in your wiki config or install/upgrade Xapian." % str(error))
-        except IndexDoesNotExistError:
-            logging.warning("Slow moin search is used because the Xapian index does not exist. You should create it using the moin index build command.")
-
-    if searcher is None:
-        searcher = MoinSearch(request, query, sort, mtime=mtime, historysearch=historysearch)
-
-    return searcher
-
-def _parse_query(query, **kw):
-    if isinstance(query, str) or isinstance(query, unicode):
-        query = QueryParser(**kw).parse_query(query)
-
-    return query
-
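
searchPages() in the module removed above was the public entry point the rest of the code called; Xapian vs. plain MoinSearch and query parsing were hidden behind it. A minimal usage sketch, assuming a request object is already at hand and using the result attributes the removed tests rely on:

    # minimal sketch of how the removed public search API was called
    from MoinMoin.search import searchPages

    results = searchPages(request, u'title:FrontPage or needle',
                          sort='weight', historysearch=0)
    for hit in results.hits:
        print hit.page_name       # result objects as used by the removed tests
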
--- a/MoinMoin/search/_tests/test_search.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,467 +0,0 @@
-# Copyright: 2005 by Nir Soffer <nirs@freeshell.org>
-# Copyright: 2007-2010 by MoinMoin:ThomasWaldmann
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - MoinMoin.search Tests
-
-    We exclude underlay/system pages for some search tests to save time.
-"""
-
-
-import os, StringIO, time
-
-import py
-
-py.test.skip("broken")
-
-from MoinMoin.search import QueryError, _get_searcher
-from MoinMoin.search.queryparser import QueryParser
-from MoinMoin.search.builtin import MoinSearch
-from MoinMoin._tests import nuke_xapian_index, wikiconfig, become_trusted, create_item
-from MoinMoin.wikiutil import Version
-
-PY_MIN_VERSION = '1.0.0'
-if Version(version=py.version) < Version(version=PY_MIN_VERSION):
-    # There are some generative tests, which won't run on older versions!
-    # XXX These tests should be refactored to be able to be run with older versions of py.
-    py.test.skip('Currently py version %s is needed' % PY_MIN_VERSION)
-
-
-class TestQueryParsing(object):
-    """ search: query parser tests """
-
-    def testQueryParser(self):
-        """ search: test the query parser """
-        parser = QueryParser()
-        for query, wanted in [
-            # Even a single term is an AND expression (this is needed for xapian because it
-            # only has AND_NOT, but not a simple NOT).  This is why we have many, many brackets here.
-            ("a", '["a"]'),
-            ("-a", '[-"a"]'),
-            ("a b", '["a" "b"]'),
-            ("a -b c", '["a" -"b" "c"]'),
-            ("aaa bbb -ccc", '["aaa" "bbb" -"ccc"]'),
-            ("title:aaa title:bbb -title:ccc", '[title:"aaa" title:"bbb" -title:"ccc"]'),
-            ("title:case:aaa title:re:bbb -title:re:case:ccc", '[title:case:"aaa" title:re:"bbb" -title:re:case:"ccc"]'),
-            ("linkto:aaa", '[linkto:"aaa"]'),
-            ("domain:aaa", '[domain:"aaa"]'),
-            ("re:case:title:aaa", '[title:re:case:"aaa"]'),
-            ("(aaa or bbb) and (ccc or ddd)", '[[[["aaa"] or ["bbb"]]] [[["ccc"] or ["ddd"]]]]'),
-            ("(aaa or bbb) (ccc or ddd)", '[[[["aaa"] or ["bbb"]]] [[["ccc"] or ["ddd"]]]]'),
-            ("aaa or bbb", '[[["aaa"] or ["bbb"]]]'),
-            ("aaa or bbb or ccc", '[[["aaa"] or [[["bbb"] or ["ccc"]]]]]'),
-            ("aaa or bbb and ccc", '[[["aaa"] or ["bbb" "ccc"]]]'),
-            ("aaa and bbb or ccc", '[[["aaa" "bbb"] or ["ccc"]]]'),
-            ("aaa and bbb and ccc", '["aaa" "bbb" "ccc"]'),
-            ("aaa or bbb and ccc or ddd", '[[["aaa"] or [[["bbb" "ccc"] or ["ddd"]]]]]'),
-            ("aaa or bbb ccc or ddd", '[[["aaa"] or [[["bbb" "ccc"] or ["ddd"]]]]]'),
-            ("(HelpOn) (Administration)", '[["HelpOn"] ["Administration"]]'),
-            ("(HelpOn) (-Administration)", '[["HelpOn"] [-"Administration"]]'),
-            ("(HelpOn) and (-Administration)", '[["HelpOn"] [-"Administration"]]'),
-            ("(HelpOn) and (Administration) or (Configuration)", '[[[["HelpOn"] ["Administration"]] or [["Configuration"]]]]'),
-            ("(a) and (b) or (c) or -d", '[[[["a"] ["b"]] or [[[["c"]] or [-"d"]]]]]'),
-            ("a b c d e or f g h", '[[["a" "b" "c" "d" "e"] or ["f" "g" "h"]]]'),
-            ('"no', '[""no"]'),
-            ('no"', '["no""]'),
-            ("'no", "[\"'no\"]"),
-            ("no'", "[\"no'\"]"),
-            ('"no\'', '[""no\'"]')]:
-            result = parser.parse_query(query)
-            assert str(result) == wanted
-
-    def testQueryParserExceptions(self):
-        """ search: test the query parser """
-        parser = QueryParser()
-
-        def _test(q):
-            py.test.raises(QueryError, parser.parse_query, q)
-
-        for query in ['""', '(', ')', '(a or b']:
-            yield _test, query
-
-
-class BaseSearchTest(object):
-    """ search: test search """
-    doesnotexist = u'jfhsdaASDLASKDJ'
-
-    class Config(wikiconfig.Config):
-        load_xml = wikiconfig.Config._test_items_xml
-
-    # key - page name, value - page content. If value is None, the page
-    # will not be created but will be used for a search. None should
-    # be used for pages which already exist.
-    pages = {u'SearchTestPage': u'this is a test page',
-             u'SearchTestLinks': u'SearchTestPage',
-             u'SearchTestLinksLowerCase': u'searchtestpage',
-             u'SearchTestOtherLinks': u'SearchTestLinks',
-             u'TestEdit': u'TestEdit',
-             u'TestOnEditing': u'another test page',
-             u'ContentSearchUpper': u'Find the NEEDLE in the haystack.',
-             u'ContentSearchLower': u'Find the needle in the haystack.',
-             u'LanguageSetup': None,
-             u'HomePageWiki': None,
-             u'FrontPage': None,
-             u'RecentChanges': None,
-             u'HelpOnCreoleSyntax': None,
-             u'HelpIndex': None,
-            }
-
-    searcher_class = None
-
-    def _index_update(self):
-        pass
-
-    @classmethod
-    def setup_class(cls):
-        pass
-
-    def teardown_class(self):
-        pass
-
-    def setup_method(cls, method):
-        request = cls.request
-        become_trusted()
-
-        for page, text in cls.pages.iteritems():
-            if text:
-                create_item(page, text)
-
-    def get_searcher(self, query):
-        raise NotImplementedError
-
-    def search(self, query):
-        if isinstance(query, str) or isinstance(query, unicode):
-            query = QueryParser().parse_query(query)
-
-        return self.get_searcher(query).run()
-
-    def test_title_search_simple(self):
-        searches = {u'title:SearchTestPage': 1,
-                    u'title:LanguageSetup': 1,
-                    u'title:HelpIndex': 1,
-                    u'title:Help': 2,
-                    u'title:TestOn': 1,
-                    u'title:SearchTestNotExisting': 0,
-                    u'title:FrontPage': 1,
-                    u'title:TestOnEditing': 1,
-                   }
-
-        def test(query, res_count):
-            result = self.search(query)
-            test_result = len(result.hits)
-            assert test_result == res_count
-
-        for query, res_count in searches.iteritems():
-            yield query, test, query, res_count
-
-    def test_title_search_re(self):
-        expected_pages = set([u'SearchTestPage', u'SearchTestLinks', u'SearchTestLinksLowerCase', u'SearchTestOtherLinks', ])
-        result = self.search(ur'-domain:underlay -domain:system title:re:\bSearchTest')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        result = self.search(ur'-domain:underlay -domain:system title:re:\bSearchTest\b')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert not found_pages
-
-    def test_title_search_case(self):
-        expected_pages = set([u'SearchTestPage', ])
-        result = self.search(u'-domain:underlay -domain:system title:case:SearchTestPage')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        result = self.search(u'-domain:underlay -domain:system title:case:searchtestpage')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert not found_pages
-
-    def test_title_search_case_re(self):
-        expected_pages = set([u'SearchTestPage', ])
-        result = self.search(ur'-domain:underlay -domain:system title:case:re:\bSearchTestPage\b')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        result = self.search(ur'-domain:underlay -domain:system title:case:re:\bsearchtestpage\b')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert not found_pages
-
-    def test_linkto_search_simple(self):
-        expected_pages = set([u'SearchTestLinks', ])
-        result = self.search(u'-domain:underlay -domain:system linkto:SearchTestPage')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        result = self.search(u'-domain:underlay -domain:system linkto:SearchTestNotExisting')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert not found_pages
-
-    def test_linkto_search_re(self):
-        expected_pages = set([u'SearchTestLinks', u'SearchTestOtherLinks', ])
-        result = self.search(ur'-domain:underlay -domain:system linkto:re:\bSearchTest')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        result = self.search(ur'-domain:underlay -domain:system linkto:re:\bSearchTest\b')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert not found_pages
-
-    def test_linkto_search_case(self):
-        expected_pages = set([u'SearchTestLinks', ])
-        result = self.search(u'-domain:underlay -domain:system linkto:case:SearchTestPage')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        result = self.search(u'-domain:underlay -domain:system linkto:case:searchtestpage')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert not found_pages
-
-    def test_linkto_search_case_re(self):
-        expected_pages = set([u'SearchTestLinks', ])
-        result = self.search(ur'-domain:underlay -domain:system linkto:case:re:\bSearchTestPage\b')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        result = self.search(ur'-domain:underlay -domain:system linkto:case:re:\bsearchtestpage\b')
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert not found_pages
-
-    def test_mimetype_search_simple(self):
-        result = self.search(u'mimetype:text/wiki')
-        test_result = len(result.hits)
-        assert test_result == 14
-
-    def test_mimetype_search_re(self):
-        result = self.search(ur'mimetype:re:\btext/wiki\b')
-        test_result = len(result.hits)
-        assert test_result == 14
-
-    def test_language_search_simple(self):
-        result = self.search(u'language:en')
-        test_result = len(result.hits)
-        assert test_result == 14
-
-    def test_domain_search_simple(self):
-        result = self.search(u'domain:system')
-        assert result.hits
-
-    def test_search_and(self):
-        """ search: title search with AND expression """
-        expected_pages = set([u'HelpOnCreoleSyntax', ])
-        result = self.search(u"title:HelpOnCreoleSyntax lang:en")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        result = self.search(u"title:HelpOnCreoleSyntax lang:de")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert not found_pages
-
-        result = self.search(u"title:Help title:%s" % self.doesnotexist)
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert not found_pages
-
-    def testTitleSearchOR(self):
-        """ search: title search with OR expression """
-        expected_pages = set([u'FrontPage', u'RecentChanges', ])
-        result = self.search(u"title:FrontPage or title:RecentChanges")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-    def testTitleSearchNegatedFindAll(self):
-        """ search: negated title search for some pagename that does not exist results in all pagenames """
-        result = self.search(u"-title:%s" % self.doesnotexist)
-        n_pages = len(self.pages)
-        test_result = len(result.hits)
-        assert test_result == n_pages
-
-    def testTitleSearchNegativeTerm(self):
-        """ search: title search for a AND expression with a negative term """
-        result = self.search(u"-title:FrontPage")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert u'FrontPage' not in found_pages
-        test_result = len(result.hits)
-        n_pages = len(self.pages) - 1
-        assert test_result == n_pages
-
-        result = self.search(u"-title:HelpOn")
-        test_result = len(result.hits)
-        n_pages = len(self.pages) - 1
-        assert test_result == n_pages
-
-    def testFullSearchNegatedFindAll(self):
-        """ search: negated full search for some string that does not exist results in all pages """
-        result = self.search(u"-%s" % self.doesnotexist)
-        test_result = len(result.hits)
-        n_pages = len(self.pages)
-        assert test_result == n_pages
-
-    def testFullSearchRegexCaseInsensitive(self):
-        """ search: full search for regular expression (case insensitive) """
-        search_re = 'ne{2}dle' # matches 'NEEDLE' or 'needle' or ...
-        expected_pages = set(['ContentSearchUpper', 'ContentSearchLower', ])
-        result = self.search(u'-domain:underlay -domain:system re:%s' % search_re)
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-    def testFullSearchRegexCaseSensitive(self):
-        """ search: full search for regular expression (case sensitive) """
-        search_re = 'ne{2}dle' # matches 'needle'
-        expected_pages = set(['ContentSearchLower', ])
-        result = self.search(u'-domain:underlay -domain:system re:case:%s' % search_re)
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-    def testFullSearchNegativeTerm(self):
-        """ search: full search for a AND expression with a negative term """
-        helpon_count = len(self.search(u"HelpOn").hits)
-        result = self.search(u"HelpOn -Thumbnails")
-        assert 0 < len(result.hits) < helpon_count
-
-    def test_title_search(self):
-        expected_pages = set(['FrontPage', ])
-        query = QueryParser(titlesearch=True).parse_query('FrontPage')
-        result = self.search(query)
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-    def test_get_searcher(self):
-        assert isinstance(_get_searcher(self.request, ''), self.searcher_class)
-
-
-class TestMoinSearch(BaseSearchTest):
-    """ search: test Moin search """
-    searcher_class = MoinSearch
-
-    def get_searcher(self, query):
-        pages = [{'pagename': page, 'attachment': '', 'wikiname': 'Self', } for page in self.pages]
-        return MoinSearch(self.request, query, pages=pages)
-
-    def test_stemming(self):
-        expected_pages = set([u'TestEdit', u'TestOnEditing', ])
-        result = self.search(u"title:edit")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        expected_pages = set([u'TestOnEditing', ])
-        result = self.search(u"title:editing")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-
-class TestXapianSearch(BaseSearchTest):
-    """ search: test Xapian indexing / search """
-
-    class Config(wikiconfig.Config):
-        xapian_search = True
-
-    def _index_update(self):
-        # for xapian, we queue index updates so they can get indexed later.
-        # here we make sure the queue will be processed completely,
-        # before we continue:
-        from MoinMoin.search.Xapian import XapianIndex
-        XapianIndex(self.request).do_queued_updates()
-
-    def get_searcher(self, query):
-        from MoinMoin.search.Xapian.search import XapianSearch
-        return XapianSearch(self.request, query)
-
-    def get_moin_search_connection(self):
-        from MoinMoin.search.Xapian import XapianIndex
-        return XapianIndex(self.request).get_search_connection()
-
-    def setup_class(self):
-        py.test.skip("xapian tests broken")
-        try:
-            from MoinMoin.search.Xapian import XapianIndex
-            from MoinMoin.search.Xapian.search import XapianSearch
-            self.searcher_class = XapianSearch
-
-        except ImportError, error:
-            if not str(error).startswith('Xapian '):
-                raise
-            py.test.skip('xapian is not installed')
-
-        nuke_xapian_index()
-        index = XapianIndex(self.request)
-        # Additionally, index pages which were not created above but are
-        # supposed to be searchable.
-        pages_to_index = [page for page in self.pages if not self.pages[page]]
-        index.indexPages(mode='add', pages=pages_to_index)
-
-        super(TestXapianSearch, self).setup_class()
-
-    def teardown_class(self):
-        nuke_xapian_index()
-
-    def test_get_all_documents(self):
-        connection = self.get_moin_search_connection()
-        documents = connection.get_all_documents()
-        n_pages = len(self.pages)
-        test_result = len(documents)
-        assert test_result == n_pages
-        for document in documents:
-            assert document.data['pagename'][0] in self.pages.keys()
-
-    def test_xapian_term(self):
-        parser = QueryParser()
-        connection = self.get_moin_search_connection()
-
-        prefixes = {u'': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'),
-                    u'title:': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'),
-                    u'linkto:': ([u'', u're:', u'case:', u'case:re:'], u'FrontPage'),
-                    u'mimetype:': ([u'', u're:'], u'text/wiki'),
-                    u'language:': ([u''], u'en'),
-                    u'domain:': ([u''], u'system'),
-                   }
-
-        def test_query(query):
-            query_ = parser.parse_query(query).xapian_term(self.request, connection)
-            print str(query_)
-            assert not query_.empty()
-
-        for prefix, data in prefixes.iteritems():
-            modifiers, term = data
-            for modifier in modifiers:
-                query = ''.join([prefix, modifier, term])
-                yield query, test_query, query
-
-    def test_stemming(self):
-        expected_pages = set([u'TestEdit', ])
-        result = self.search(u"title:edit")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        expected_pages = set([u'TestOnEditing', ])
-        result = self.search(u"title:editing")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-
-class TestXapianSearchStemmed(TestXapianSearch):
-    """ search: test Xapian indexing / search - with stemming enabled """
-
-    class Config(wikiconfig.Config):
-        xapian_search = True
-        xapian_stemming = True
-
-    def test_stemming(self):
-        expected_pages = set([u'TestEdit', u'TestOnEditing', ])
-        result = self.search(u"title:edit")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-        expected_pages = set([u'TestEdit', u'TestOnEditing', ])
-        result = self.search(u"title:editing")
-        found_pages = set([hit.page_name for hit in result.hits])
-        assert found_pages == expected_pages
-
-
-class TestGetSearcher(object):
-
-    class Config(wikiconfig.Config):
-        xapian_search = True
-
-    def test_get_searcher(self):
-        assert isinstance(_get_searcher(self.request, ''), MoinSearch), 'Xapian index was not created, so despite the configuration MoinSearch must be used!'
-
-coverage_modules = ['MoinMoin.search']
-
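For reference, the test_xapian_term test above builds its query strings by combining
every search prefix with every allowed modifier. A standalone sketch of that
combination step (plain Python, no MoinMoin imports; the dict is a shortened copy
of the one in the test):

    prefixes = {u'': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'),
                u'title:': ([u'', u're:', u'case:', u'case:re:'], u'SearchTestPage'),
                u'linkto:': ([u'', u're:', u'case:', u'case:re:'], u'FrontPage')}
    queries = [prefix + modifier + term
               for prefix, (modifiers, term) in prefixes.items()
               for modifier in modifiers]
    # e.g. u'title:case:re:SearchTestPage', u'linkto:re:FrontPage', ...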
--- a/MoinMoin/search/_tests/test_wiki_analyzer.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-# Copyright: 2009 MoinMoin:DmitrijsMilajevs
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - MoinMoin.search.Xapian.tokenizer Tests
-"""
-
-
-import py
-
-from flask import current_app as app
-
-from MoinMoin._tests import wikiconfig
-
-try:
-    from MoinMoin.search.Xapian.tokenizer import WikiAnalyzer
-except ImportError:
-    py.test.skip('xapian is not installed')
-
-class TestWikiAnalyzer(object):
-
-    word = u'HelpOnMoinTesting'
-    words = {word.lower(): u'',
-             u'help': u'',
-             u'on': u'',
-             u'moin': u'',
-             u'testing': u''}
-
-    def setup_class(self):
-        self.analyzer = WikiAnalyzer(language=app.cfg.language_default)
-
-    def test_tokenize(self):
-        words = self.words
-        tokens = list(self.analyzer.tokenize(self.word))
-
-        assert len(tokens) == len(words)
-
-        for token, stemmed in tokens:
-            assert token in words
-            assert words[token] == stemmed
-
-
-class TestWikiAnalyzerStemmed(TestWikiAnalyzer):
-
-    word = u'HelpOnMoinTesting'
-    words = {word.lower(): u'helponmointest',
-             u'help': u'',
-             u'on': u'',
-             u'moin': u'',
-             u'testing': u'test'}
-
-    class Config(wikiconfig.Config):
-
-        xapian_stemming = True
-
-
-class TestWikiAnalyzerSeveralWords(TestWikiAnalyzer):
-
-    word = u'HelpOnMoinTesting OtherWikiWord'
-    words = {u'helponmointesting': u'',
-             u'help': u'',
-             u'on': u'',
-             u'moin': u'',
-             u'testing': u'',
-             u'otherwikiword': u'',
-             u'other': u'',
-             u'wiki': u'',
-             u'word': u''}
-
-
-class TestWikiAnalyzerStemmedSeveralWords(TestWikiAnalyzer):
-
-    word = u'HelpOnMoinTesting OtherWikiWord'
-    words = {u'helponmointesting': u'helponmointest',
-             u'help': u'',
-             u'on': u'',
-             u'moin': u'',
-             u'testing': u'test',
-             u'otherwikiword': u'',
-             u'other': u'',
-             u'wiki': u'',
-             u'word': u''}
-
-    class Config(wikiconfig.Config):
-
-        xapian_stemming = True
-
-
-class TestWikiAnalyzerStemmedHelpOnEditing(TestWikiAnalyzer):
-
-    word = u'HelpOnEditing'
-    words = {u'helponediting': u'helponedit',
-             u'help': u'',
-             u'on': u'',
-             u'editing': u'edit'}
-
-    class Config(wikiconfig.Config):
-
-        xapian_stemming = True
-
-
-class TestWikiAnalyzerStemmedCategoryHomepage(TestWikiAnalyzer):
-
-    word = u'CategoryHomepage'
-    words = {u'categoryhomepage': u'categoryhomepag',
-             u'category': u'categori',
-             u'homepage': u'homepag'}
-
-    class Config(wikiconfig.Config):
-
-        xapian_stemming = True
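All of the analyzer tests above assert the same contract: WikiAnalyzer.tokenize()
yields (token, stemmed) pairs, every token must appear in the expected `words`
mapping, and the stem must match (an empty string where stemming is off or leaves
the token unchanged). A minimal standalone sketch of that check, assuming only the
pair-yielding interface shown above:

    def check_tokens(tokens, expected):
        tokens = list(tokens)
        assert len(tokens) == len(expected)
        for token, stemmed in tokens:
            assert token in expected           # token produced by the analyzer
            assert expected[token] == stemmed  # and stemmed as expected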
--- a/MoinMoin/search/builtin.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,390 +0,0 @@
-# Copyright: 2005 MoinMoin:FlorianFesti
-# Copyright: 2005 MoinMoin:NirSoffer
-# Copyright: 2005 MoinMoin:AlexanderSchremmer
-# Copyright: 2006-2009 MoinMoin:ThomasWaldmann
-# Copyright: 2006 MoinMoin:FranzPletz
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - search engine internals
-"""
-
-
-import sys, os, time, errno, codecs
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-from flask import current_app as app
-
-from flask import flaskg
-
-from MoinMoin import wikiutil, config, caching  # caching is used by IndexerQueue below
-from MoinMoin.util import lock, filesys
-from MoinMoin.search.results import getSearchResults, Match, TextMatch, TitleMatch
-
-##############################################################################
-# Search Engine Abstraction
-##############################################################################
-
-
-class IndexerQueue(object):
-    """
-    Represents a locked on-disk queue with jobs for the xapian indexer
-
-    Each job is a tuple like: (PAGENAME, ATTACHMENTNAME, REVNO)::
-
-        PAGENAME: page name (unicode)
-        ATTACHMENTNAME: attachment name (unicode) or None (for pages)
-        REVNO: revision number (int) - meaning "look at that revision",
-               or None - meaning "look at all revisions"
-    """
-
-    def __init__(self, request, xapian_dir, queuename, timeout=10.0):
-        """
-        :param request: request object
-        :param xapian_dir: the xapian main directory
-        :param queuename: name of the queue (used for caching key)
-        :param timeout: lock acquire timeout
-        """
-        self.request = request
-        self.xapian_dir = xapian_dir
-        self.queuename = queuename
-        self.timeout = timeout
-
-    def get_cache(self, locking):
-        return caching.CacheEntry(self.xapian_dir, self.queuename,
-                                  scope='dir', use_pickle=True, do_locking=locking)
-
-    def _queue(self, cache):
-        try:
-            queue = cache.content()
-        except caching.CacheError:
-            # likely nothing there yet
-            queue = []
-        return queue
-
-    def put(self, pagename, attachmentname=None, revno=None):
-        """ Put an entry into the queue (append at end)
-
-        :param pagename: page name [unicode]
-        :param attachmentname: attachment name [unicode]
-        :param revno: revision number (int) or None (all revs)
-        """
-        cache = self.get_cache(locking=False) # we lock manually
-        cache.lock('w', 60.0)
-        try:
-            queue = self._queue(cache)
-            entry = (pagename, attachmentname, revno)
-            queue.append(entry)
-            cache.update(queue)
-        finally:
-            cache.unlock()
-
-    def get(self):
-        """ Get (and remove) first entry from the queue
-
-        Raises IndexError if queue was empty when calling get().
-        """
-        cache = self.get_cache(locking=False) # we lock manually
-        cache.lock('w', 60.0)
-        try:
-            queue = self._queue(cache)
-            entry = queue.pop(0)
-            cache.update(queue)
-        finally:
-            cache.unlock()
-        return entry
-
-
-class BaseIndex(object):
-    """ Represents a search engine index """
-
-    def __init__(self, request):
-        """
-        :param request: current request
-        """
-        self.request = request
-        self.main_dir = self._main_dir()
-        if not os.path.exists(self.main_dir):
-            os.makedirs(self.main_dir)
-        self.update_queue = IndexerQueue(request, self.main_dir, 'indexer-queue')
-
-    def _main_dir(self):
-        raise NotImplementedError('...')
-
-    def exists(self):
-        """ Check if index exists """
-        raise NotImplementedError('...')
-
-    def mtime(self):
-        """ Modification time of the index """
-        raise NotImplementedError('...')
-
-    def touch(self):
-        """ Touch the index """
-        raise NotImplementedError('...')
-
-    def _search(self, query):
-        """ Actually perform the search
-
-        :param query: the search query objects tree
-        """
-        raise NotImplementedError('...')
-
-    def search(self, query, **kw):
-        """ Search for items in the index
-
-        :param query: the search query objects to pass to the index
-        """
-        return self._search(query, **kw)
-
-    def update_item(self, pagename, attachmentname=None, revno=None, now=True):
-        """ Update a single item (page or attachment) in the index
-
-        :param pagename: the name of the page to update
-        :param attachmentname: the name of the attachment to update
-        :param revno: a specific revision number (int) or None (all revs)
-        :param now: do all updates now (default: True)
-        """
-        self.update_queue.put(pagename, attachmentname, revno)
-        if now:
-            self.do_queued_updates()
-
-    def indexPages(self, files=None, mode='update', pages=None):
-        """ Index pages (and files, if given)
-
-        :param files: iterator or list of files to index additionally
-        :param mode: set the mode of indexing the pages, either 'update' or 'add'
-        :param pages: list of pages to index; if not given, all pages are indexed
-        """
-        start = time.time()
-        request = self._indexingRequest(self.request)
-        self._index_pages(request, files, mode, pages=pages)
-        logging.info("indexing completed successfully in %0.2f seconds." %
-                    (time.time() - start))
-
-    def _index_pages(self, request, files=None, mode='update', pages=None):
-        """ Index all pages (and all given files)
-
-        This should be called from indexPages only!
-
-        :param request: current request
-        :param files: iterator or list of files to index additionally
-        :param mode: set the mode of indexing the pages, either 'update' or 'add'
-        :param pages: list of pages to index; if not given, all pages are indexed
-
-        """
-        raise NotImplementedError('...')
-
-    def do_queued_updates(self, amount=-1):
-        """ Perform updates in the queues
-
-        :keyword amount: how many updates to perform at once (default: -1 == all)
-        """
-        raise NotImplementedError('...')
-
-    def optimize(self):
-        """ Optimize the index if possible """
-        raise NotImplementedError('...')
-
-    def contentfilter(self, filename):
-        """ Get a filter for content of filename and return unicode content.
-
-        :param filename: name of the file
-        """
-        mt = wikiutil.MimeType(filename=filename)
-        return mt.mime_type(), u'not implemented' # XXX see moin 1.9 code about how it was done there
-
-    def _indexingRequest(self, request):
-        """ Return a new request that can be used for index building.
-
-        This request uses a security policy that lets the current user
-        read any page. Without this policy some pages would not render,
-        which would create a broken pagelinks index.
-
-        :param request: current request
-        """
-        import copy
-        from MoinMoin.security import Permissions
-
-        class SecurityPolicy(Permissions):
-
-            def read(self, *args, **kw):
-                return True
-
-        r = copy.copy(request)
-        r.user.may = SecurityPolicy(r.user) # XXX
-        return r
-
-
-##############################################################################
-### Searching
-##############################################################################
-
-
-class BaseSearch(object):
-    """ A search run """
-
-    def __init__(self, request, query, sort='weight', mtime=None, historysearch=0):
-        """
-        :param request: current request
-        :param query: search query objects tree
-        :keyword sort: the sorting of the results (default: 'weight')
-        :keyword mtime: only show items newer than this timestamp (default: None)
-        :keyword historysearch: whether to show old revisions of a page (default: 0)
-        """
-        self.request = request
-        self.query = query
-        self.sort = sort
-        self.mtime = mtime
-        self.historysearch = historysearch
-        self.filtered = False
-        self.fs_rootpage = "FS" # XXX FS hardcoded
-
-    def run(self):
-        """ Perform search and return results object """
-
-        start = time.time()
-        hits, estimated_hits = self._search()
-
-        # important - filter pages the user may not read!
-        if not self.filtered:
-            hits = self._filter(hits)
-            logging.debug("after filtering: %d hits" % len(hits))
-
-        return self._get_search_results(hits, start, estimated_hits)
-
-    def _search(self):
-        """
-        Search pages.
-
-        Return list of tuples (wikiname, page object, attachment,
-        matches, revision) and estimated number of search results (if
-        there is no estimate, None should be returned).
-
-        The list may contain deleted pages or pages the user may not read.
-        """
-        raise NotImplementedError()
-
-    def _filter(self, hits):
-        """
-        Filter out deleted or acl protected pages
-
-        :param hits: list of hits
-        """
-        userMayRead = flaskg.user.may.read
-        fs_rootpage = self.fs_rootpage + "/"
-        thiswiki = (app.cfg.interwikiname, 'Self')
-        filtered = [(wikiname, page, attachment, match, rev)
-                for wikiname, page, attachment, match, rev in hits
-                    if (not wikiname in thiswiki or
-                       page.exists() and userMayRead(page.page_name) or
-                       page.page_name.startswith(fs_rootpage)) and
-                       (not self.mtime or self.mtime <= page.mtime_usecs()/1000000)]
-        return filtered
-
-    def _get_search_results(self, hits, start, estimated_hits):
-        return getSearchResults(self.request, self.query, hits, start, self.sort, estimated_hits)
-
-    def _get_match(self, page=None, uid=None):
-        """
-        Get all matches
-
-        :param page: the current page instance
-        """
-        if page:
-            return self.query.search(page)
-
-    def _getHits(self, pages):
-        """ Get the hit tuples in pages through _get_match """
-        logging.debug("_getHits searching in %d pages ..." % len(pages))
-        from MoinMoin.Page import Page
-        hits = []
-        revisionCache = {}
-        fs_rootpage = self.fs_rootpage
-        for hit in pages:
-
-            uid = hit.get('uid')
-            wikiname = hit['wikiname']
-            pagename = hit['pagename']
-            attachment = hit['attachment']
-            revision = int(hit.get('revision', 0))
-
-            logging.debug("_getHits processing %r %r %d %r" % (wikiname, pagename, revision, attachment))
-
-            if wikiname in (app.cfg.interwikiname, 'Self'): # THIS wiki
-                page = Page(self.request, pagename, rev=revision)
-
-                if not self.historysearch and revision:
-                    revlist = page.getRevList()
-                    # revlist can be empty if page was nuked/renamed since it was included in xapian index
-                    if not revlist or revlist[0] != revision:
-                        # nothing there at all or not the current revision
-                        logging.debug("no history search, skipping non-current revision...")
-                        continue
-
-                if attachment:
-                    # revision is currently always 0
-                    if pagename == fs_rootpage: # not really an attachment
-                        page = Page(self.request, "%s/%s" % (fs_rootpage, attachment))
-                        hits.append((wikiname, page, None, None, revision))
-                    else:
-                        matches = self._get_match(page=None, uid=uid)
-                        hits.append((wikiname, page, attachment, matches, revision))
-                else:
-                    matches = self._get_match(page=page, uid=uid)
-                    logging.debug("self._get_match %r" % matches)
-                    if matches:
-                        if not self.historysearch and pagename in revisionCache and revisionCache[pagename][0] < revision:
-                            hits.remove(revisionCache[pagename][1])
-                            del revisionCache[pagename]
-                        hits.append((wikiname, page, attachment, matches, revision))
-                        revisionCache[pagename] = (revision, hits[-1])
-
-            else: # other wiki
-                hits.append((wikiname, pagename, attachment, None, revision))
-        logging.debug("_getHits returning %r." % hits)
-        return hits
-
-
-class MoinSearch(BaseSearch):
-
-    def __init__(self, request, query, sort='weight', mtime=None, historysearch=0, pages=None):
-        super(MoinSearch, self).__init__(request, query, sort, mtime, historysearch)
-
-        self.pages = pages
-
-    def _search(self):
-        """
-        Search pages using moin's built-in full text search
-
-        The list may contain deleted pages or pages the user may not
-        read.
-
-        If self.pages is not None, search only in those pages.
-        """
-        # if self.pages is None, we make a full pagelist, but don't
-        # search attachments (thus attachment name = '')
-        pages = self.pages or [{'pagename': p, 'attachment': '', 'wikiname': 'Self', } for p in self._getPageList()]
-
-        hits = self._getHits(pages)
-        return hits, None
-
-    def _getPageList(self):
-        """ Get list of pages to search in
-
-        If the query has a page filter, use it to filter pages before
-        searching. If not, get an unfiltered page list. The filtering
-        will happen later on the hits, which is faster with current
-        slow storage.
-        """
-        filter_ = self.query.pageFilter()
-        if filter_:
-            # There is no need to filter the results again.
-            self.filtered = True
-            return self.request.rootpage.getPageList(filter=filter_)
-        else:
-            return self.request.rootpage.getPageList(user='')
-
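For reference, a hedged sketch of how the MoinSearch class above was driven
(mirroring BaseSearch.run() and the test code earlier in this changeset;
`request`, `query` and `page_names` are assumed to come from the caller):

    pages = [{'pagename': name, 'attachment': '', 'wikiname': 'Self', }
             for name in page_names]
    results = MoinSearch(request, query, pages=pages).run()
    for hit in results.hits:
        print hit.page_name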
--- a/MoinMoin/search/queryparser/__init__.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,160 +0,0 @@
-# Copyright: 2005 MoinMoin:FlorianFesti
-# Copyright: 2005 MoinMoin:NirSoffer
-# Copyright: 2005 MoinMoin:AlexanderSchremmer
-# Copyright: 2006-2008 MoinMoin:ThomasWaldmann
-# Copyright: 2006 MoinMoin:FranzPletz
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - search query parser
-"""
-
-
-import re
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-from MoinMoin import config
-from MoinMoin.util.paramparser import parse_quoted_separated_ext, ParserPrefix, BracketError
-from MoinMoin.search.queryparser.expressions import AndExpression, OrExpression, TextSearch, TitleSearch, \
-    LinkSearch, DomainSearch, MimetypeSearch, LanguageSearch
-
-class QueryError(ValueError):
-    """ error raised for problems when parsing the query """
-
-
-class QueryParser(object):
-    """
-    Converts a string into a tree of Query objects.
-    """
-
-    def __init__(self, **kw):
-        """
-        :keyword titlesearch: treat all terms as title searches
-        :keyword case: do case sensitive search
-        :keyword regex: treat all terms as regular expressions
-        """
-        self.titlesearch = kw.get('titlesearch', 0)
-        self.case = kw.get('case', 0)
-        self.regex = kw.get('regex', 0)
-        self._M = ParserPrefix('-')
-
-    def _analyse_items(self, items):
-        terms = AndExpression()
-        M = self._M
-        while items:
-            item = items[0]
-            items = items[1:]
-
-            if isinstance(item, unicode):
-                if item.lower() == 'or':
-                    sub = terms.subterms()
-                    if len(sub) >= 1:
-                        last = sub[-1]
-                        if last.__class__ == OrExpression:
-                            orexpr = last
-                        else:
-                            # Note: do NOT reduce "terms" when it has a single subterm only!
-                            # Doing that would break "-someterm" searches as we rely on AndExpression
-                            # doing a "MatchAll AND_NOT someterm" for that case!
-                            orexpr = OrExpression(terms)
-                        terms = AndExpression(orexpr)
-                    else:
-                        raise QueryError('Nothing to OR')
-                    remaining = self._analyse_items(items)
-                    if remaining.__class__ == OrExpression:
-                        for sub in remaining.subterms():
-                            orexpr.append(sub)
-                    else:
-                        orexpr.append(remaining)
-                    break
-                elif item.lower() == 'and':
-                    pass
-                else:
-                    # odd workaround; we should instead ignore this term
-                    # and reject expressions that contain nothing after
-                    # being parsed rather than rejecting an empty string
-                    # before parsing...
-                    if not item:
-                        raise QueryError("Term too short")
-                    regex = self.regex
-                    case = self.case
-                    if self.titlesearch:
-                        terms.append(TitleSearch(item, use_re=regex, case=case))
-                    else:
-                        terms.append(TextSearch(item, use_re=regex, case=case))
-            elif isinstance(item, tuple):
-                negate = item[0] == M
-                title_search = self.titlesearch
-                regex = self.regex
-                case = self.case
-                linkto = False
-                lang = False
-                mimetype = False
-                domain = False
-                while len(item) > 1:
-                    m = item[0]
-                    if m is None:
-                        raise QueryError("Invalid search prefix")
-                    elif m == M:
-                        negate = True
-                    elif "title".startswith(m):
-                        title_search = True
-                    elif "regex".startswith(m):
-                        regex = True
-                    elif "case".startswith(m):
-                        case = True
-                    elif "linkto".startswith(m):
-                        linkto = True
-                    elif "language".startswith(m):
-                        lang = True
-                    elif "mimetype".startswith(m):
-                        mimetype = True
-                    elif "domain".startswith(m):
-                        domain = True
-                    else:
-                        raise QueryError("Invalid search prefix")
-                    item = item[1:]
-
-                text = item[0]
-                if mimetype:
-                    obj = MimetypeSearch(text, use_re=regex, case=False)
-                elif lang:
-                    obj = LanguageSearch(text, use_re=regex, case=False)
-                elif linkto:
-                    obj = LinkSearch(text, use_re=regex, case=case)
-                elif domain:
-                    obj = DomainSearch(text, use_re=regex, case=False)
-                elif title_search:
-                    obj = TitleSearch(text, use_re=regex, case=case)
-                else:
-                    obj = TextSearch(text, use_re=regex, case=case)
-                obj.negated = negate
-                terms.append(obj)
-            elif isinstance(item, list):
-                # strip off the opening parenthesis
-                terms.append(self._analyse_items(item[1:]))
-
-        # Note: do NOT reduce "terms" when it has a single subterm only!
-        # Doing that would break "-someterm" searches as we rely on AndExpression
-        # doing a "MatchAll AND_NOT someterm" for that case!
-        return terms
-
-    def parse_query(self, query):
-        """ transform an string into a tree of Query objects """
-        if isinstance(query, str):
-            query = query.decode(config.charset)
-        try:
-            items = parse_quoted_separated_ext(query,
-                                               name_value_separator=':',
-                                               prefixes='-',
-                                               multikey=True,
-                                               brackets=('()', ),
-                                               quotes='\'"')
-        except BracketError, err:
-            raise QueryError(str(err))
-        logging.debug("parse_quoted_separated items: %r" % items)
-        query = self._analyse_items(items)
-        logging.debug("analyse_items query: %r" % query)
-        return query
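A hedged usage sketch of the parser above (flags as documented in __init__; the
query string may combine search prefixes, '-' negation, parentheses and quotes):

    parser = QueryParser(titlesearch=False, case=False, regex=False)
    query = parser.parse_query(u'HelpOn -title:Thumbnails (linkto:FrontPage or domain:system)')
    # query is an AndExpression tree; negated terms stay inside it so the
    # backend can evaluate them as "MatchAll AND_NOT term".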
--- a/MoinMoin/search/queryparser/expressions.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,563 +0,0 @@
-# Copyright: 2005 MoinMoin:FlorianFesti
-# Copyright: 2005 MoinMoin:NirSoffer
-# Copyright: 2005 MoinMoin:AlexanderSchremmer
-# Copyright: 2006-2008 MoinMoin:ThomasWaldmann
-# Copyright: 2006 MoinMoin:FranzPletz
-# Copyright: 2009 MoinMoin:DmitrijsMilajevs
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - search query expressions
-"""
-
-
-import re
-
-from MoinMoin import log
-logging = log.getLogger(__name__)
-
-from flask import current_app as app
-
-from MoinMoin import config, wikiutil
-from MoinMoin.search.results import Match, TitleMatch, TextMatch
-
-
-class BaseExpression(object):
-    """ Base class for all search terms """
-
-    # costs is estimated time to calculate this term.
-    # Number is relative to other terms and has no real unit.
-    # It allows doing the fast searches first.
-    costs = 0
-    _tag = ""
-
-    def __init__(self, pattern, use_re=False, case=False):
-        """ Init a text search
-
-        :param pattern: pattern to search for, ascii string or unicode
-        :param use_re: treat pattern as a regex rather than plain text, bool
-        :param case: do case sensitive search, bool
-        """
-        self._pattern = unicode(pattern)
-        self.negated = 0
-        self.use_re = use_re
-        self.case = case
-
-        if use_re:
-            self._tag += 're:'
-        if case:
-            self._tag += 'case:'
-
-        self.pattern, self.search_re = self._build_re(self._pattern, use_re=use_re, case=case)
-
-    def __str__(self):
-        return unicode(self).encode(config.charset, 'replace')
-
-    def negate(self):
-        """ Negate the result of this term """
-        self.negated = 1
-
-    def pageFilter(self):
-        """ Return a page filtering function
-
-        This function is used to filter the page list before we search
-        it. Return a function that gets a page name and returns a bool.
-
-        The default expression does not have any filter function and
-        returns None. Subclasses may define custom filter functions.
-        """
-        return None
-
-    def _get_matches(self, page):
-        raise NotImplementedError
-
-    def search(self, page):
-        """ Search a page
-
-        Returns a list of Match objects or None if term didn't find
-        anything (vice versa if negate() was called).  Terms containing
-        other terms must call this method to aggregate the results.
-        This Base class returns True (Match()) if not negated.
-        """
-        logging.debug("%s searching page %r for (negated = %r) %r" % (self.__class__, page.page_name, self.negated, self._pattern))
-
-        matches = self._get_matches(page)
-
-        # Decide what to do with the results.
-        if self.negated:
-            if matches:
-                result = None
-            else:
-                result = [Match()] # represents "matched" (but as it was a negative match, we have nothing to show)
-        else: # not negated
-            if matches:
-                result = matches
-            else:
-                result = None
-        logging.debug("%s returning %r" % (self.__class__, result))
-        return result
-
-    def highlight_re(self):
-        """ Return a regular expression of what the term searches for
-
-        Used to display the needle in the page.
-        """
-        return u''
-
-    def _build_re(self, pattern, use_re=False, case=False, stemmed=False):
-        """ Make a regular expression out of a text pattern """
-        flags = case and re.U or (re.I | re.U)
-
-        try:
-            search_re = re.compile(pattern, flags)
-        except re.error:
-            pattern = re.escape(pattern)
-            search_re = re.compile(pattern, flags)
-
-        return pattern, search_re
-
-    def _get_query_for_search_re(self, connection, field_to_check=None):
-        """
-        Return a query which satisfies self.search_re for field values.
-        If field_to_check is given check values only for that field.
-        """
-        from MoinMoin.search.Xapian import Query
-
-        queries = []
-
-        documents = connection.get_all_documents()
-        for document in documents:
-            data = document.data
-            if field_to_check:
-                # Check only field with given name
-                if field_to_check in data:
-                    for term in data[field_to_check]:
-                        if self.search_re.match(term):
-                            queries.append(connection.query_field(field_to_check, term))
-            else:
-                # Check all fields
-                for field, terms in data.iteritems():
-                    for term in terms:
-                        if self.search_re.match(term):
-                            queries.append(connection.query_field(field, term))
-
-        return Query(Query.OP_OR, queries)
-
-    def xapian_need_postproc(self):
-        return self.case
-
-    def __unicode__(self):
-        neg = self.negated and '-' or ''
-        return u'%s%s"%s"' % (neg, self._tag, unicode(self._pattern))
-
-
-class AndExpression(BaseExpression):
-    """ A term connecting several sub terms with a logical AND """
-
-    operator = ' '
-
-    def __init__(self, *terms):
-        self._subterms = list(terms)
-        self.negated = 0
-
-    def append(self, expression):
-        """ Append another term """
-        self._subterms.append(expression)
-
-    def subterms(self):
-        return self._subterms
-
-    @property
-    def costs(self):
-        return sum([t.costs for t in self._subterms])
-
-    def __unicode__(self):
-        result = ''
-        for t in self._subterms:
-            result += self.operator + unicode(t)
-        return u'[' + result[len(self.operator):] + u']'
-
-    def _filter(self, terms, name):
-        """ A function that returns True if all terms filter name """
-        result = None
-        for term in terms:
-            _filter = term.pageFilter()
-            t = _filter(name)
-            if t is True:
-                result = True
-            elif t is False:
-                result = False
-                break
-        logging.debug("pageFilter AND returns %r" % result)
-        return result
-
-    def pageFilter(self):
-        """ Return a page filtering function
-
-        This function is used to filter the page list before we search it.
-
-        Return a function that gets a page name and returns a bool, or None.
-        """
-        # Sort terms by cost, then get all title searches
-        self.sortByCost()
-        terms = [term for term in self._subterms if isinstance(term, TitleSearch)]
-        if terms:
-            return lambda name: self._filter(terms, name)
-
-    def sortByCost(self):
-        self._subterms.sort(key=lambda t: t.costs)
-
-    def search(self, page):
-        """ Search for each term, cheap searches first """
-        self.sortByCost()
-        matches = []
-        for term in self._subterms:
-            result = term.search(page)
-            if not result:
-                return None
-            matches.extend(result)
-        return matches
-
-    def highlight_re(self):
-        result = []
-        for s in self._subterms:
-            highlight_re = s.highlight_re()
-            if highlight_re:
-                result.append(highlight_re)
-
-        return u'|'.join(result)
-
-    def xapian_need_postproc(self):
-        for term in self._subterms:
-            if term.xapian_need_postproc():
-                return True
-        return False
-
-    def xapian_term(self, request, connection):
-        from MoinMoin.search.Xapian import Query
-
-        # sort negated terms
-        terms = []
-        not_terms = []
-
-        for term in self._subterms:
-            if not term.negated:
-                terms.append(term.xapian_term(request, connection))
-            else:
-                not_terms.append(term.xapian_term(request, connection))
-
-        # prepare query for not negated terms
-        if terms:
-            query = Query(Query.OP_AND, terms)
-        else:
-            query = Query('') # MatchAll
-
-        # prepare query for negated terms
-        if not_terms:
-            query_negated = Query(Query.OP_OR, not_terms)
-        else:
-            query_negated = Query()
-
-        return Query(Query.OP_AND_NOT, query, query_negated)
-
-
-class OrExpression(AndExpression):
-    """ A term connecting several sub terms with a logical OR """
-
-    operator = ' or '
-
-    def _filter(self, terms, name):
-        """ A function that returns True if any term filters name """
-        result = None
-        for term in terms:
-            _filter = term.pageFilter()
-            t = _filter(name)
-            if t is True:
-                result = True
-                break
-            elif t is False:
-                result = False
-        logging.debug("pageFilter OR returns %r" % result)
-        return result
-
-    def search(self, page):
-        """ Search page with terms
-
-        :param page: the page instance
-        """
-        # XXX Do we have any reason to sort here? we are not breaking out
-        # of the search in any case.
-        #self.sortByCost()
-        matches = []
-        for term in self._subterms:
-            result = term.search(page)
-            if result:
-                matches.extend(result)
-        return matches
-
-    def xapian_term(self, request, connection):
-        from MoinMoin.search.Xapian import Query
-        # XXX: negated terms managed by _moinSearch?
-        return Query(Query.OP_OR, [term.xapian_term(request, connection) for term in self._subterms])
-
-
-class BaseTextFieldSearch(BaseExpression):
-
-    _field_to_search = None
-
-    def xapian_term(self, request, connection):
-        from MoinMoin.search.Xapian import Query, WikiAnalyzer
-
-        if self.use_re:
-            queries = [self._get_query_for_search_re(connection, self._field_to_search)]
-        else:
-            queries = []
-            stemmed = []
-            analyzer = WikiAnalyzer(language=app.cfg.language_default)
-
-            for term in self._pattern.split():
-                query_term = connection.query_field(self._field_to_search, term)
-                tokens = analyzer.tokenize(term)
-
-                if app.cfg.xapian_stemming:
-                    query_token = []
-                    for token, stemmed_ in tokens:
-                        if token != term.lower():
-                            if stemmed_:
-                                query_token.append(Query(Query.OP_OR,
-                                                         [connection.query_field(self._field_to_search, token),
-                                                          connection.query_field(self._field_to_search, stemmed_)]))
-#                                 stemmed.append('(%s|%s)' % (token, stemmed_))
-                            else:
-                                query_token.append(connection.query_field(self._field_to_search, token))
-#                                 stemmed.append(token)
-                    query_tokens = Query(Query.OP_AND, query_token)
-                else:
-                    query_tokens = Query(Query.OP_AND, [connection.query_field(self._field_to_search, token) for token, stemmed_ in tokens if token != term.lower()])
-
-                queries.append(Query(Query.OP_OR, [query_term, query_tokens]))
-
-            # XXX broken: a wrong regexp is built!
-            if not self.case and stemmed:
-                new_pat = ' '.join(stemmed)
-                self._pattern = new_pat
-                self.pattern, self.search_re = self._build_re(new_pat, use_re=False, case=self.case, stemmed=True)
-
-        return Query(Query.OP_AND, queries)
-
-
-class TextSearch(BaseTextFieldSearch):
-    """ A term that does a normal text search
-
-    Both page content and the page title are searched, using an
-    additional TitleSearch term.
-    """
-
-    costs = 10000
-    _field_to_search = 'content'
-
-    def highlight_re(self):
-        return u"(%s)" % self.pattern
-
-    def _get_matches(self, page):
-        matches = []
-
-        # Search in page name
-        results = TitleSearch(self._pattern, use_re=self.use_re, case=self.case)._get_matches(page)
-        if results:
-            matches.extend(results)
-
-        # Search in page body
-        body = page.get_raw_body()
-        for match in self.search_re.finditer(body):
-            matches.append(TextMatch(re_match=match))
-
-        return matches
-
-    def xapian_term(self, request, connection):
-        from MoinMoin.search.Xapian import Query
-        if self.use_re:
-            # if regex search is wanted, we need to match all documents, because
-            # we do not have full content stored and need post processing to do
-            # the regex searching.
-            return Query('') # MatchAll
-        else:
-            content_query = super(TextSearch, self).xapian_term(request, connection)
-            title_query = TitleSearch(self._pattern, use_re=self.use_re, case=self.case).xapian_term(request, connection)
-            return Query(Query.OP_OR, [title_query, content_query])
-
-    def xapian_need_postproc(self):
-        # case-sensitive: xapian is case-insensitive, therefore we need postproc
-        # regex: xapian can't do regex search. also we don't have full content
-        #        stored (and we don't want to do that anyway), so regex search
-        #        needs postproc also.
-        return self.case or self.use_re
-
-
-class TitleSearch(BaseTextFieldSearch):
-    """ Term searches in pattern in page title only """
-
-    _tag = 'title:'
-    costs = 100
-    _field_to_search = 'title'
-
-    def pageFilter(self):
-        """ Page filter function for single title search """
-
-        def filter(name):
-            match = self.search_re.search(name)
-            result = bool(self.negated) ^ bool(match)
-            logging.debug("pageFilter title returns %r (%r)" % (result, self.pattern))
-            return result
-        return filter
-
-    def _get_matches(self, page):
-        """ Get matches in page name """
-        matches = []
-
-        for match in self.search_re.finditer(page.page_name):
-            matches.append(TitleMatch(re_match=match))
-
-        return matches
-
-
-class BaseFieldSearch(BaseExpression):
-
-    _field_to_search = None
-
-    def xapian_term(self, request, connection):
-        if self.use_re:
-            return self._get_query_for_search_re(connection, self._field_to_search)
-        else:
-            return connection.query_field(self._field_to_search, self._pattern)
-
-
-class LinkSearch(BaseFieldSearch):
-    """ Search the term in the pagelinks """
-
-    _tag = 'linkto:'
-    _field_to_search = 'linkto'
-    costs = 5000 # cheaper than a TextSearch
-
-    def __init__(self, pattern, use_re=False, case=True):
-        """ Init a link search
-
-        :param pattern: pattern to search for, ascii string or unicode
-        :param use_re: treat pattern as a regex rather than plain text, bool
-        :param case: do case sensitive search, bool
-        """
-
-        super(LinkSearch, self).__init__(pattern, use_re, case)
-
-        self._textpattern = '(' + pattern.replace('/', '|') + ')' # used for search in text
-        self.textsearch = TextSearch(self._textpattern, use_re=True, case=case)
-
-    def highlight_re(self):
-        return u"(%s)" % self._textpattern
-
-    def _get_matches(self, page):
-        # Get matches in page links
-        matches = []
-
-        # XXX in python 2.5 any() may be used.
-        found = False
-        for link in page.getPageLinks(page.request):
-            if self.search_re.match(link):
-                found = True
-                break
-
-        if found:
-            # Search in page text
-            results = self.textsearch.search(page)
-            if results:
-                matches.extend(results)
-            else: # This happens e.g. for pages that use navigation macros
-                matches.append(TextMatch(0, 0))
-
-        return matches
-
-
-class LanguageSearch(BaseFieldSearch):
-    """ Search the pages written in a language """
-
-    _tag = 'language:'
-    _field_to_search = 'lang'
-    costs = 5000 # cheaper than a TextSearch
-
-    def __init__(self, pattern, use_re=False, case=False):
-        """ Init a language search
-
-        :param pattern: pattern to search for, ascii string or unicode
-        :param use_re: treat pattern as a regex rather than plain text, bool
-        :param case: do case sensitive search, bool
-        """
-        # iso language code, always lowercase and not case-sensitive
-        super(LanguageSearch, self).__init__(pattern.lower(), use_re, case=False)
-
-    def _get_matches(self, page):
-
-        if self.pattern == page.pi['language']:
-            return [Match()]
-        else:
-            return []
-
-
-class MimetypeSearch(BaseFieldSearch):
-    """ Search for files belonging to a specific mimetype """
-
-    _tag = 'mimetype:'
-    _field_to_search = 'mimetype'
-    costs = 5000 # cheaper than a TextSearch
-
-    def __init__(self, pattern, use_re=False, case=False):
-        """ Init a mimetype search
-
-        :param pattern: pattern to search for, ascii string or unicode
-        :param use_re: treat pattern as a regex rather than plain text, bool
-        :param case: do case sensitive search, bool
-        """
-        # always lowercase and not case-sensitive
-        super(MimetypeSearch, self).__init__(pattern.lower(), use_re, case=False)
-
-    def _get_matches(self, page):
-
-        page_mimetype = u'text/%s' % page.pi['format']
-
-        if self.search_re.search(page_mimetype):
-            return [Match()]
-        else:
-            return []
-
-
-class DomainSearch(BaseFieldSearch):
-    """ Search for pages belonging to a specific domain """
-
-    _tag = 'domain:'
-    _field_to_search = 'domain'
-    costs = 5000 # cheaper than a TextSearch
-
-    def __init__(self, pattern, use_re=False, case=False):
-        """ Init a domain search
-
-        :param pattern: pattern to search for, ascii string or unicode
-        :param use_re: treat pattern as a regex rather than plain text, bool
-        :param case: do case sensitive search, bool
-        """
-        # always lowercase and not case-sensitive
-        super(DomainSearch, self).__init__(pattern.lower(), use_re, case=False)
-
-    def _get_matches(self, page):
-        checks = {'standard': page.isStandardPage,
-                  'system': lambda page=page: wikiutil.isSystemItem(page.page_name),
-                 }
-
-        try:
-            match = checks[self.pattern]()
-        except KeyError:
-            match = False
-
-        if match:
-            return [Match()]
-        else:
-            return []
-
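The `costs` values above (100 for TitleSearch, 5000 for the field searches, 10000
for TextSearch) exist so that AndExpression.sortByCost() can run cheap terms first
and skip the expensive full-text scan as soon as one term fails. A tiny standalone
illustration of that ordering:

    costs = {'TitleSearch': 100, 'LinkSearch': 5000, 'LanguageSearch': 5000,
             'MimetypeSearch': 5000, 'DomainSearch': 5000, 'TextSearch': 10000}
    terms = ['TextSearch', 'DomainSearch', 'TitleSearch']
    terms.sort(key=lambda name: costs[name])
    # -> ['TitleSearch', 'DomainSearch', 'TextSearch']: if the cheap title term
    #    already fails for a page, the 10000-cost TextSearch never runs on it.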
--- a/MoinMoin/search/results.py	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,836 +0,0 @@
-# Copyright: 2005 MoinMoin:FlorianFesti
-# Copyright: 2005 MoinMoin:NirSoffer
-# Copyright: 2005 MoinMoin:AlexanderSchremmer
-# Copyright: 2006 MoinMoin:ThomasWaldmann
-# Copyright: 2006 MoinMoin:FranzPletz
-# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
-
-"""
-    MoinMoin - search results processing
-"""
-
-
-import StringIO, time
-
-from flask import current_app as app
-
-from MoinMoin import wikiutil
-from MoinMoin.i18n import _, L_, N_
-
-############################################################################
-### Results
-############################################################################
-
-
-class Match(object):
-    """ Base class for all Matches (found pieces of pages).
-
-    This class represents an empty True value as returned from negated searches.
-    """
-    # Default match weight
-    _weight = 1.0
-
-    def __init__(self, start=0, end=0, re_match=None):
-        self.re_match = re_match
-        if not re_match:
-            self._start = start
-            self._end = end
-        else:
-            self._start = self._end = 0
-
-    def __len__(self):
-        return self.end - self.start
-
-    def __eq__(self, other):
-        equal = (self.__class__ == other.__class__ and
-                 self.start == other.start and
-                 self.end == other.end)
-        return equal
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def view(self):
-        return ''
-
-    def weight(self):
-        return self._weight
-
-    def _get_start(self):
-        if self.re_match:
-            return self.re_match.start()
-        return self._start
-
-    def _get_end(self):
-        if self.re_match:
-            return self.re_match.end()
-        return self._end
-
-    # object properties
-    start = property(_get_start)
-    end = property(_get_end)
-
-
-class TextMatch(Match):
-    """ Represents a match in the page content """
-    pass
-
-
-class TitleMatch(Match):
-    """ Represents a match in the page title
-
-    Has more weight than a match in the page content.
-    """
-    # Matches in titles are much more important in wikis. This setting
-    # seems to make all pages that have matches in the title appear
-    # before pages whose title does not match.
-    _weight = 100.0
-
-
-class AttachmentMatch(Match):
-    """ Represents a match in a attachment content
-
-    Not used yet.
-    """
-    pass
-
-
-class FoundPage(object):
-    """ Represents a page in a search result """
-
-    def __init__(self, page_name, matches=None, page=None, rev=0):
-        self.page_name = page_name
-        self.attachment = '' # this is not an attachment
-        self.page = page
-        self.rev = rev
-        if matches is None:
-            matches = []
-        self._matches = matches
-
-    def weight(self, unique=1):
-        """ returns how important this page is for the terms searched for
-
-        Summarize the weight of all page matches
-
-        :param unique: ignore identical matches
-        :rtype: int
-        :returns: page weight
-        """
-        weight = 0
-        for match in self.get_matches(unique=unique):
-            weight += match.weight()
-            # More sophisticated things could be added, like increasing
-            # the weight of near matches.
-        if self.page.parse_processing_instructions().get('deprecated', False):
-            weight = int(weight / 4) # rank it down
-        return weight
-
-    def add_matches(self, matches):
-        """ Add found matches """
-        self._matches.extend(matches)
-
-    def get_matches(self, unique=1, sort='start', type=Match):
-        """ Return all matches of type sorted by sort
-
-        :param unique: return only unique matches (bool)
-        :param sort: match attribute to sort by (string)
-        :param type: type of match to return (Match or sub class)
-        :rtype: list
-        :returns: list of matches
-        """
-        if unique:
-            matches = self._unique_matches(type=type)
-            if sort == 'start':
-                # matches already sorted by match.start, finished.
-                return matches
-        else:
-            matches = self._matches
-
-        # Filter by type and sort by sort using fast schwartzian transform.
-        if sort == 'start':
-            tmp = [(match.start, match) for match in matches if isinstance(match, type)]
-        else:
-            tmp = [(match.weight(), match) for match in matches if isinstance(match, type)]
-        tmp.sort()
-        if sort == 'weight':
-            tmp.reverse()
-        matches = [item[1] for item in tmp]
-
-        return matches
-
-    def _unique_matches(self, type=Match):
-        """ Get a list of unique matches of type
-
-        The result is sorted by match.start, because it's easy to remove
-        duplicates like this.
-
-        :param type: type of match to return
-        :rtype: list
-        :returns: list of matches of type, sorted by match.start
-        """
-        # Filter by type and sort by match.start using fast schwartzian transform.
-        tmp = [(match.start, match) for match in self._matches if isinstance(match, type)]
-        tmp.sort()
-
-        if not len(tmp):
-            return []
-
-        # Get first match into matches list
-        matches = [tmp[0][1]]
-
-        # Add the remaining ones of matches ignoring identical matches
-        for item in tmp[1:]:
-            if item[1] == matches[-1]:
-                continue
-            matches.append(item[1])
-
-        return matches
-
-
-class FoundAttachment(FoundPage):
-    """ Represents an attachment in search results """
-
-    def __init__(self, page_name, attachment, matches=None, page=None, rev=0):
-        self.page_name = page_name
-        self.attachment = attachment
-        self.rev = rev
-        self.page = page
-        if matches is None:
-            matches = []
-        self._matches = matches
-
-    def weight(self, unique=1):
-        return 1
-
-
-class FoundRemote(FoundPage):
-    """ Represents a remote search result """
-
-    def __init__(self, wikiname, page_name, attachment, matches=None, page=None, rev=0):
-        self.wikiname = wikiname
-        self.page_name = page_name
-        self.rev = rev
-        self.attachment = attachment
-        self.page = page
-        if matches is None:
-            matches = []
-        self._matches = matches
-
-    def weight(self, unique=1):
-        return 1
-
-    def get_matches(self, unique=1, sort='start', type=Match):
-        return []
-
-    def _unique_matches(self, type=Match):
-        return []
-
-
-############################################################################
-### Search results formatting
-############################################################################
-
-
-class SearchResults(object):
-    """ Manage search results, supply different views
-
-    Search results can hold valid search results and format them for
-    many requests, until the wiki content changes.
-
-    For example, one might ask for a full page list sorted from A to Z,
-    and then ask for the same list sorted from Z to A. Or sort results
-    by name and then by rank.
-    """
-    # Public functions --------------------------------------------------
-
-    def __init__(self, query, hits, pages, elapsed, sort, estimated_hits):
-        self.query = query # the query
-        self.hits = hits # hits list
-        self.pages = pages # number of pages in the wiki
-        self.elapsed = elapsed # search time
-        self.estimated_hits = estimated_hits # approximately how many hits?
-
-        if sort == 'weight':
-            self._sortByWeight()
-        elif sort == 'page_name':
-            self._sortByPagename()
-        self.sort = sort
-
-    def _sortByWeight(self):
-        """ Sorts found pages by the weight of the matches """
-        tmp = [(hit.weight(), hit.page_name, hit.attachment, hit) for hit in self.hits]
-        tmp.sort()
-        tmp.reverse()
-        self.hits = [item[3] for item in tmp]
-
-    def _sortByPagename(self):
-        """ Sorts a list of found pages alphabetical by page/attachment name """
-        tmp = [(hit.page_name, hit.attachment, hit) for hit in self.hits]
-        tmp.sort()
-        self.hits = [item[2] for item in tmp]
-
-    def stats(self, request, formatter, hitsFrom):
-        """ Return search statistics, formatted with formatter
-
-        :param request: current request
-        :param formatter: formatter to use
-        :param hitsFrom: current position in the hits
-        :rtype: unicode
-        :returns: formatted statistics
-        """
-        if not self.estimated_hits:
-            self.estimated_hits = ('', len(self.hits))
-
-        output = [
-            formatter.paragraph(1, attr={'class': 'searchstats'}),
-            _("Results %(bs)s%(hitsFrom)d - %(hitsTo)d%(be)s "
-                    "of %(aboutHits)s %(bs)s%(hits)d%(be)s results out of "
-                    "about %(items)d items.") %
-                {'aboutHits': self.estimated_hits[0],
-                    'hits': self.estimated_hits[1], 'items': self.pages,
-                    'hitsFrom': hitsFrom + 1,
-                    'hitsTo': hitsFrom +
-                            min(self.estimated_hits[1] - hitsFrom,
-                                app.cfg.search_results_per_page),
-                    'bs': formatter.strong(1), 'be': formatter.strong(0)},
-            u' (%s %s)' % (''.join([formatter.strong(1),
-                formatter.text("%.2f" % self.elapsed),
-                formatter.strong(0)]),
-                formatter.text(_("seconds"))),
-            formatter.paragraph(0),
-            ]
-        return ''.join(output)
-
-    def pageList(self, request, formatter, info=0, numbered=1,
-            paging=True, hitsFrom=0, hitsInfo=0):
-        """ Format a list of found pages
-
-        :param request: current request
-        :param formatter: formatter to use
-        :param info: show match info in title
-        :param numbered: use numbered list for display
-        :param paging: toggle paging
-        :param hitsFrom: current position in the hits
-        :param hitsInfo: toggle hits info line
-        :rtype: unicode
-        :returns: formatted page list
-        """
-        self._reset(request, formatter)
-        f = formatter
-        write = self.buffer.write
-        if numbered:
-            lst = lambda on: f.number_list(on, start=hitsFrom+1)
-        else:
-            lst = f.bullet_list
-
-        if paging and len(self.hits) <= app.cfg.search_results_per_page:
-            paging = False
-
-        # Add pages formatted as list
-        if self.hits:
-            write(lst(1))
-
-            if paging:
-                hitsTo = hitsFrom + app.cfg.search_results_per_page
-                displayHits = self.hits[hitsFrom:hitsTo]
-            else:
-                displayHits = self.hits
-
-            for page in displayHits:
-                if isinstance(page, FoundRemote):
-                    # TODO handle FoundRemote (interwiki) search hits
-                    continue
-                elif isinstance(page, FoundAttachment):
-                    querydict = {
-                        'action': 'AttachFile',
-                        'do': 'view',
-                        'target': page.attachment,
-                    }
-                elif isinstance(page, FoundPage):
-                    if page.rev and page.rev != page.page.getRevList()[0]:
-                        querydict = {
-                            'rev': page.rev,
-                        }
-                    else:
-                        querydict = None
-                querystr = self.querystring(querydict)
-
-                matchInfo = ''
-                if info:
-                    matchInfo = self.formatInfo(f, page)
-
-                info_for_hits = u''
-                if hitsInfo:
-                    info_for_hits = self.formatHitInfoBar(page)
-
-                item = [
-                    f.listitem(1),
-                    f.pagelink(1, page.page_name, querystr=querystr),
-                    self.formatTitle(page),
-                    f.pagelink(0, page.page_name),
-                    matchInfo,
-                    info_for_hits,
-                    f.listitem(0),
-                    ]
-                write(''.join(item))
-            write(lst(0))
-            if paging:
-                write(self.formatPageLinks(hitsFrom=hitsFrom,
-                    hitsPerPage=app.cfg.search_results_per_page,
-                    hitsNum=len(self.hits)))
-
-        return self.getvalue()
-
-    def pageListWithContext(self, request, formatter, info=1, context=180,
-                            maxlines=1, paging=True, hitsFrom=0, hitsInfo=0):
-        """ Format a list of found pages with context
-
-        :param request: current request
-        :param formatter: formatter to use
-        :param info: show match info near the page link
-        :param context: how many characters to show around each match.
-        :param maxlines: how many context lines to show.
-        :param paging: toggle paging
-        :param hitsFrom: current position in the hits
-        :param hitsInfo: toggle hits info line
-        :rtype: unicode
-        :returns: formatted page list with context
-        """
-        self._reset(request, formatter)
-        f = formatter
-        write = self.buffer.write
-
-        if paging and len(self.hits) <= app.cfg.search_results_per_page:
-            paging = False
-
-        # Add pages formatted as definition list
-        if self.hits:
-            write(f.definition_list(1))
-
-            if paging:
-                hitsTo = hitsFrom + app.cfg.search_results_per_page
-                displayHits = self.hits[hitsFrom:hitsTo]
-            else:
-                displayHits = self.hits
-
-            for page in displayHits:
-                # TODO handle interwiki search hits
-                matchInfo = ''
-                if info:
-                    matchInfo = self.formatInfo(f, page)
-                if page.attachment:
-                    fmt_context = ""
-                    querydict = {
-                        'action': 'AttachFile',
-                        'do': 'view',
-                        'target': page.attachment,
-                    }
-                elif page.page_name.startswith('FS/'): # XXX FS hardcoded
-                    fmt_context = ""
-                    querydict = None
-                else:
-                    fmt_context = self.formatContext(page, context, maxlines)
-                    if page.rev and page.rev != page.page.getRevList()[0]:
-                        querydict = {
-                            'rev': page.rev,
-                        }
-                    else:
-                        querydict = None
-                querystr = self.querystring(querydict)
-                item = [
-                    f.definition_term(1),
-                    f.pagelink(1, page.page_name, querystr=querystr),
-                    self.formatTitle(page),
-                    f.pagelink(0, page.page_name),
-                    matchInfo,
-                    f.definition_term(0),
-                    f.definition_desc(1),
-                    fmt_context,
-                    f.definition_desc(0),
-                    self.formatHitInfoBar(page),
-                    ]
-                write(''.join(item))
-            write(f.definition_list(0))
-            if paging:
-                write(self.formatPageLinks(hitsFrom=hitsFrom,
-                    hitsPerPage=app.cfg.search_results_per_page,
-                    hitsNum=len(self.hits)))
-
-        return self.getvalue()
-
-    # Private -----------------------------------------------------------
-
-    # These methods are not meant to be used by clients and may change
-    # without notice.
-
-    def formatContext(self, page, context, maxlines):
-        """ Format search context for each matched page
-
-        Try to show the context of the first maxlines interesting matches.
-        """
-        f = self.formatter
-        if not page.page:
-            from MoinMoin.Page import Page
-            page.page = Page(self.request, page.page_name)
-        body = page.page.get_raw_body()
-        last = len(body) - 1
-        lineCount = 0
-        output = []
-
-        # Get unique text matches sorted by match.start, try to ignore
-        # matches in page header, and show the first maxlines matches.
-        # TODO: when we implement weight algorithm for text matches, we
-        # should get the list of text matches sorted by weight and show
-        # the first maxlines matches.
-        matches = page.get_matches(unique=1, sort='start', type=TextMatch)
-        i, start = self.firstInterestingMatch(page, matches)
-
-        # Format context
-        while i < len(matches) and lineCount < maxlines:
-            match = matches[i]
-
-            # Get context range for this match
-            start, end = self.contextRange(context, match, start, last)
-
-            # Format context lines for matches. Each complete match in
-            # the context will be highlighted, and if the full match is
-            # in the context, we increase the index and will not show the
-            # same match again on a separate line.
-
-            output.append(f.text(u'...'))
-
-            # Get the index of the first match completely within the
-            # context.
-            for j in xrange(0, len(matches)):
-                if matches[j].start >= start:
-                    break
-
-            # Add all matches in context and the text between them
-            while True:
-                match = matches[j]
-                # Ignore matches behind the current position
-                if start < match.end:
-                    # Append the text before match
-                    if start < match.start:
-                        output.append(f.text(body[start:match.start]))
-                    # And the match
-                    output.append(self.formatMatch(body, match, start))
-                    start = match.end
-                # Get next match, but only if it's completely within the context
-                if j < len(matches) - 1 and matches[j + 1].end <= end:
-                    j += 1
-                else:
-                    break
-
-            # Add text after last match and finish the line
-            if match.end < end:
-                output.append(f.text(body[match.end:end]))
-            output.append(f.text(u'...'))
-            output.append(f.linebreak(preformatted=0))
-
-            # Increase line and point to the next match
-            lineCount += 1
-            i = j + 1
-
-        output = ''.join(output)
-
-        if not output:
-            # Return the first context characters from the page text
-            output = f.text(page.page.getPageText(length=context))
-            output = output.strip()
-            if not output:
-                # This is a page with no text, only header, for example,
-                # a redirect page.
-                output = f.text(page.page.getPageHeader(length=context))
-
-        return output
-
-    def firstInterestingMatch(self, page, matches):
-        """ Return the first interesting match
-
-        This function is needed only because we don't yet have a weight
-        algorithm for page text matches.
-
-        Try to find the first match in the page text. If we can't find
-        one, we return the first match and start=0.
-
-        :rtype: tuple
-        :returns: index of first match, start of text
-        """
-        header = page.page.getPageHeader()
-        start = len(header)
-        # Find first match after start
-        for i in xrange(len(matches)):
-            if matches[i].start >= start and \
-                    isinstance(matches[i], TextMatch):
-                return i, start
-        return 0, 0
-
-    def contextRange(self, context, match, start, last):
-        """ Compute context range
-
-        Add context around each match. If there is no room for context
-        before or after the match, show more context on the other side.
-
-        :param context: context length
-        :param match: current match
-        :param start: context should not start before that index, unless
-                      end is past the last character.
-        :param last: last character index
-        :rtype: tuple
-        :returns: start, end of context
-        """
-        # Start by giving equal context on both sides of match
-        contextlen = max(context - len(match), 0)
-        cstart = match.start - contextlen / 2
-        cend = match.end + contextlen / 2
-
-        # If the context starts before start, give more context at the end
-        if cstart < start:
-            cend += start - cstart
-            cstart = start
-
-        # But if the end is after last, give context back to the start
-        if cend > last:
-            cstart -= cend - last
-            cend = last
-
-        # Keep context start positive for very short texts
-        cstart = max(cstart, 0)
-
-        return cstart, cend
-
-    def formatTitle(self, page):
-        """ Format page title
-
-        Invoke format match on all unique matches in page title.
-
-        :param page: found page
-        :rtype: unicode
-        :returns: formatted title
-        """
-        # Get unique title matches sorted by match.start
-        matches = page.get_matches(unique=1, sort='start', type=TitleMatch)
-
-        # Format
-        pagename = page.page_name
-        f = self.formatter
-        output = []
-        start = 0
-        for match in matches:
-            # Ignore matches behind the current position
-            if start < match.end:
-                # Append the text before the match
-                if start < match.start:
-                    output.append(f.text(pagename[start:match.start]))
-                # And the match
-                output.append(self.formatMatch(pagename, match, start))
-                start = match.end
-        # Add text after match
-        if start < len(pagename):
-            output.append(f.text(pagename[start:]))
-
-        if page.attachment: # show the attachment that matched
-            output.extend([
-                    " ",
-                    f.strong(1),
-                    f.text("(%s)" % page.attachment),
-                    f.strong(0)])
-
-        return ''.join(output)
-
-    def formatMatch(self, body, match, location):
-        """ Format single match in text
-
-        Format the part of the match after the current location in the
-        text. Matches behind location are ignored and an empty string is
-        returned.
-
-        :param body: text containing match
-        :param match: search match in text
-        :param location: current location in text
-        :rtype: unicode
-        :returns: formatted match or empty string
-        """
-        start = max(location, match.start)
-        if start < match.end:
-            f = self.formatter
-            output = [
-                f.strong(1),
-                f.text(body[start:match.end]),
-                f.strong(0),
-                ]
-            return ''.join(output)
-        return ''
-
-    def formatPageLinks(self, hitsFrom, hitsPerPage, hitsNum):
-        """ Format previous and next page links in page
-
-        :param hitsFrom: current position in the hits
-        :param hitsPerPage: number of hits per page
-        :param hitsNum: number of hits
-        :rtype: unicode
-        :returns: links to previous and next pages (if they exist)
-        """
-        f = self.formatter
-        querydict = dict(wikiutil.parseQueryString(self.request.query_string))
-
-        def page_url(n):
-            querydict.update({'from': n * hitsPerPage})
-            return XXX.page.url(self.request, querydict, escape=0)
-
-        pages = hitsNum // hitsPerPage
-        remainder = hitsNum % hitsPerPage
-        if remainder:
-            pages += 1
-        cur_page = hitsFrom // hitsPerPage
-
-        textlinks = []
-
-        # previous page available
-        if cur_page > 0:
-            textlinks.append(''.join([
-                        f.url(1, href=page_url(cur_page-1)),
-                        f.text(_('Previous')),
-                        f.url(0)]))
-        else:
-            textlinks.append('')
-
-        # list of pages to be shown
-        page_range = range(*(
-            cur_page - 5 < 0 and
-                (0, pages > 10 and 10 or pages) or
-                (cur_page - 5, cur_page + 6 > pages and
-                    pages or cur_page + 6)))
-        textlinks.extend([''.join([
-                i != cur_page and f.url(1, href=page_url(i)) or '',
-                f.text(str(i+1)),
-                i != cur_page and f.url(0) or '',
-            ]) for i in page_range])
-
-        # next page available
-        if cur_page < pages - 1:
-            textlinks.append(''.join([
-                f.url(1, href=page_url(cur_page+1)),
-                f.text(_('Next')),
-                f.url(0)]))
-        else:
-            textlinks.append('')
-
-        return ''.join([
-            f.table(1, attrs={'tableclass': 'searchpages'}),
-            f.table_row(1),
-                f.table_cell(1),
-                # textlinks
-                (f.table_cell(0) + f.table_cell(1)).join(textlinks),
-                f.table_cell(0),
-            f.table_row(0),
-            f.table(0),
-        ])
-
-    def formatHitInfoBar(self, page):
-        """ Returns the code for the information below a search hit
-
-        :param page: the FoundPage instance
-        """
-        request = self.request
-        f = self.formatter
-        p = page.page
-
-        rev = p.get_real_rev()
-        if rev is None:
-            rev = 0
-
-        size_str = '%.1fk' % (p.size()/1024.0)
-        revisions = p.getRevList()
-        if len(revisions) and rev == revisions[0]:
-            rev_str = '%s: %d (%s)' % (_('rev'), rev, _('current'))
-        else:
-            rev_str = '%s: %d' % (_('rev'), rev, )
-        lastmod_str = _('last modified: %s') % p.mtime(printable=True)
-
-        result = f.paragraph(1, attr={'class': 'searchhitinfobar'}) + \
-                 f.text('%s - %s %s' % (size_str, rev_str, lastmod_str)) + \
-                 f.paragraph(0)
-        return result
-
-    def querystring(self, querydict=None):
-        """ Return query string, used in the page link
-
-        :keyword querydict: use these parameters (default: None)
-        """
-        if querydict is None:
-            querydict = {}
-        if 'action' not in querydict or querydict['action'] == 'AttachFile':
-            highlight = self.query.highlight_re()
-            if highlight:
-                querydict.update({'highlight': highlight})
-        querystr = wikiutil.makeQueryString(querydict)
-        return querystr
-
-    def formatInfo(self, formatter, page):
-        """ Return formatted match info
-
-        :param formatter: the formatter instance to use
-        :param page: the current page instance
-        """
-        template = u' . . . %s %s'
-        template = u"%s%s%s" % (formatter.span(1, css_class="info"),
-                                template,
-                                formatter.span(0))
-        # Count number of unique matches in text of all types
-        count = len(page.get_matches(unique=1))
-        info = template % (count, self.matchLabel[count != 1])
-        return info
-
-    def getvalue(self):
-        """ Return output in div with CSS class """
-        value = [
-            self.formatter.div(1, css_class='searchresults'),
-            self.buffer.getvalue(),
-            self.formatter.div(0),
-            ]
-        return '\n'.join(value)
-
-    def _reset(self, request, formatter):
-        """ Update internal state before new output
-
-        Do not call this, it should be called only by the instance code.
-
-        Each request might need different translations or other user preferences.
-
-        :param request: current request
-        :param formatter: the formatter instance to use
-        """
-        self.buffer = StringIO.StringIO()
-        self.formatter = formatter
-        self.request = request
-        # Use 1 match, 2 matches...
-        self.matchLabel = (_('match'), _('matches'))
-
-
-def getSearchResults(request, query, hits, start, sort, estimated_hits):
-    """ Return a SearchResults object with the specified properties
-
-    :param request: current request
-    :param query: the search query object tree
-    :param hits: list of hits
-    :param start: position to start showing the hits
-    :param sort: sorting of the results, either 'weight' or 'page_name'
-    :param estimated_hits: if true, use this estimated hit count
-    """
-    result_hits = []
-    for wikiname, page, attachment, match, rev in hits:
-        if wikiname in (app.cfg.interwikiname, 'Self'): # a local match
-            if attachment:
-                result_hits.append(FoundAttachment(page.page_name, attachment, matches=match, page=page, rev=rev))
-            else:
-                result_hits.append(FoundPage(page.page_name, matches=match, page=page, rev=rev))
-        else:
-            page_name = page # for remote wikis, we have the page_name, not the page obj
-            result_hits.append(FoundRemote(wikiname, page_name, attachment, matches=match, rev=rev))
-    elapsed = time.time() - start
-    count = 0 # XXX was: count of items in storage
-    return SearchResults(query, result_hits, count, elapsed, sort,
-            estimated_hits)
-
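For reference, the context-window arithmetic that the removed contextRange() implemented can be sketched as a standalone function. The helper name and the example call below are illustrative only; nothing in the remaining tree provides them.

    def context_range(context, match_start, match_end, start, last):
        """Clamp a window of `context` characters around a match to [start, last]."""
        # Give equal context on both sides of the match.
        contextlen = max(context - (match_end - match_start), 0)
        cstart = match_start - contextlen // 2
        cend = match_end + contextlen // 2
        # No room before the match: move the surplus to the end ...
        if cstart < start:
            cend += start - cstart
            cstart = start
        # ... and if the end runs past the last character, give it back to the start.
        if cend > last:
            cstart -= cend - last
            cend = last
        # Keep the start non-negative for very short texts.
        return max(cstart, 0), cend

    # A 4-character match at 100..104 in a 1000-character body with context=180
    # yields (12, 192), i.e. roughly 88 characters on either side of the match:
    # context_range(180, 100, 104, 0, 999)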
--- a/docs/examples/config/snippets/xapian_wikiconfig_snippet	Sun Mar 06 16:52:18 2011 +0100
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-    # This is a sample configuration snippet that shows how to configure
-    # Xapian indexing search. For further help see HelpOnXapian.
-
-    # You need Xapian and its Python bindings (xapian-core and xapian-bindings). 
-    # On Windows, you additionally need pywin32 (http://sourceforge.net/projects/pywin32/). 
-
-    # Setting this to True enables Xapian-indexed search.
-    #xapian_search = False
-
-    # IMPORTANT: After enabling (and also after changing any xapian related setting), you
-    # need to completely (re-)build the index.
-    # There are 2 ways:
-    # A. Unsafe 1-stage xapian index rebuilding:
-    #    moin --config-dir=/where/your/configdir/is --wiki-url=wiki-url/ index build --mode=rebuild
-    #    Use this if your wiki is not running at that time, if it is unlikely that
-    #    someone uses the search index meanwhile, or if you can live with a few failing
-    #    searches while it switches from the old to the new index.
-    #    "rebuild" is the same as "buildnewindex" immediately followed by "usenewindex".
-    # B. Safe 2-stage xapian index rebuilding:
-    #    moin ... index build --mode=buildnewindex
-    #    <stop wiki>
-    #    moin ... index build --mode=usenewindex
-    #    <start wiki>
-    #    buildnewindex will build a SEPARATE new index while the wiki is running and it
-    #    will NOT interfere with wiki operations at all (except that it consumes some
-    #    server resources like cpu, disk, ram) - the wiki will NOT use the new index.
-    #    This phase can take from a few minutes up to many hours, depending on the size of your wiki.
-    #    usenewindex is the command that switches from the current to the new index.
-    #    If you would rather take the wiki offline for a short moment than risk a few
-    #    failing searches, you can stop the wiki, switch the index, and start the wiki
-    #    again. usenewindex only takes a few milliseconds.
-
-    # If set to True, ALL page revisions will be indexed (not only the
-    # latest revision).
-    # Thus, older revisions optionally become searchable (see FindPage search options).
-    #xapian_index_history = False
-
-    # If set to True, words will be indexed in their original AND their stemmed
-    # forms. This means that searching for "testing" will also find "tested",
-    # "tester", "testings", etc.
-    #xapian_stemming = False
-
-    # This option lets you specify a non-default directory for storing the index.
-    # If set to None (default), it gets stored into <data_dir>/cache/xapian/. 
-    #xapian_index_dir = None
-
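For anyone still running the old Xapian search, the snippet deleted above amounts to a handful of wikiconfig settings. A minimal sketch, assuming a moin/1.9-style Config class body (these options no longer exist in this tree):

    # wikiconfig.py excerpt, moin/1.9-style Xapian settings
    xapian_search = True          # enable Xapian-indexed search
    xapian_index_history = False  # True also indexes old page revisions
    xapian_stemming = False       # True also indexes stemmed word forms
    xapian_index_dir = None       # None keeps the default index location under the data dir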
--- a/setup.py	Sun Mar 06 16:52:18 2011 +0100
+++ b/setup.py	Sun Mar 06 17:32:39 2011 +0100
@@ -89,7 +89,6 @@
         'sqlalchemy>=0.6.0', # metadata index and other stuff
         #'Werkzeug>=0.7dev', # wsgi toolkit, dev version (do not use ==0.7dev!)
         'Werkzeug==0.6.2', # use this if 0.7dev fails
-        #'xappy>=0.5', # xapian python OO interface, not used (yet?)
         'py==1.3.4', # py.test 1.3.4 is needed by unit tests
         'sphinx', # needed to build the docs
     ],