changeset 825:2438f386293f

merge xapian branch
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 10 Jun 2006 16:52:04 +0200
parents 9d74a2f53323 (current diff) 4562cd3a4a5f (diff)
children 8e880418d736
files MoinMoin/Page.py MoinMoin/PageEditor.py MoinMoin/lupy.py MoinMoin/multiconfig.py MoinMoin/script/lupy/__init__.py MoinMoin/script/lupy/build.py MoinMoin/script/lupy/optimize.py MoinMoin/support/lupy/__init__.py MoinMoin/support/lupy/document.py MoinMoin/support/lupy/index/__init__.py MoinMoin/support/lupy/index/documentwriter.py MoinMoin/support/lupy/index/field.py MoinMoin/support/lupy/index/indexwriter.py MoinMoin/support/lupy/index/segment.py MoinMoin/support/lupy/index/segmentmerger.py MoinMoin/support/lupy/index/term.py MoinMoin/support/lupy/index/terminfo.py MoinMoin/support/lupy/indexer.py MoinMoin/support/lupy/search/__init__.py MoinMoin/support/lupy/search/boolean.py MoinMoin/support/lupy/search/camelcase.py MoinMoin/support/lupy/search/fuzzy.py MoinMoin/support/lupy/search/hits.py MoinMoin/support/lupy/search/indexsearcher.py MoinMoin/support/lupy/search/phrase.py MoinMoin/support/lupy/search/prefix.py MoinMoin/support/lupy/search/regularexpression.py MoinMoin/support/lupy/search/similarity.py MoinMoin/support/lupy/search/term.py MoinMoin/support/lupy/store.py MoinMoin/support/lupy/util.py docs/CHANGES docs/Lupy-0.2.1/LICENSE docs/Lupy-0.2.1/README.txt docs/Lupy-0.2.1/releasenotes.txt setup.py
diffstat 53 files changed, 2488 insertions(+), 5371 deletions(-)
--- a/MoinMoin/Page.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/MoinMoin/Page.py	Sat Jun 10 16:52:04 2006 +0200
@@ -1540,7 +1540,8 @@
             links = self.parsePageLinks(request)
             cache.update('\n'.join(links) + '\n', True)
             return links
-        return cache.content(True).split('\n')
+        links = cache.content(True).split('\n')
+        return [link for link in links if link]
 
     def parsePageLinks(self, request):
         """ Parse page links by formatting with a pagelinks formatter 
--- a/MoinMoin/PageEditor.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/MoinMoin/PageEditor.py	Sat Jun 10 16:52:04 2006 +0200
@@ -967,9 +967,9 @@
             if self.request.cfg.mail_enabled:
                 msg = msg + self._notifySubscribers(comment, trivial)
           
-            if self.request.cfg.lupy_search:
-                from MoinMoin import lupy
-                index = lupy.Index(self.request)
+            if self.request.cfg.xapian_search:
+                from MoinMoin import Xapian
+                index = Xapian.Index(self.request)
                 # When we have automatic index building, we can add to
                 # the queue even if the index is missing.
                 if index.exists():
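
PageEditor now queues saved pages for the Xapian index instead of the old Lupy one, guarded by the renamed xapian_search config option. A minimal wikiconfig sketch, assuming the option keeps the boolean semantics of the old lupy_search flag:

    # wikiconfig.py (sketch)
    from MoinMoin.multiconfig import DefaultConfig

    class Config(DefaultConfig):
        xapian_search = True   # enable Xapian-based indexing and search
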
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/Xapian.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,708 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - xapian indexing search engine
+
+    @copyright: 2006 MoinMoin:ThomasWaldmann,
+                2006 MoinMoin:FranzPletz
+    @license: GNU GPL, see COPYING for details.
+"""
+debug = True
+
+import sys, os, re, codecs, errno, time
+from pprint import pprint
+
+import xapian
+from MoinMoin.support.xapwrap import document as xapdoc
+from MoinMoin.support.xapwrap import index as xapidx
+from MoinMoin.parser.text_moin_wiki import Parser as WikiParser
+
+from MoinMoin.Page import Page
+from MoinMoin import config, wikiutil
+from MoinMoin.util import filesys, lock
+
+
+class UnicodeQuery(xapian.Query):
+    def __init__(self, *args, **kwargs):
+        self.encoding = kwargs.get('encoding', config.charset)
+
+        nargs = []
+        for i in args:
+            if isinstance(i, unicode):
+                i = i.encode(self.encoding)
+            nargs.append(i)
+
+        xapian.Query.__init__(self, *nargs, **kwargs)
+
+
+##############################################################################
+### Tokenizer
+##############################################################################
+
+class WikiAnalyzer:
+    singleword = r"[%(u)s][%(l)s]+" % {
+                     'u': config.chars_upper,
+                     'l': config.chars_lower,
+                 }
+
+    singleword_re = re.compile(singleword, re.U)
+    wikiword_re = re.compile(WikiParser.word_rule, re.U)
+
+    token_re = re.compile(
+        r"(?P<company>\w+[&@]\w+)|" + # company names like AT&T and Excite@Home.
+        r"(?P<email>\w+([.-]\w+)*@\w+([.-]\w+)*)|" +    # email addresses
+        r"(?P<hostname>\w+(\.\w+)+)|" +                 # hostnames
+        r"(?P<num>(\w+[-/.,])*\w*\d\w*([-/.,]\w+)*)|" + # version numbers
+        r"(?P<acronym>(\w\.)+)|" +          # acronyms: U.S.A., I.B.M., etc.
+        r"(?P<word>\w+)",                   # words (including WikiWords)
+        re.U)
+
+    dot_re = re.compile(r"[-_/,.]")
+    mail_re = re.compile(r"[-_/,.]|(@)")
+    
+    # XXX limit stuff above to xapdoc.MAX_KEY_LEN
+    # WORD_RE = re.compile('\\w{1,%i}' % MAX_KEY_LEN, re.U)
+
+    def tokenize(self, value):
+        """Yield a stream of lower cased words from a string.
+           value must be a unicode object or a list of unicode objects.
+        """
+        def enc(uc):
+            """ 'encode' unicode results into whatever xapian / xapwrap wants """
+            lower = uc.lower()
+            return lower
+            
+        if isinstance(value, list): # used for page links
+            for v in value:
+                yield enc(v)
+        else:
+            tokenstream = re.finditer(self.token_re, value)
+            for m in tokenstream:
+                if m.group("acronym"):
+                    yield enc(m.group("acronym").replace('.', ''))
+                elif m.group("company"):
+                    yield enc(m.group("company"))
+                elif m.group("email"):
+                    for word in self.mail_re.split(m.group("email")):
+                        if word:
+                            yield enc(word)
+                elif m.group("hostname"):
+                    for word in self.dot_re.split(m.group("hostname")):
+                        yield enc(word)
+                elif m.group("num"):
+                    for word in self.dot_re.split(m.group("num")):
+                        yield enc(word)
+                elif m.group("word"):
+                    word = m.group("word")
+                    yield  enc(word)
+                    # if it is a CamelCaseWord, we additionally yield Camel, Case and Word
+                    if self.wikiword_re.match(word):
+                        for sm in re.finditer(self.singleword_re, word):
+                            yield enc(sm.group())
+
+
+#############################################################################
+### Indexing
+#############################################################################
+
+class UpdateQueue:
+    def __init__(self, file, lock_dir):
+        self.file = file
+        self.writeLock = lock.WriteLock(lock_dir, timeout=10.0)
+        self.readLock = lock.ReadLock(lock_dir, timeout=10.0)
+
+    def exists(self):
+        return os.path.exists(self.file)
+
+    def append(self, pagename):
+        """ Append a page to queue """
+        if not self.writeLock.acquire(60.0):
+            request.log("can't add %r to xapian update queue: can't lock queue" %
+                        pagename)
+            return
+        try:
+            f = codecs.open(self.file, 'a', config.charset)
+            try:
+                f.write(pagename + "\n")
+            finally:
+                f.close()
+        finally:
+            self.writeLock.release()
+
+    def pages(self):
+        """ Return list of pages in the queue """
+        if self.readLock.acquire(1.0):
+            try:
+                return self._decode(self._read())
+            finally:
+                self.readLock.release()
+        return []
+
+    def remove(self, pages):
+        """ Remove pages from the queue
+        
+        When the queue is empty, the queue file is removed, so exists()
+        can tell if there is something waiting in the queue.
+        """
+        if self.writeLock.acquire(30.0):
+            try:
+                queue = self._decode(self._read())
+                for page in pages:
+                    try:
+                        queue.remove(page)
+                    except ValueError:
+                        pass
+                if queue:
+                    self._write(queue)
+                else:
+                    self._removeFile()
+                return True
+            finally:
+                self.writeLock.release()
+        return False
+
+    # Private -------------------------------------------------------
+
+    def _decode(self, data):
+        """ Decode queue data """
+        pages = data.splitlines()
+        return self._filterDuplicates(pages)
+
+    def _filterDuplicates(self, pages):
+        """ Filter duplicates in page list, keeping the order """
+        unique = []
+        seen = {}
+        for name in pages:
+            if not name in seen:
+                unique.append(name)
+                seen[name] = 1
+        return unique
+
+    def _read(self):
+        """ Read and return queue data
+        
+        This does not do anything with the data so we can release the
+        lock as soon as possible, enabling others to update the queue.
+        """
+        try:
+            f = codecs.open(self.file, 'r', config.charset)
+            try:
+                return f.read()
+            finally:
+                f.close()
+        except (OSError, IOError), err:
+            if err.errno != errno.ENOENT:
+                raise
+            return ''
+
+    def _write(self, pages):
+        """ Write pages to queue file
+        
+        Requires queue write locking.
+        """
+        # XXX use tmpfile/move for atomic replace on real operating systems
+        data = '\n'.join(pages) + '\n'
+        f = codecs.open(self.file, 'w', config.charset)
+        try:
+            f.write(data)
+        finally:
+            f.close()
+
+    def _removeFile(self):
+        """ Remove queue file 
+        
+        Requires queue write locking.
+        """
+        try:
+            os.remove(self.file)
+        except OSError, err:
+            if err.errno != errno.ENOENT:
+                raise
+
+
+class Index:
+    indexValueMap = {
+        # mapping the value names we can easily fetch from the index to
+        # integers required by xapian. 0 and 1 are reserved by xapwrap!
+        'pagename': 2,
+        'attachment': 3,
+        'mtime': 4,
+        'wikiname': 5,
+    }
+    prefixMap = {
+        # http://svn.xapian.org/*checkout*/trunk/xapian-applications/omega/docs/termprefixes.txt
+        'author': 'A',
+        'date':   'D', # numeric format: YYYYMMDD or "latest" - e.g. D20050224 or Dlatest
+                       #G   newsGroup (or similar entity - e.g. a web forum name)
+        'hostname': 'H',
+        'keyword': 'K',
+        'lang': 'L',   # ISO Language code
+                       #M   Month (numeric format: YYYYMM)
+                       #N   ISO couNtry code (or domaiN name)
+                       #P   Pathname
+                       #Q   uniQue id
+                       #R   Raw (i.e. unstemmed) term
+        'title': 'S',  # Subject (or title)
+        'mimetype': 'T',
+        'url': 'U',    # full URL of indexed document - if the resulting term would be > 240
+                       # characters, a hashing scheme is used to prevent overflowing
+                       # the Xapian term length limit (see omindex for how to do this).
+                       #W   "weak" (approximately 10 day intervals, taken as YYYYMMD from
+                       #  the D term, and changing the last digit to a '2' if it's a '3')
+                       #X   longer prefix for user-defined use
+        'linkto': 'XLINKTO', # this document links to that document
+                       #Y   year (four digits)
+    }
+
+    class LockedException(Exception):
+        pass
+    
+    def __init__(self, request):
+        self.request = request
+        cache_dir = request.cfg.cache_dir
+        self.main_dir = os.path.join(cache_dir, 'xapian')
+        self.dir = os.path.join(self.main_dir, 'index')
+        filesys.makeDirs(self.dir)
+        self.sig_file = os.path.join(self.main_dir, 'complete')
+        lock_dir = os.path.join(self.main_dir, 'index-lock')
+        self.lock = lock.WriteLock(lock_dir,
+                                   timeout=3600.0, readlocktimeout=60.0)
+        self.read_lock = lock.ReadLock(lock_dir, timeout=3600.0)
+        self.queue = UpdateQueue(os.path.join(self.main_dir, "update-queue"),
+                                 os.path.join(self.main_dir, 'update-queue-lock'))
+        
+        # Disabled until we have a sane way to build the index with a
+        # queue in small steps.
+        ## if not self.exists():
+        ##    self.indexPagesInNewThread(request)
+
+    def exists(self):
+        """ Check if index exists """        
+        return os.path.exists(self.sig_file)
+                
+    def mtime(self):
+        return os.path.getmtime(self.dir)
+
+    def _search(self, query):
+        """ read lock must be acquired """
+        while True:
+            try:
+                searcher, timestamp = self.request.cfg.xapian_searchers.pop()
+                if timestamp != self.mtime():
+                    searcher.close()
+                else:
+                    break
+            except IndexError:
+                searcher = xapidx.ReadOnlyIndex(self.dir)
+                searcher.configure(self.prefixMap, self.indexValueMap)
+                timestamp = self.mtime()
+                break
+        
+        hits = searcher.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname'])
+        self.request.cfg.xapian_searchers.append((searcher, timestamp))
+        return hits
+    
+    def search(self, query):
+        if not self.read_lock.acquire(1.0):
+            raise self.LockedException
+        try:
+            hits = self._search(query)
+        finally:
+            self.read_lock.release()
+        return hits
+
+    def update_page(self, page):
+        self.queue.append(page.page_name)
+        self._do_queued_updates_InNewThread()
+
+    def indexPages(self, files=None, mode='update'):
+        """ Index all pages (and files, if given)
+        
+        Can be called only from a script. To index pages during a user
+        request, use indexPagesInNewThread.
+        @arg files: iterator or list of files to index additionally
+        """
+        if not self.lock.acquire(1.0):
+            self.request.log("can't index: can't acquire lock")
+            return
+        try:
+            request = self._indexingRequest(self.request)
+            self._index_pages(request, None, files, mode)
+        finally:
+            self.lock.release()
+    
+    def indexPagesInNewThread(self, files=None, mode='update'):
+        """ Index all pages in a new thread
+        
+        Should be called from a user request. From a script, use indexPages.
+        """
+        if not self.lock.acquire(1.0):
+            self.request.log("can't index: can't acquire lock")
+            return
+        try:
+            # Prevent rebuilding the index just after it was finished
+            if self.exists():
+                self.lock.release()
+                return
+            from threading import Thread
+            indexThread = Thread(target=self._index_pages,
+                args=(self._indexingRequest(self.request), self.lock, files, mode))
+            indexThread.setDaemon(True)
+            
+            # Join the index thread after the current request finishes, to
+            # prevent Apache CGI from killing the process.
+            def joinDecorator(finish):
+                def func():
+                    finish()
+                    indexThread.join()
+                return func
+
+            self.request.finish = joinDecorator(self.request.finish)        
+            indexThread.start()
+        except:
+            self.lock.release()
+            raise
+
+    def optimize(self):
+        pass
+
+    # Private ----------------------------------------------------------------
+
+    def _do_queued_updates_InNewThread(self):
+        """ do queued index updates in a new thread
+        
+        Should be called from a user request. From a script, use indexPages.
+        """
+        if not self.lock.acquire(1.0):
+            self.request.log("can't index: can't acquire lock")
+            return
+        try:
+            from threading import Thread
+            indexThread = Thread(target=self._do_queued_updates,
+                args=(self._indexingRequest(self.request), self.lock))
+            indexThread.setDaemon(True)
+            
+            # Join the index thread after the current request finishes, to
+            # prevent Apache CGI from killing the process.
+            def joinDecorator(finish):
+                def func():
+                    finish()
+                    indexThread.join()
+                return func
+                
+            self.request.finish = joinDecorator(self.request.finish)        
+            indexThread.start()
+        except:
+            self.lock.release()
+            raise
+
+    def _do_queued_updates(self, request, lock=None, amount=5):
+        """ Assumes that the write lock is acquired """
+        writer = None
+        try:
+            writer = xapidx.Index(self.dir, True)
+            writer.configure(self.prefixMap, self.indexValueMap)
+            pages = self.queue.pages()[:amount]
+            for name in pages:
+                p = Page(request, name)
+                self._index_page(writer, p, mode='update')
+                self.queue.remove([name])
+        finally:
+            if writer is not None:
+                writer.close()
+            if lock:
+                lock.release()
+
+    def contentfilter(self, filename):
+        """ Get a filter for content of filename and return unicode content. """
+        request = self.request
+        mt = wikiutil.MimeType(filename=filename)
+        for modulename in mt.module_name():
+            try:
+                execute = wikiutil.importPlugin(request.cfg, 'filter', modulename)
+                break
+            except wikiutil.PluginMissingError:
+                pass
+            #else:
+            #    raise "Cannot load filter for mimetype." + modulename  # XXX
+        try:
+            data = execute(self, filename)
+            if debug:
+                request.log("Filter %s returned %d characters for file %s" % (modulename, len(data), filename))
+        except (OSError, IOError), err:
+            data = ''
+            request.log("Filter %s threw error '%s' for file %s" % (modulename, str(err), filename))
+        return mt.mime_type(), data
+   
+    def test(self, request):
+        idx = xapidx.ReadOnlyIndex(self.dir)
+        idx.configure(self.prefixMap, self.indexValueMap)
+        print idx.search("is")
+        #for d in docs:
+        #    request.log("%r %r %r" % (d, d.get('attachment'), d.get('pagename')))
+
+    def _index_file(self, request, writer, filename, mode='update'):
+        """ index a file as it were a page named pagename
+            Assumes that the write lock is acquired
+        """
+        fs_rootpage = 'FS' # XXX FS hardcoded
+        try:
+            wikiname = request.cfg.interwikiname or 'Self'
+            itemid = "%s:%s" % (wikiname, os.path.join(fs_rootpage, filename))
+            mtime = os.path.getmtime(filename)
+            mtime = wikiutil.timestamp2version(mtime)
+            if mode == 'update':
+                query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
+                docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
+                if docs:
+                    doc = docs[0] # there should be only one
+                    uid = doc['uid']
+                    docmtime = long(doc['values']['mtime'])
+                    updated = mtime > docmtime
+                    if debug: request.log("uid %r: mtime %r > docmtime %r == updated %r" % (uid, mtime, docmtime, updated))
+                else:
+                    uid = None
+                    updated = True
+            elif mode == 'add':
+                updated = True
+            if debug: request.log("%s %r" % (filename, updated))
+            if updated:
+                xitemid = xapdoc.Keyword('itemid', itemid)
+                mimetype, file_content = self.contentfilter(filename)
+                xwname = xapdoc.SortKey('wikiname', request.cfg.interwikiname or "Self")
+                xpname = xapdoc.SortKey('pagename', fs_rootpage)
+                xattachment = xapdoc.SortKey('attachment', filename) # XXX we should treat files like real pages, not attachments
+                xmtime = xapdoc.SortKey('mtime', mtime)
+                title = " ".join(os.path.join(fs_rootpage, filename).split("/"))
+                xtitle = xapdoc.Keyword('title', title)
+                xmimetype = xapdoc.TextField('mimetype', mimetype, True)
+                xcontent = xapdoc.TextField('content', file_content)
+                doc = xapdoc.Document(textFields=(xcontent, xmimetype, ),
+                                      keywords=(xtitle, xitemid, ),
+                                      sortFields=(xpname, xattachment, xmtime, xwname, ),
+                                     )
+                doc.analyzerFactory = WikiAnalyzer
+                if mode == 'update':
+                    if debug: request.log("%s (replace %r)" % (filename, uid))
+                    doc.uid = uid
+                    id = writer.index(doc)
+                elif mode == 'add':
+                    if debug: request.log("%s (add)" % (filename,))
+                    id = writer.index(doc)
+        except (OSError, IOError), err:
+            pass
+
+    def _index_page(self, writer, page, mode='update'):
+        """ Index a page - assumes that the write lock is acquired
+            @arg writer: the index writer object
+            @arg page: a page object
+            @arg mode: 'add' = just add, no checks
+                       'update' = check if already in index and update if needed (mtime)
+            
+        """
+        request = page.request
+        wikiname = request.cfg.interwikiname or "Self"
+        pagename = page.page_name
+        mtime = page.mtime_usecs()
+        itemid = "%s:%s" % (wikiname, pagename)
+        updated = False
+
+        if mode == 'update':
+            # from #xapian: if you generate a special "unique id" term,
+            # you can just call database.replace_document(uid_term, doc)
+            # -> done in xapwrap.index.Index.index()
+            query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
+            docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
+            if docs:
+                doc = docs[0] # there should be only one
+                uid = doc['uid']
+                docmtime = long(doc['values']['mtime'])
+                updated = mtime > docmtime
+                if debug: request.log("uid %r: mtime %r > docmtime %r == updated %r" % (uid, mtime, docmtime, updated))
+            else:
+                uid = None
+                updated = True
+        elif mode == 'add':
+            updated = True
+        if debug: request.log("%s %r" % (pagename, updated))
+        if updated:
+            xwname = xapdoc.SortKey('wikiname', request.cfg.interwikiname or "Self")
+            xpname = xapdoc.SortKey('pagename', pagename)
+            xattachment = xapdoc.SortKey('attachment', '') # this is a real page, not an attachment
+            xmtime = xapdoc.SortKey('mtime', mtime)
+            xtitle = xapdoc.TextField('title', pagename, True) # prefixed
+            xkeywords = [xapdoc.Keyword('itemid', itemid)]
+            for pagelink in page.getPageLinks(request):
+                xkeywords.append(xapdoc.Keyword('linkto', pagelink))
+            xcontent = xapdoc.TextField('content', page.get_raw_body())
+            doc = xapdoc.Document(textFields=(xcontent, xtitle),
+                                  keywords=xkeywords,
+                                  sortFields=(xpname, xattachment, xmtime, xwname, ),
+                                 )
+            doc.analyzerFactory = WikiAnalyzer
+            #search_db_language = "english"
+            #stemmer = xapian.Stem(search_db_language)
+            #pagetext = page.get_raw_body().lower()
+            #words = re.finditer(r"\w+", pagetext)
+            #count = 0
+            #for wordmatch in words:
+            #    count += 1
+            #    word = wordmatch.group().encode(config.charset)
+            #    document.add_posting('R' + stemmer.stem_word(word), count) # count should be term position in document (starting at 1)
+            
+            if mode == 'update':
+                if debug: request.log("%s (replace %r)" % (pagename, uid))
+                doc.uid = uid
+                id = writer.index(doc)
+            elif mode == 'add':
+                if debug: request.log("%s (add)" % (pagename,))
+                id = writer.index(doc)
+
+        from MoinMoin.action import AttachFile
+
+        attachments = AttachFile._get_files(request, pagename)
+        for att in attachments:
+            filename = AttachFile.getFilename(request, pagename, att)
+            att_itemid = "%s//%s" % (itemid, att)
+            mtime = wikiutil.timestamp2version(os.path.getmtime(filename))
+            if mode == 'update':
+                query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', att_itemid))
+                docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', ])
+                if debug: request.log("##%r %r" % (filename, docs))
+                if docs:
+                    doc = docs[0] # there should be only one
+                    uid = doc['uid']
+                    docmtime = long(doc['values']['mtime'])
+                    updated = mtime > docmtime
+                    if debug: request.log("uid %r: mtime %r > docmtime %r == updated %r" % (uid, mtime, docmtime, updated))
+                else:
+                    uid = None
+                    updated = True
+            elif mode == 'add':
+                updated = True
+            if debug: request.log("%s %s %r" % (pagename, att, updated))
+            if updated:
+                xatt_itemid = xapdoc.Keyword('itemid', att_itemid)
+                xwname = xapdoc.SortKey('wikiname', wikiname) # needed for sortFields below even if the page itself was not re-indexed
+                xpname = xapdoc.SortKey('pagename', pagename)
+                xattachment = xapdoc.SortKey('attachment', att) # this is an attachment, store its filename
+                xmtime = xapdoc.SortKey('mtime', mtime)
+                xtitle = xapdoc.Keyword('title', '%s/%s' % (pagename, att))
+                mimetype, att_content = self.contentfilter(filename)
+                xmimetype = xapdoc.TextField('mimetype', mimetype, True)
+                xcontent = xapdoc.TextField('content', att_content)
+                doc = xapdoc.Document(textFields=(xcontent, xmimetype, ),
+                                      keywords=(xatt_itemid, xtitle, ),
+                                      sortFields=(xpname, xattachment, xmtime, xwname, ),
+                                     )
+                doc.analyzerFactory = WikiAnalyzer
+                if mode == 'update':
+                    if debug: request.log("%s (replace %r)" % (pagename, uid))
+                    doc.uid = uid
+                    id = writer.index(doc)
+                elif mode == 'add':
+                    if debug: request.log("%s (add)" % (pagename,))
+                    id = writer.index(doc)
+        #writer.flush()
+        
+
+    def _index_pages(self, request, lock=None, files=None, mode='update'):
+        """ Index all pages (and all given files)
+        
+        This should be called from indexPages or indexPagesInNewThread only!
+        
+        This may take some time, depending on the size of the wiki and speed
+        of the machine.
+
+        When called in a new thread, lock is acquired before the call,
+        and this method must release it when it finishes or fails.
+        """
+        writer = None
+        try:
+            self._unsign()
+            start = time.time()
+            writer = xapidx.Index(self.dir, True)
+            writer.configure(self.prefixMap, self.indexValueMap)
+            pages = request.rootpage.getPageList(user='', exists=1)
+            request.log("indexing all (%d) pages..." % len(pages))
+            for pagename in pages:
+                p = Page(request, pagename)
+                self._index_page(writer, p, mode)
+            if files:
+                request.log("indexing all files...")
+                for fname in files:
+                    fname = fname.strip()
+                    self._index_file(request, writer, fname, mode)
+            writer.close()
+            request.log("indexing completed successfully in %0.2f seconds." % 
+                        (time.time() - start))
+            self._sign()
+        finally:
+            if writer is not None:
+                writer.__del__() # release the index even if indexing failed before close()
+            if lock:
+                lock.release()
+
+    def _optimize(self, request):
+        """ Optimize the index """
+        pass
+
+    def _indexingRequest(self, request):
+        """ Return a new request that can be used for index building.
+        
+        This request uses a security policy that lets the current user
+        read any page. Without this policy some pages will not render,
+        which would result in a broken pagelinks index.
+        """
+        from MoinMoin.request.CLI import Request
+        from MoinMoin.security import Permissions
+        request = Request(request.url)
+        class SecurityPolicy(Permissions):
+            def read(*args, **kw):
+                return True        
+        request.user.may = SecurityPolicy(request.user)
+        return request
+
+    def _unsign(self):
+        """ Remove sig file - assume write lock acquired """
+        try:
+            os.remove(self.sig_file)
+        except OSError, err:
+            if err.errno != errno.ENOENT:
+                raise
+
+    def _sign(self):
+        """ Add sig file - assume write lock acquired """
+        f = file(self.sig_file, 'w')
+        try:
+            f.write('')
+        finally:
+            f.close()
+
+
+def run_query(query, db):
+    enquire = xapian.Enquire(db)
+    parser = xapian.QueryParser()
+    query = parser.parse_query(query, xapian.QueryParser.FLAG_WILDCARD)
+    print query.get_description()
+    enquire.set_query(query)
+    return enquire.get_mset(0, 10)
+
+def run(request):
+    pass
+    #print "Begin"
+    #db = xapian.WritableDatabase(xapian.open('test.db',
+    #                                         xapian.DB_CREATE_OR_OPEN))
+    #
+    # index_data(db) ???
+    #del db
+    #mset = run_query(sys.argv[1], db)
+    #print mset.get_matches_estimated()
+    #iterator = mset.begin()
+    #while iterator != mset.end():
+    #    print iterator.get_document().get_data()
+    #    iterator.next()
+    #for i in xrange(1,170):
+    #    doc = db.get_document(i)
+    #    print doc.get_data()
+
+if __name__ == '__main__':
+    run(None) # run() expects a request argument; this stub ignores it
+
+
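
A rough usage sketch of the WikiAnalyzer tokenizer defined above; the token list is illustrative (derived by reading the regexes), not the output of a test run:

    from MoinMoin.Xapian import WikiAnalyzer

    analyzer = WikiAnalyzer()
    tokens = list(analyzer.tokenize(u'WikiName is written in Python 2.4'))
    # roughly: [u'wikiname', u'wiki', u'name', u'is', u'written',
    #           u'in', u'python', u'2', u'4']
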
--- a/MoinMoin/action/AttachFile.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/MoinMoin/action/AttachFile.py	Sat Jun 10 16:52:04 2006 +0200
@@ -26,7 +26,7 @@
     @license: GNU GPL, see COPYING for details.
 """
 
-import os, mimetypes, time, zipfile
+import os, time, zipfile
 from MoinMoin import config, user, util, wikiutil, packages
 from MoinMoin.Page import Page
 from MoinMoin.util import filesys
@@ -163,15 +163,15 @@
     target = wikiutil.taintfilename(target)
 
     # set mimetype from extension, or from given mimetype
-    #type, encoding = mimetypes.guess_type(target)
+    #type, encoding = wikiutil.guess_type(target)
     #if not type:
     #    ext = None
     #    if request.form.has_key('mime'):
-    #        ext = mimetypes.guess_extension(request.form['mime'][0])
+    #        ext = wikiutil.guess_extension(request.form['mime'][0])
     #    if not ext:
-    #        type, encoding = mimetypes.guess_type(filename)
+    #        type, encoding = wikiutil.guess_type(filename)
     #        if type:
-    #            ext = mimetypes.guess_extension(type)
+    #            ext = wikiutil.guess_extension(type)
     #        else:
     #            ext = ''
     #    target = target + ext
@@ -193,7 +193,8 @@
         _addLogEntry(request, 'ATTNEW', pagename, target)
         
         return target
-    
+
+
 #############################################################################
 ### Internal helpers
 #############################################################################
@@ -645,16 +646,14 @@
     import shutil
 
     filename, fpath = _access_file(pagename, request)
-    if not filename: return # error msg already sent in _access_file
+    if not filename:
+        return # error msg already sent in _access_file
 
-    # get mimetype
-    type, enc = mimetypes.guess_type(filename)
-    if not type:
-        type = "application/octet-stream"
+    mt = wikiutil.MimeType(filename=filename)
 
     # send header
     request.http_headers([
-        "Content-Type: %s" % type,
+        "Content-Type: %s" % mt.content_type(),
         "Content-Length: %d" % os.path.getsize(fpath),
         # TODO: fix the encoding here, plain 8 bit is not allowed according to the RFCs
         # There is no solution that is compatible to IE except stripping non-ascii chars
@@ -778,24 +777,23 @@
 
     request.write('<h2>' + _("Attachment '%(filename)s'") % {'filename': filename} + '</h2>')
 
-    type, enc = mimetypes.guess_type(filename)
-    if type:
-        if type[:5] == 'image':
-            timestamp = htdocs_access(request) and "?%s" % time.time() or ''
-            request.write('<img src="%s%s" alt="%s">' % (
-                getAttachUrl(pagename, filename, request, escaped=1), timestamp, wikiutil.escape(filename, 1)))
-            return
-        elif type[:4] == 'text':
-            # TODO: should use formatter here!
-            request.write("<pre>")
-            # Try to decode file contents. It may return junk, but we
-            # don't have enough information on attachments.
-            content = open(fpath, 'r').read()
-            content = wikiutil.decodeUnknownInput(content)
-            content = wikiutil.escape(content)
-            request.write(content)
-            request.write("</pre>")
-            return
+    mt = wikiutil.MimeType(filename=filename)
+    if mt.major == 'image':
+        timestamp = htdocs_access(request) and "?%s" % time.time() or ''
+        request.write('<img src="%s%s" alt="%s">' % (
+            getAttachUrl(pagename, filename, request, escaped=1), timestamp, wikiutil.escape(filename, 1)))
+        return
+    elif mt.major == 'text':
+        # TODO: should use formatter here!
+        request.write("<pre>")
+        # Try to decode file contents. It may return junk, but we
+        # don't have enough information on attachments.
+        content = open(fpath, 'r').read()
+        content = wikiutil.decodeUnknownInput(content)
+        content = wikiutil.escape(content)
+        request.write(content)
+        request.write("</pre>")
+        return
 
     package = packages.ZipPackage(request, fpath)
     if package.isPackage():
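
AttachFile now relies on the wikiutil.MimeType helper (its implementation lives in MoinMoin/wikiutil.py, not part of this excerpt) instead of the stdlib mimetypes module. A hedged sketch of the attributes used above, with a hypothetical attachment name:

    from MoinMoin import wikiutil

    mt = wikiutil.MimeType(filename='diagram.png')  # hypothetical attachment
    mt.major           # 'image' - used by do_view to decide on inline display
    mt.content_type()  # e.g. 'image/png' - sent as the Content-Type header in do_get
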
--- a/MoinMoin/filter/application_msword.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/MoinMoin/filter/application_msword.py	Sat Jun 10 16:52:04 2006 +0200
@@ -11,5 +11,5 @@
 from MoinMoin import filter
 
 def execute(indexobj, filename):
-    return filter.execfilter("antiword %s", filename)
+    return filter.execfilter("HOME=/tmp antiword %s", filename) # no HOME makes antiword complain
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/filter/application_vnd_oasis_opendocument.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,25 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - OpenOffice.org 2.0 *.od? Filter (OpenDocument)
+
+    Depends on: nothing (only python with zlib)
+
+    @copyright: 2006 MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+import re, zipfile
+
+rx_stripxml = re.compile("<[^>]*?>", re.DOTALL|re.MULTILINE)
+
+def execute(indexobj, filename):
+    try:
+        zf = zipfile.ZipFile(filename, "r")
+        data = zf.read("content.xml")
+        zf.close()
+        data = " ".join(rx_stripxml.sub(" ", data).split())
+    except RuntimeError, err:
+        indexobj.request.log(str(err))
+        data = ""
+    return data.decode('utf-8')
+
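
The indexer's contentfilter() locates a filter plugin by turning the mimetype into a module name (slashes, dashes and dots become underscores; compare mt2mn() in the removed lupy.py and wikiutil.MimeType.module_name()). A small illustrative helper (the function name is hypothetical) showing that mapping for this new filter:

    def mimetype_to_module(mt):
        # same transformation the indexer uses to locate filter plugins
        return mt.replace("/", "_").replace("-", "_").replace(".", "_")

    mimetype_to_module("application/vnd.oasis.opendocument.text")
    # -> 'application_vnd_oasis_opendocument_text', i.e. this module
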
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/filter/application_vnd_oasis_opendocument_presentation.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,15 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - OpenOffice.org 2.x Presenter Filter (OpenDocument Presentation)
+
+    Depends on: nothing (only python with zlib)
+
+    @copyright: 2006 MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+from MoinMoin.filter.application_vnd_oasis_opendocument import execute as odfilter
+
+def execute(indexobj, filename):
+    return odfilter(indexobj, filename)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/filter/application_vnd_oasis_opendocument_spreadsheet.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,15 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - OpenOffice.org 2.x Calc Filter (OpenDocument Spreadsheet)
+
+    Depends on: nothing (only python with zlib)
+
+    @copyright: 2006 MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+from MoinMoin.filter.application_vnd_oasis_opendocument import execute as odfilter
+
+def execute(indexobj, filename):
+    return odfilter(indexobj, filename)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/filter/application_vnd_oasis_opendocument_text.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,15 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - OpenOffice.org 2.x Writer Filter (OpenDocument Text)
+
+    Depends on: nothing (only python with zlib)
+
+    @copyright: 2006 MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+from MoinMoin.filter.application_vnd_oasis_opendocument import execute as odfilter
+
+def execute(indexobj, filename):
+    return odfilter(indexobj, filename)
+
--- a/MoinMoin/filter/text.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/MoinMoin/filter/text.py	Sat Jun 10 16:52:04 2006 +0200
@@ -11,7 +11,7 @@
 import codecs
 
 def execute(indexobj, filename):
-    for enc in ('utf-8', 'iso-8859-15', 'iso-8859-1', ):
+    for enc in ('utf-8', 'iso-8859-15', ):
         try:
             f = codecs.open(filename, "r", enc)
             data = f.read()
--- a/MoinMoin/lupy.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,588 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""
-    MoinMoin - lupy indexing search engine
-
-    @copyright: 2005 by Florian Festi, Nir Soffer, Thomas Waldmann
-    @license: GNU GPL, see COPYING for details.
-"""
-
-import os, re, codecs, errno, time
-
-from MoinMoin.Page import Page
-from MoinMoin import config, wikiutil
-from MoinMoin.util import filesys, lock
-from MoinMoin.support.lupy.index.term import Term
-from MoinMoin.support.lupy import document
-from MoinMoin.support.lupy.index.indexwriter import IndexWriter
-from MoinMoin.support.lupy.search.indexsearcher import IndexSearcher
-
-from MoinMoin.support.lupy.index.term import Term
-from MoinMoin.support.lupy.search.term import TermQuery
-from MoinMoin.support.lupy.search.boolean import BooleanQuery
-
-##############################################################################
-### Tokenizer
-##############################################################################
-
-singleword = r"[%(u)s][%(l)s]+" % {
-                 'u': config.chars_upper,
-                 'l': config.chars_lower,
-             }
-
-singleword_re = re.compile(singleword, re.U)
-wikiword_re = re.compile(r"^(%s){2,}$" % singleword, re.U)
-
-token_re = re.compile(
-    r"(?P<company>\w+[&@]\w+)|" + # company names like AT&T and Excite@Home.
-    r"(?P<email>\w+([.-]\w+)*@\w+([.-]\w+)*)|" +    # email addresses
-    r"(?P<hostname>\w+(\.\w+)+)|" +                 # hostnames
-    r"(?P<num>(\w+[-/.,])*\w*\d\w*([-/.,]\w+)*)|" + # version numbers
-    r"(?P<acronym>(\w\.)+)|" +          # acronyms: U.S.A., I.B.M., etc.
-    r"(?P<word>\w+)",                   # words
-    re.U)
-
-dot_re = re.compile(r"[-_/,.]")
-mail_re = re.compile(r"[-_/,.]|(@)")
-
-def tokenizer(value):
-    """Yield a stream of lower cased words from a string."""
-    if isinstance(value, list): # used for page links
-        for v in value:
-            yield v
-    else:
-        tokenstream = re.finditer(token_re, value)
-        for m in tokenstream:
-            if m.group("acronym"):
-                yield m.group("acronym").replace('.', '').lower()
-            elif m.group("company"):
-                yield m.group("company").lower()
-            elif m.group("email"):
-                for word in mail_re.split(m.group("email").lower()):
-                    if word:
-                        yield word
-            elif m.group("hostname"):                
-                for word in dot_re.split(m.group("hostname").lower()):
-                    yield word
-            elif m.group("num"):
-                for word in dot_re.split(m.group("num").lower()):
-                    yield word
-            elif m.group("word"):
-                word = m.group("word")
-                yield  word.lower()
-                # if it is a CamelCaseWord, we additionally yield Camel, Case and Word
-                if wikiword_re.match(word):
-                    for sm in re.finditer(singleword_re, word):
-                        yield sm.group().lower()
-
-
-#############################################################################
-### Indexing
-#############################################################################
-
-class UpdateQueue:
-    def __init__(self, file, lock_dir):
-        self.file = file
-        self.writeLock = lock.WriteLock(lock_dir, timeout=10.0)
-        self.readLock = lock.ReadLock(lock_dir, timeout=10.0)
-
-    def exists(self):
-        return os.path.exists(self.file)
-
-    def append(self, pagename):
-        """ Append a page to queue """
-        if not self.writeLock.acquire(60.0):
-            request.log("can't add %r to lupy update queue: can't lock queue" %
-                        pagename)
-            return
-        try:
-            f = codecs.open(self.file, 'a', config.charset)
-            try:
-                f.write(pagename + "\n")
-            finally:
-                f.close()                
-        finally:
-            self.writeLock.release()
-
-    def pages(self):
-        """ Return list of pages in the queue """
-        if self.readLock.acquire(1.0):
-            try:
-                return self._decode(self._read())
-            finally:
-                self.readLock.release()            
-        return []
-
-    def remove(self, pages):
-        """ Remove pages from the queue
-        
-        When the queue is empty, the queue file is removed, so exists()
-        can tell if there is something waiting in the queue.
-        """
-        if self.writeLock.acquire(30.0):
-            try:
-                queue = self._decode(self._read())
-                for page in pages:
-                    try:
-                        queue.remove(page)
-                    except ValueError:
-                        pass
-                if queue:
-                    self._write(queue)
-                else:
-                    self._removeFile()
-                return True
-            finally:
-                self.writeLock.release()
-        return False
-
-    # Private -------------------------------------------------------
-
-    def _decode(self, data):
-        """ Decode queue data """
-        pages = data.splitlines()
-        return self._filterDuplicates(pages)
-
-    def _filterDuplicates(self, pages):
-        """ Filter duplicates in page list, keeping the order """
-        unique = []
-        seen = {}
-        for name in pages:
-            if not name in seen:
-                unique.append(name)
-                seen[name] = 1
-        return unique
-
-    def _read(self):
-        """ Read and return queue data
-        
-        This does not do anything with the data so we can release the
-        lock as soon as possible, enabling others to update the queue.
-        """
-        try:
-            f = codecs.open(self.file, 'r', config.charset)
-            try:
-                return f.read()
-            finally:
-                f.close()
-        except (OSError, IOError), err:
-            if err.errno != errno.ENOENT:
-                raise
-            return ''
-
-    def _write(self, pages):
-        """ Write pages to queue file
-        
-        Requires queue write locking.
-        """
-        # XXX use tmpfile/move for atomic replace on real operating systems
-        data = '\n'.join(pages) + '\n'
-        f = codecs.open(self.file, 'w', config.charset)
-        try:
-            f.write(data)
-        finally:
-            f.close()            
-
-    def _removeFile(self):
-        """ Remove queue file 
-        
-        Requires queue write locking.
-        """
-        try:
-            os.remove(self.file)
-        except OSError, err:
-            if err.errno != errno.ENOENT:
-                raise
-
-
-class Index:
-    class LockedException(Exception):
-        pass
-    
-    def __init__(self, request):
-        self.request = request
-        cache_dir = request.cfg.cache_dir
-        self.main_dir = os.path.join(cache_dir, 'lupy')
-        self.dir = os.path.join(self.main_dir, 'index')
-        filesys.makeDirs(self.dir)
-        self.sig_file = os.path.join(self.main_dir, 'complete')
-        self.segments_file = os.path.join(self.dir, 'segments')
-        lock_dir = os.path.join(self.main_dir, 'index-lock')
-        self.lock = lock.WriteLock(lock_dir,
-                                   timeout=3600.0, readlocktimeout=60.0)
-        self.read_lock = lock.ReadLock(lock_dir, timeout=3600.0)
-        self.queue = UpdateQueue(os.path.join(self.main_dir, "update-queue"),
-                                 os.path.join(self.main_dir, 'update-queue-lock'))
-        
-        # Disabled until we have a sane way to build the index with a
-        # queue in small steps.
-        ## if not self.exists():
-        ##    self.indexPagesInNewThread(request)
-
-    def exists(self):
-        """ Check if index exists """        
-        return os.path.exists(self.sig_file)
-                
-    def mtime(self):
-        return os.path.getmtime(self.segments_file)
-
-    def _search(self, query):
-        """ read lock must be acquired """
-        while True:
-            try:
-                searcher, timestamp = self.request.cfg.lupy_searchers.pop()
-                if timestamp != self.mtime():
-                    searcher.close()
-                else:
-                    break
-            except IndexError:
-                searcher = IndexSearcher(self.dir)
-                timestamp = self.mtime()
-                break
-            
-        hits = list(searcher.search(query))
-        self.request.cfg.lupy_searchers.append((searcher, timestamp))
-        return hits
-    
-    def search(self, query):
-        if not self.read_lock.acquire(1.0):
-            raise self.LockedException
-        try:
-            hits = self._search(query)
-        finally:
-            self.read_lock.release()
-        return hits
-
-    def update_page(self, page):
-        self.queue.append(page.page_name)
-        self._do_queued_updates_InNewThread()
-
-    def _do_queued_updates_InNewThread(self):
-        """ do queued index updates in a new thread
-        
-        Should be called from a user request. From a script, use indexPages.
-        """
-        if not self.lock.acquire(1.0):
-            self.request.log("can't index: can't acquire lock")
-            return
-        try:
-            from threading import Thread
-            indexThread = Thread(target=self._do_queued_updates,
-                args=(self._indexingRequest(self.request), self.lock))
-            indexThread.setDaemon(True)
-            
-            # Join the index thread after current request finish, prevent
-            # Apache CGI from killing the process.
-            def joinDecorator(finish):
-                def func():
-                    finish()
-                    indexThread.join()
-                return func
-                
-            self.request.finish = joinDecorator(self.request.finish)        
-            indexThread.start()
-        except:
-            self.lock.release()
-            raise
-
-    def indexPages(self, files=None, update=True):
-        """ Index all pages (and files, if given)
-        
-        Can be called only from a script. To index pages during a user
-        request, use indexPagesInNewThread.
-        @arg files: iterator or list of files to index additionally
-        @arg update: True = update an existing index, False = reindex everything
-        """
-        if not self.lock.acquire(1.0):
-            self.request.log("can't index: can't acquire lock")
-            return
-        try:
-            request = self._indexingRequest(self.request)
-            self._index_pages(request, None, files, update)
-        finally:
-            self.lock.release()
-    
-    def indexPagesInNewThread(self, files=None, update=True):
-        """ Index all pages in a new thread
-        
-        Should be called from a user request. From a script, use indexPages.
-        """
-        if not self.lock.acquire(1.0):
-            self.request.log("can't index: can't acquire lock")
-            return
-        try:
-            # Prevent rebuilding the index just after it was finished
-            if self.exists():
-                self.lock.release()
-                return
-            from threading import Thread
-            indexThread = Thread(target=self._index_pages,
-                args=(self._indexingRequest(self.request), self.lock, files, update))
-            indexThread.setDaemon(True)
-            
-            # Join the index thread after current request finish, prevent
-            # Apache CGI from killing the process.
-            def joinDecorator(finish):
-                def func():
-                    finish()
-                    indexThread.join()
-                return func
-                
-            self.request.finish = joinDecorator(self.request.finish)        
-            indexThread.start()
-        except:
-            self.lock.release()
-            raise
-
-    def optimize(self):
-        """ Optimize the index
-        
-        This may take from few seconds to few hours, depending on the
-        size of the wiki. Currently it's usable only from a script.
-        
-        TODO: needs special locking, so the index is readable until the
-        optimization is finished.
-        """
-        if not self.exists():
-            raise RuntimeError("Index does not exist or is not finished")
-        if not self.lock.acquire(1.0):
-            self.request.log("can't lock the index for optimization")
-            return
-        try:
-            self._optimize(self.request)
-        finally:
-            self.lock.release()
-
-    # -------------------------------------------------------------------
-    # Private
-
-    def _do_queued_updates(self, request, lock=None, amount=5):
-        """ Assumes that the write lock is acquired """
-        try:
-            pages = self.queue.pages()[:amount]
-            for name in pages:
-                p = Page(request, name)
-                self._update_page(p)
-                self.queue.remove([name])
-        finally:
-            if lock:
-                lock.release()
-
-    def _update_page(self, page):
-        """ Assumes that the write lock is acquired """
-        reader = IndexSearcher(self.dir)
-        reader.reader.deleteTerm(Term('pagename', page.page_name))
-        reader.close()
-        if page.exists():
-            writer = IndexWriter(self.dir, False, tokenizer)
-            self._index_page(writer, page, False) # we don't need to check whether it is updated
-            writer.close()
-   
-    def contentfilter(self, filename):
-        """ Get a filter for content of filename and return unicode content. """
-        import mimetypes
-        from MoinMoin import wikiutil
-        request = self.request
-        mimetype, encoding = mimetypes.guess_type(filename)
-        if mimetype is None:
-            mimetype = 'application/octet-stream'
-        def mt2mn(mt): # mimetype to modulename
-            return mt.replace("/", "_").replace("-","_").replace(".", "_")
-        try:
-            _filter = mt2mn(mimetype)
-            execute = wikiutil.importPlugin(request.cfg, 'filter', _filter)
-        except wikiutil.PluginMissingError:
-            try:
-                _filter = mt2mn(mimetype.split("/", 1)[0])
-                execute = wikiutil.importPlugin(request.cfg, 'filter', _filter)
-            except wikiutil.PluginMissingError:
-                try:
-                    _filter = mt2mn('application/octet-stream')
-                    execute = wikiutil.importPlugin(request.cfg, 'filter', _filter)
-                except wikiutil.PluginMissingError:
-                    raise ImportError("Cannot load filter %s" % binaryfilter)
-        try:
-            data = execute(self, filename)
-            request.log("Filter %s returned %d characters for file %s" % (_filter, len(data), filename))
-        except (OSError, IOError), err:
-            data = ''
-            request.log("Filter %s threw error '%s' for file %s" % (_filter, str(err), filename))
-        return data
-   
-    def test(self, request):
-        query = BooleanQuery()
-        query.add(TermQuery(Term("text", 'suchmich')), True, False)
-        docs = self._search(query)
-        for d in docs:
-            request.log("%r %r %r" % (d, d.get('attachment'), d.get('pagename')))
-
-    def _index_file(self, request, writer, filename, update):
-        """ index a file as it were a page named pagename
-            Assumes that the write lock is acquired
-        """
-        fs_rootpage = 'FS' # XXX FS hardcoded
-        try:
-            mtime = os.path.getmtime(filename)
-            mtime = wikiutil.timestamp2version(mtime)
-            if update:
-                query = BooleanQuery()
-                query.add(TermQuery(Term("pagename", fs_rootpage)), True, False)
-                query.add(TermQuery(Term("attachment", filename)), True, False)
-                docs = self._search(query)
-                updated = len(docs) == 0 or mtime > int(docs[0].get('mtime'))
-            else:
-                updated = True
-            request.log("%s %r" % (filename, updated))
-            if updated:
-                file_content = self.contentfilter(filename)
-                d = document.Document()
-                d.add(document.Keyword('pagename', fs_rootpage))
-                d.add(document.Keyword('mtime', str(mtime)))
-                d.add(document.Keyword('attachment', filename)) # XXX we should treat files like real pages, not attachments
-                pagename = " ".join(os.path.join(fs_rootpage, filename).split("/"))
-                d.add(document.Text('title', pagename, store=False))        
-                d.add(document.Text('text', file_content, store=False))
-                writer.addDocument(d)
-        except (OSError, IOError), err:
-            pass
-
-    def _index_page(self, writer, page, update):
-        """ Index a page - assumes that the write lock is acquired
-            @arg writer: the index writer object
-            @arg page: a page object
-            @arg update: False = index in any case, True = index only when changed
-        """
-        pagename = page.page_name
-        request = page.request
-        mtime = page.mtime_usecs()
-        if update:
-            query = BooleanQuery()
-            query.add(TermQuery(Term("pagename", pagename)), True, False)
-            query.add(TermQuery(Term("attachment", "")), True, False)
-            docs = self._search(query)
-            updated = len(docs) == 0 or mtime > int(docs[0].get('mtime'))
-        else:
-            updated = True
-        request.log("%s %r" % (pagename, updated))
-        if updated:
-            d = document.Document()
-            d.add(document.Keyword('pagename', pagename))
-            d.add(document.Keyword('mtime', str(mtime)))
-            d.add(document.Keyword('attachment', '')) # this is a real page, not an attachment
-            d.add(document.Text('title', pagename, store=False))        
-            d.add(document.Text('text', page.get_raw_body(), store=False))
-            
-            links = page.getPageLinks(request)
-            t = document.Text('links', '', store=False)
-            t.stringVal = links
-            d.add(t)
-            d.add(document.Text('link_text', ' '.join(links), store=False))
-
-            writer.addDocument(d)
-        
-        from MoinMoin.action import AttachFile
-
-        attachments = AttachFile._get_files(request, pagename)
-        for att in attachments:
-            filename = AttachFile.getFilename(request, pagename, att)
-            mtime = wikiutil.timestamp2version(os.path.getmtime(filename))
-            if update:
-                query = BooleanQuery()
-                query.add(TermQuery(Term("pagename", pagename)), True, False)
-                query.add(TermQuery(Term("attachment", att)), True, False)
-                docs = self._search(query)
-                updated = len(docs) == 0 or mtime > int(docs[0].get('mtime'))
-            else:
-                updated = True
-            request.log("%s %s %r" % (pagename, att, updated))
-            if updated:
-                att_content = self.contentfilter(filename)
-                d = document.Document()
-                d.add(document.Keyword('pagename', pagename))
-                d.add(document.Keyword('mtime', str(mtime)))
-                d.add(document.Keyword('attachment', att)) # this is an attachment, store its filename
-                d.add(document.Text('title', att, store=False)) # the filename is the "title" of an attachment
-                d.add(document.Text('text', att_content, store=False))
-                writer.addDocument(d)
-
-
-    def _index_pages(self, request, lock=None, files=None, update=True):
-        """ Index all pages (and all given files)
-        
-        This should be called from indexPages or indexPagesInNewThread only!
-        
-        This may take few minutes up to few hours, depending on the size of
-        the wiki.
-
-        When called in a new thread, lock is acquired before the call,
-        and this method must release it when it finishes or fails.
-        """
-        try:
-            self._unsign()
-            start = time.time()
-            writer = IndexWriter(self.dir, not update, tokenizer)
-            writer.mergeFactor = 50
-            pages = request.rootpage.getPageList(user='', exists=1)
-            request.log("indexing all (%d) pages..." % len(pages))
-            for pagename in pages:
-                p = Page(request, pagename)
-                # code does NOT seem to assume request.page being set any more
-                #request.page = p
-                self._index_page(writer, p, update)
-            if files:
-                request.log("indexing all files...")
-                for fname in files:
-                    fname = fname.strip()
-                    self._index_file(request, writer, fname, update)
-            writer.close()
-            request.log("indexing completed successfully in %0.2f seconds." % 
-                        (time.time() - start))
-            self._optimize(request)
-            self._sign()
-        finally:
-            if lock:
-                lock.release()
-
-    def _optimize(self, request):
-        """ Optimize the index """
-        self._unsign()
-        start = time.time()
-        request.log("optimizing index...")
-        writer = IndexWriter(self.dir, False, tokenizer)
-        writer.optimize()
-        writer.close()
-        request.log("optimizing completed successfully in %0.2f seconds." % 
-                    (time.time() - start))
-        self._sign()
-
-    def _indexingRequest(self, request):
-        """ Return a new request that can be used for index building.
-        
-        This request uses a security policy that lets the current user
-        read any page. Without this policy some pages will not render,
-        which will create broken pagelinks index.        
-        """
-        from MoinMoin.request import RequestCLI
-        from MoinMoin.security import Permissions        
-        request = RequestCLI(request.url)
-        class SecurityPolicy(Permissions):            
-            def read(*args, **kw):
-                return True        
-        request.user.may = SecurityPolicy(request.user)
-        return request
-
-    def _unsign(self):
-        """ Remove sig file - assume write lock acquired """
-        try:
-            os.remove(self.sig_file)
-        except OSError, err:
-            if err.errno != errno.ENOENT:
-                raise
-
-    def _sign(self):
-        """ Add sig file - assume write lock acquired """
-        f = file(self.sig_file, 'w')
-        try:
-            f.write('')
-        finally:
-            f.close()
-
--- a/MoinMoin/macro/SystemInfo.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/MoinMoin/macro/SystemInfo.py	Sat Jun 10 16:52:04 2006 +0200
@@ -112,7 +112,7 @@
         ', '.join(wikiutil.wikiPlugins('parser', Macro.cfg)) or nonestr)
     
     state = (_('Disabled'), _('Enabled'))
-    row(_('Lupy search'), state[request.cfg.lupy_search])
+    row(_('Xapian search'), state[request.cfg.xapian_search])
     
     row(_('Active threads'), t_count or 'N/A')
     buf.write(u'</dl>')
--- a/MoinMoin/multiconfig.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/MoinMoin/multiconfig.py	Sat Jun 10 16:52:04 2006 +0200
@@ -272,7 +272,7 @@
     language_ignore_browser = False # ignore browser settings, use language_default
                                     # or user prefs
 
-    lupy_search = False # disabled until lupy is finished
+    xapian_search = False # disabled until xapian indexing is finished
 
     mail_login = None # or "user pwd" if you need to use SMTP AUTH
     mail_sendmail = None # "/usr/sbin/sendmail -t -i" to not use SMTP, but sendmail
@@ -520,8 +520,8 @@
         self.navi_bar = [elem % self for elem in self.navi_bar]
         self.backup_exclude = [elem % self for elem in self.backup_exclude]
 
-        # list to cache lupy searcher objects
-        self.lupy_searchers = []
+        # list to cache xapian searcher objects
+        self.xapian_searchers = []
 
         # check if mail is possible and set flag:
         self.mail_enabled = (self.mail_smarthost is not None or self.mail_sendmail is not None) and self.mail_from
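
A minimal sketch of a wikiconfig that switches the new engine on; only the
xapian_search flag comes from this changeset, the class layout follows the
usual MoinMoin convention and the sitename/paths are placeholders:

    # wikiconfig.py -- illustrative sketch, values are placeholders
    from MoinMoin.multiconfig import DefaultConfig

    class Config(DefaultConfig):
        sitename = u'My Wiki'              # placeholder
        data_dir = './data/'               # placeholder
        data_underlay_dir = './underlay/'  # placeholder

        # use the Xapian index instead of the slow builtin full search
        xapian_search = True
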
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/script/index/__init__.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,17 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - Fullsearch Index Script Package
+
+    TODO: rename this module back to xapian when the script framework is
+    fixed so it no longer confuses it with the xapian.org "xapian" module.
+
+    @copyright: 2006 by Thomas Waldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+from MoinMoin.util import pysupport
+
+# create a list of extension scripts from the subpackage directory
+index_scripts = pysupport.getPackageModules(__file__)
+modules = index_scripts
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/script/index/build.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,44 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - build xapian search engine's index
+
+    You must run this script as the owner of the wiki files; usually this
+    is the web server user.
+
+    @copyright: 2006 by MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+from MoinMoin.script import MoinScript
+
+class IndexScript(MoinScript):
+    """ Xapian general index script class """
+
+    def __init__(self, argv, def_values):
+        MoinScript.__init__(self, argv, def_values)
+        self.parser.add_option(
+            "--files", metavar="FILES", dest="file_list",
+            help="filename of file list, e.g. files.lst (one file per line)"
+        )
+        self.parser.add_option(
+            "--mode", metavar="MODE", dest="mode",
+            help="either add (unconditionally add to index) or update (update an existing index)"
+        )
+    
+    def mainloop(self):
+        self.init_request()
+        # Do we have additional files to index?
+        if self.options.file_list:
+            self.files = file(self.options.file_list)
+        else:
+            self.files = None
+        self.command()
+
+class PluginScript(IndexScript):
+    """ Xapian index build script class """
+
+    def command(self):
+        from MoinMoin.Xapian import Index
+        Index(self.request).indexPages(self.files, self.options.mode)
+        #Index(self.request).test(self.request)
+
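
The command body above reduces to a two-line call into the new indexer; a
hedged sketch of driving it directly, where the RequestCLI construction is an
assumption and only Index and indexPages come from this changeset:

    # illustrative sketch -- building the index without the script framework
    from MoinMoin.request import RequestCLI
    from MoinMoin.Xapian import Index

    request = RequestCLI()                   # assumed: a default CLI request is enough
    Index(request).indexPages(None, 'add')   # files=None; mode is 'add' or 'update'
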
--- a/MoinMoin/script/lupy/__init__.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""
-    MoinMoin - Fullsearch Index Script Package
-
-    @copyright: 2006 by Thomas Waldmann
-    @license: GNU GPL, see COPYING for details.
-"""
-
-from MoinMoin.util import pysupport
-
-# create a list of extension scripts from the subpackage directory
-index_scripts = pysupport.getPackageModules(__file__)
-modules = index_scripts
-
--- a/MoinMoin/script/lupy/build.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""
-    MoinMoin - build lupy search engine's index
-
-    You must run this script as owner of the wiki files, usually this is the
-    web server user.
-
-    @copyright: 2005 by Florian Festi, Nir Soffer
-    @license: GNU GPL, see COPYING for details.
-"""
-
-import os
-
-from MoinMoin.script import MoinScript
-from MoinMoin.request import RequestCLI
-from MoinMoin.lupy import Index
-
-
-class IndexScript(MoinScript):
-    """ Lupy general index script class """
-
-    def __init__(self, argv, def_values):
-        MoinScript.__init__(self, argv, def_values)
-        self.parser.add_option(
-            "--files", metavar="FILES", dest="file_list",
-            help="filename of file list, e.g. files.lst (one file per line)"
-        )
-        self.parser.add_option(
-            "--update", action="store_true", dest="update",
-            help="when given, update an existing index"
-        )
-    
-    def mainloop(self):
-        self.init_request()
-        # Do we have additional files to index?
-        if self.options.file_list:
-            self.files = file(self.options.file_list)
-        else:
-            self.files = None
-        self.command()
-
-class PluginScript(IndexScript):
-    """ Lupy index build script class """
-
-    def command(self):
-        Index(self.request).indexPages(self.files, self.options.update)
-        #Index(self.request).test(self.request)
-
-        
--- a/MoinMoin/script/lupy/optimize.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""
-    MoinMoin - optimize lupy search engine's index
-
-    You must run this script as owner of the wiki files, usually this is the
-    web server user.
-
-    @copyright: 2005 by Florian Festi, Nir Soffer,
-                2006 by Thomas Waldmann
-    @license: GNU GPL, see COPYING for details.
-"""
-doit = 0
-
-from MoinMoin.script.lupy.build import IndexScript
-from MoinMoin.lupy import Index
-
-class PluginScript(IndexScript):
-    def command(self):
-        if doit:
-            Index(self.request).optimize()
-        else:
-            print "See http://moinmoin.wikiwikiweb.de/MoinMoinBugs/LupyOptimizeBreaksIndex !"
-
--- a/MoinMoin/search.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/MoinMoin/search.py	Sat Jun 10 16:52:04 2006 +0200
@@ -2,25 +2,21 @@
 """
     MoinMoin - search engine
     
-    @copyright: 2005 MoinMoin:FlorianFesti
-    @copyright: 2005 MoinMoin:NirSoffer
-    @copyright: 2005 MoinMoin:AlexanderSchremmer
+    @copyright: 2005 MoinMoin:FlorianFesti,
+                2005 MoinMoin:NirSoffer,
+                2005 MoinMoin:AlexanderSchremmer,
+                2006 MoinMoin:ThomasWaldmann,
+                2006 MoinMoin:FranzPletz
     @license: GNU GPL, see COPYING for details
 """
 
-import re, time, sys, StringIO
+import re, time, sys, StringIO, string
 from MoinMoin import wikiutil, config
 from MoinMoin.Page import Page
 
-from MoinMoin.support.lupy.search.term import TermQuery
-from MoinMoin.support.lupy.search.phrase import PhraseQuery
-from MoinMoin.support.lupy.search.boolean import BooleanQuery, BooleanScorer
-from MoinMoin.support.lupy.search.prefix import PrefixQuery
-from MoinMoin.support.lupy.search.camelcase import CamelCaseQuery
-from MoinMoin.support.lupy.search.regularexpression import RegularExpressionQuery
-from MoinMoin.support.lupy.index.term import Term
-
-from MoinMoin.lupy import Index, tokenizer
+import Xapian
+from xapian import Query
+from Xapian import UnicodeQuery
 
 #############################################################################
 ### query objects
@@ -152,7 +148,7 @@
     def sortByCost(self):
         tmp = [(term.costs(), term) for term in self._subterms]
         tmp.sort()
-        self._subterms = [item[1] for item in tmp]       
+        self._subterms = [item[1] for item in tmp]
 
     def search(self, page):
         """ Search for each term, cheap searches first """
@@ -173,12 +169,40 @@
             
         return '|'.join(result)
 
-    def lupy_term(self):
-        required = self.operator== " "
-        lupy_term = BooleanQuery()
+    def xapian_wanted(self):
+        wanted = True
         for term in self._subterms:
-            lupy_term.add(term.lupy_term(), required, term.negated)
-        return lupy_term
+            wanted = wanted and term.xapian_wanted()
+        return wanted
+
+    def xapian_term(self):
+        # sort negated terms
+        terms = []
+        not_terms = []
+        for term in self._subterms:
+            if not term.negated:
+                terms.append(term.xapian_term())
+            else:
+                not_terms.append(term.xapian_term())
+
+        # prepare the query for the non-negated terms
+        if len(terms) == 1:
+            t1 = Query(terms[0])
+        else:
+            t1 = Query(Query.OP_AND, terms)
+
+        # negated terms?
+        if not not_terms:
+            # no, just return the query for the non-negated terms
+            return t1
+        
+        # yes, link the non-negated and negated queries with an AND_NOT query
+        if len(not_terms) == 1:
+            t2 = Query(not_terms[0])
+        else:
+            t2 = Query(Query.OP_OR, not_terms)
+
+        return Query(Query.OP_AND_NOT, t1, t2)
 
 
 class OrExpression(AndExpression):
@@ -200,6 +224,10 @@
                 matches.extend(result)
         return matches
 
+    def xapian_term(self):
+        # XXX: negated terms managed by _moinSearch?
+        return Query(Query.OP_OR, [term.xapian_term() for term in self._subterms])
+
 
 class TextSearch(BaseExpression):
     """ A term that does a normal text search
@@ -255,34 +283,29 @@
             # XXX why not return None or empty list?
             return [Match()]
 
-    def lupy_term(self):
-        or_term = BooleanQuery()
-        term = self.titlesearch.lupy_term()
-        or_term.add(term, False, False)
-        pattern = self._pattern.lower()
+    def xapian_wanted(self):
+        return not self.use_re
+
+    def xapian_term(self):
         if self.use_re:
-            if pattern[0] == '^':
-                pattern = pattern[1:]
-            if pattern[:2] == '\b':
-                pattern = pattern[2:]
-            term = RegularExpressionQuery(Term("text", pattern))
+            return None # xapian can't do regex search
         else:
-            terms = pattern.lower().split()
-            terms = [list(tokenizer(t)) for t in terms]
-            term = BooleanQuery()
+            analyzer = Xapian.WikiAnalyzer()
+            terms = self._pattern.split()
+            
+            # all parsed wikiwords, AND'ed
+            queries = []
             for t in terms:
-                if len(t) == 1:
-                    term.add(CamelCaseQuery(Term("text", t[0])), True, False)
+                t = [i.encode(config.charset) for i in list(analyzer.tokenize(t))]
+                if len(t) < 2:
+                    queries.append(UnicodeQuery(t[0]))
                 else:
-                    phrase = PhraseQuery()
-                    for w in t:
-                        phrase.add(Term("text", w))
-                    term.add(phrase, True, False)
-            #term = CamelCaseQuery(Term("text", pattern))
-            #term = PrefixQuery(Term("text", pattern), 3)
-            #term = TermQuery(Term("text", pattern))
-        or_term.add(term, False, False)
-        return or_term
+                    queries.append(UnicodeQuery(Query.OP_AND, t))
+
+            # titlesearch OR parsed wikiwords
+            return Query(Query.OP_OR,
+                    (self.titlesearch.xapian_term(),
+                        Query(Query.OP_AND, queries)))
 
 
 class TitleSearch(BaseExpression):
@@ -309,7 +332,7 @@
         return u'%s!"%s"' % (neg, unicode(self._pattern))
 
     def highlight_re(self):
-        return u"(%s)" % self._pattern    
+        return u"(%s)" % self._pattern
 
     def pageFilter(self):
         """ Page filter function for single title search """
@@ -336,16 +359,28 @@
             # XXX why not return None or empty list?
             return [Match()]
 
-    def lupy_term(self):
-        pattern = self._pattern.lower()
+    def xapian_wanted(self):
+        return not self.use_re
+
+    def xapian_term(self):
         if self.use_re:
-            if pattern[0] == '^':
-                pattern = pattern[1:]
-            term = RegularExpressionQuery(Term("title", pattern))
+            return None # xapian doesn't support regex search
         else:
-            term = PrefixQuery(Term("title", pattern), 1000000) # number of chars which are ignored behind the match
-            #term.boost = 100.0
-        return term
+            analyzer = Xapian.WikiAnalyzer()
+            terms = self._pattern.split()
+
+            # all parsed wikiwords, AND'ed
+            queries = []
+            for t in terms:
+                t = ['%s%s' % (Xapian.Index.prefixMap['title'], i)
+                        for i in list(analyzer.tokenize(t))]
+                if len(t) < 2:
+                    queries.append(UnicodeQuery(t[0]))
+                else:
+                    queries.append(UnicodeQuery(Query.OP_AND, t))
+
+            return Query(Query.OP_AND, queries)
 
 
 class LinkSearch(BaseExpression):
@@ -389,7 +424,7 @@
         return u'%s!"%s"' % (neg, unicode(self._pattern))
 
     def highlight_re(self):
-        return u"(%s)" % self._textpattern    
+        return u"(%s)" % self._textpattern
 
     def search(self, page):
         # Get matches in page name
@@ -403,7 +438,7 @@
                 break
         else:
             Found = False
-                
+
         if Found:
             # Search in page text
             results = self.textsearch.search(page)
@@ -422,16 +457,16 @@
             # XXX why not return None or empty list?
             return [Match()]
 
-    def lupy_term(self):        
+    def xapian_wanted(self):
+        return not self.use_re
+
+    def xapian_term(self):
         pattern = self.pattern
         if self.use_re:
-            if pattern[0] == "^":
-                pattern = pattern[1:]
-            term = RegularExpressionQuery(Term("links", pattern))
+            return None # xapian doesn't support regex search
         else:
-            term = TermQuery(Term("links", pattern))
-        term.boost = 10.0
-        return term
+            return UnicodeQuery('%s:%s' %
+                    (Xapian.Index.prefixMap['linkto'], pattern))
 
 ############################################################################
 ### Results
@@ -625,10 +660,33 @@
         return []
 
 
+class FoundRemote(FoundPage):
+    """ Represent an attachment in search results """
+    
+    def __init__(self, wikiname, page_name, attachment, matches=None, page=None):
+        self.wikiname = wikiname
+        self.page_name = page_name
+        self.attachment = attachment
+        self.page = page
+        if matches is None:
+            matches = []
+        self._matches = matches
+
+    def weight(self, unique=1):
+        return 1
+
+    def get_matches(self, unique=1, sort='start', type=Match):
+        return []
+
+    def _unique_matches(self, type=Match):
+        return []
+
+
 ##############################################################################
 ### Parse Query
 ##############################################################################
 
+
 class QueryParser:
     """
     Converts a String into a tree of Query objects
@@ -646,13 +704,15 @@
         self.regex = kw.get('regex', 0)
 
     def parse_query(self, query):
-        """ transform an string into a tree of Query objects"""
+        """ transform an string into a tree of Query objects """
+        if isinstance(query, str):
+            query = query.decode(config.charset)
         self._query = query
         result = self._or_expression()
         if result is None:
             result = BaseExpression()
         return result
-  
+
     def _or_expression(self):
         result = self._and_expression()
         if self._query:
@@ -683,7 +743,7 @@
                  r'(?P<OPS>\(|\)|(or\b(?!$)))|' +  # or, (, )
                  r'(?P<MOD>(\w+:)*)' +
                  r'(?P<TERM>("[^"]+")|' +
-                  r"('[^']+')|(\S+)))")             # search word itself
+                 r"('[^']+')|(\S+)))")             # search word itself
         self._query = self._query.strip()
         match = re.match(regex, self._query, re.U)
         if not match:
@@ -727,7 +787,7 @@
 
         if match.group("NEG"):
             obj.negate()
-        return obj                
+        return obj
 
     def isQuoted(self, text):
         # Empty string '' is not considered quoted
@@ -837,7 +897,7 @@
                     matchInfo,
                     f.listitem(0),
                     ]
-                write(''.join(item))           
+                write(''.join(item))
             write(list(0))
 
         return self.getvalue()
@@ -1162,8 +1222,8 @@
     def run(self):
         """ Perform search and return results object """
         start = time.time()
-        if self.request.cfg.lupy_search:
-            hits = self._lupySearch()
+        if self.request.cfg.xapian_search:
+            hits = self._xapianSearch()
         else:
             hits = self._moinSearch()
             
@@ -1172,12 +1232,14 @@
             hits = self._filter(hits)
         
         result_hits = []
-        for page, attachment, match in hits:
-            if attachment:
-                result_hits.append(FoundAttachment(page.page_name, attachment))
+        for wikiname, page, attachment, match in hits:
+            if wikiname in (self.request.cfg.interwikiname, 'Self'): # a local match
+                if attachment:
+                    result_hits.append(FoundAttachment(page.page_name, attachment))
+                else:
+                    result_hits.append(FoundPage(page.page_name, match))
             else:
-                result_hits.append(FoundPage(page.page_name, match))
-            
+                result_hits.append(FoundRemote(wikiname, page, attachment, match))
         elapsed = time.time() - start
         count = self.request.rootpage.getPageCount()
         return SearchResults(self.query, result_hits, count, elapsed)
@@ -1185,22 +1247,34 @@
     # ----------------------------------------------------------------
     # Private!
 
-    def _lupySearch(self):
-        """ Search using lupy
+    def _xapianSearch(self):
+        """ Search using Xapian
         
-        Get a list of pages using fast lupy search and return moin
-        search in those pages.
+        Get a list of candidate pages using the fast Xapian index, then
+        run the moin search only on those pages.
         """
         pages = None
-        index = Index(self.request)
-        if index.exists():
-            self.request.clock.start('_lupySearch')
+        index = Xapian.Index(self.request)
+        if index.exists() and self.query.xapian_wanted():
+            self.request.clock.start('_xapianSearch')
             try:
-                hits = index.search(self.query.lupy_term())
-                pages = [(hit.get('pagename'), hit.get('attachment')) for hit in hits]
+                from MoinMoin.support import xapwrap
+                query = self.query.xapian_term()
+                self.request.log("xapianSearch: query = %r" %
+                        query.get_description())
+                query = xapwrap.index.QObjQuery(query)
+                hits = index.search(query)
+                self.request.log("xapianSearch: finds: %r" % hits)
+                def dict_decode(d):
+                    """ decode dict values to unicode """
+                    for k, v in d.items():
+                        d[k] = d[k].decode(config.charset)
+                    return d
+                pages = [dict_decode(hit['values']) for hit in hits]
+                self.request.log("xapianSearch: finds pages: %r" % pages)
             except index.LockedException:
                 pass
-            self.request.clock.stop('_lupySearch')
+            self.request.clock.stop('_xapianSearch')
         return self._moinSearch(pages)
 
     def _moinSearch(self, pages=None):
@@ -1212,23 +1286,29 @@
         self.request.clock.start('_moinSearch')
         from MoinMoin.Page import Page
         if pages is None:
-            # if we are not called from _lupySearch, we make a full pagelist,
+            # if we are not called from _xapianSearch, we make a full pagelist,
             # but don't search attachments (thus attachment name = '')
-            pages = [(p, '') for p in self._getPageList()]
+            pages = [{'pagename': p, 'attachment': '', 'wikiname': 'Self', } for p in self._getPageList()]
         hits = []
         fs_rootpage = self.fs_rootpage
-        for pagename, attachment in pages:
-            page = Page(self.request, pagename)
-            if attachment:
-                if pagename == fs_rootpage: # not really an attachment
-                    page = Page(self.request, "%s%s" % (fs_rootpage, attachment))
-                    hits.append((page, None, None))
+        for valuedict in pages:
+            wikiname = valuedict['wikiname']
+            pagename = valuedict['pagename']
+            attachment = valuedict['attachment']
+            if wikiname in (self.request.cfg.interwikiname, 'Self'): # THIS wiki
+                page = Page(self.request, pagename)
+                if attachment:
+                    if pagename == fs_rootpage: # not really an attachment
+                        page = Page(self.request, "%s%s" % (fs_rootpage, attachment))
+                        hits.append((wikiname, page, None, None))
+                    else:
+                        hits.append((wikiname, page, attachment, None))
                 else:
-                    hits.append((page, attachment, None))
-            else:
-                match = self.query.search(page)
-                if match:
-                    hits.append((page, attachment, match))
+                    match = self.query.search(page)
+                    if match:
+                        hits.append((wikiname, page, attachment, match))
+            else: # other wiki
+                hits.append((wikiname, pagename, attachment, None))
         self.request.clock.stop('_moinSearch')
         return hits
 
@@ -1252,8 +1332,11 @@
         """ Filter out deleted or acl protected pages """
         userMayRead = self.request.user.may.read
         fs_rootpage = self.fs_rootpage + "/"
-        filtered = [(page, attachment, match) for page, attachment, match in hits
-                    if page.exists() and userMayRead(page.page_name) or page.page_name.startswith(fs_rootpage)]    
+        thiswiki = (self.request.cfg.interwikiname, 'Self')
+        filtered = [(wikiname, page, attachment, match) for wikiname, page, attachment, match in hits
+                    if not wikiname in thiswiki or
+                       page.exists() and userMayRead(page.page_name) or
+                       page.page_name.startswith(fs_rootpage)]    
         return filtered
         
         
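
The xapian_term() methods added above all produce one Query tree: non-negated
terms are AND'ed together, negated terms are OR'ed and then subtracted with
AND_NOT. A small standalone sketch of that shape against the xapian bindings
(plain string terms for brevity; the real code tokenizes with WikiAnalyzer and
wraps terms in UnicodeQuery):

    # illustrative sketch of the query shape built by AndExpression.xapian_term()
    import xapian

    terms     = [xapian.Query('apple'), xapian.Query('banana')]  # not negated
    not_terms = [xapian.Query('cherry')]                         # negated

    positive = xapian.Query(xapian.Query.OP_AND, terms)
    negative = xapian.Query(xapian.Query.OP_OR, not_terms)

    # everything matching all positive terms, minus anything matching a negated term
    query = xapian.Query(xapian.Query.OP_AND_NOT, positive, negative)
    print query.get_description()
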
--- a/MoinMoin/support/lupy/__init__.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-"""Lupy Package"""
-
-__version__ = '0.2.1'
--- a/MoinMoin/support/lupy/document.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-# -*- test-case-name: lupy.test.test_document -*-
-"""Documents and Fields"""
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-class Document(object):
-    """Documents are the unit of indexing and search.
-
-    A Document is a set of fields.  Each field has a name and a textual
-    value.  A field may be stored with the document, in which case it is
-    returned with search hits on the document.  Thus each document should
-    typically contain stored fields which uniquely identify it.
-    """
-
-    def __init__(self):
-        self._fields = {}
-        self.fieldNames = []
-    
-    def add(self, field):
-        """Adds a field to a document."""
-        name = field.name()
-        self._fields[name] = field
-        if name not in self.fieldNames:
-            self.fieldNames.append(name)
-    
-    def getField(self, name):
-        """Returns a field with the given name, or None if none exist."""
-        return self._fields.get(name, None)
-
-    def get(self, name):
-        """Returns the string value of a field, or None."""
-        field = self.getField(name)
-        if field is not None:
-            return field.stringValue()
-        else:
-            return None
-
-    def fields(self):
-        """Return Python iterator over fields."""
-        return [self._fields[name] for name in self.fieldNames]
-
-    def __repr__(self):
-        return '<Document[%s]>' % ("|".join(self.fieldNames),)
-
-
-class Field(object):
-    """A field is a section of a Document.
-
-    Each field has two parts, a name and a value.  Values may be free
-    text, provided as a string or as a file, or they may be atomic
-    keywords, which are not further processed.  Such keywords may be used
-    to represent dates, urls, etc.  Fields are optionally stored in the
-    index, so that they may be returned with hits on the document.
-    """
-
-    def __init__(self, name, string, store=False, index=True, token=True):
-        self.nom = name
-        self.stringVal = string
-        self.readerVal = None
-        self.isStored = store
-        self.isIndexed = index
-        self.isTokenized = token        
-
-    def __repr__(self):
-        if self.isStored and self.isIndexed and not self.isTokenized:
-            return '<Keyword<' + self.nom + ':' + self.stringVal + '>>'
-        elif self.isStored and not self.isIndexed and not self.isTokenized:
-            return '<Unindexed<' + self.nom + ':' + self.stringVal + '>>'
-        elif self.isStored and self.isIndexed and self.isTokenized and self.stringVal is not None:
-            return '<Text<' + self.nom + ':' + self.stringVal + '>>'
-        elif self.isStored and self.isIndexed and self.isTokenized and self.stringVal is not None:
-            return '<Text<' + self.nom + ':' + self.readerVal + '>>'
-        else:
-            return '<Field<???>'
-
-    def name(self):
-        return self.nom
-
-    def stringValue(self):
-        return self.stringVal
-
-    def readerValue(self):
-        return self.readerVal
-
-
-def Keyword(name, value):
-    "An untokenized field that is included in the index and returned with search results."
-    return Field(name, value, True, True, False)
-
-
-def Text(name, strOrFile, store=True):
-    """A tokenized field that is included in the index and returned
-    with search results.  Accepts string or file-like object."""
-    if isinstance(strOrFile, (str, unicode)):
-        res = Field(name, strOrFile, store, True, True)
-    else:
-        res = Field(name, None)
-        res.readerVal = strOrFile
-        res.stringVal = None
-    return res
-
-
-def UnIndexed(name, value):
-    return Field(name, value, True, False, False)
-
-
-def UnStored(name, value):
-    return Field(name, value, False, True, True)
--- a/MoinMoin/support/lupy/index/__init__.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-"""indexing classes"""
--- a/MoinMoin/support/lupy/index/documentwriter.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,181 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-from StringIO import StringIO
-from array import array
-import re
-from MoinMoin.support.lupy.search import similarity
-from MoinMoin.support.lupy.index import field, term
-
-def standardTokenizer(string):
-    """Yield a stream of downcased words from a string."""
-    r = re.compile("\\w+", re.U)
-    tokenstream = re.finditer(r, string)
-    for m in tokenstream:
-        yield m.group().lower()
-        
-class DocumentWriter(object):
-
-    def __init__(self, directory, analyzer=None, mfl=None):
-        self.directory = directory
-        self.maxFieldLength = mfl
-        self.postingTable = {}
-        self.termBuffer = term.Term('','')
-        self.analyzer=analyzer or standardTokenizer
-        
-    def addDocument(self, segment, doc):
-        # Write field names
-        fi = self.fieldInfos = field.FieldInfos()
-        fi.add(doc)
-        fi.writeDir(self.directory, segment + '.fnm')
-
-        # Write field values
-        fieldsWriter = field.FieldsWriter(self.directory,
-                                                 segment,
-                                                 self.fieldInfos)
-        try:
-            fieldsWriter.addDocument(doc)
-        finally:
-            fieldsWriter.close()
-
-        # Invert doc into postingTable
-        self.postingTable = {}
-        self.fieldLengths = [0] * (len(self.fieldInfos))
-        self.invertDocument(doc)
-
-        # Sort postingTable into an array
-        postings = self.sortPostingTable()
-
-
-        # Write postings
-        self.writePostings(postings, segment)
-        
-        # Write noms of indexed files
-        self.writeNorms(doc, segment)
-
-
-    def invertDocument(self, doc):
-        fields = doc.fields()
-        for field in doc.fields():
-            fieldName = field.name()
-            fieldNumber = self.fieldInfos.fieldNumber(fieldName)
-            
-            position = self.fieldLengths[fieldNumber]    # Position in field
-
-            if field.isIndexed:
-                if not field.isTokenized:
-                    # Untokenized
-                    self.addPosition(fieldName, field.stringValue(), position)
-                    position += 1
-                else:
-                    # Find or make a reader
-                    if field.readerValue() is not None:
-                        val = field.readerValue().read()
-                    elif field.stringValue() is not None:
-                        val = field.stringValue()
-                    else:
-                        raise Exception, 'Field must have either a String or Reader value'
-                    
-                    for tok in self.analyzer(val):
-                        self.addPosition(fieldName, tok, position)
-                        position += 1
-
-                        if self.maxFieldLength and (position > self.maxFieldLength):
-                            break
-                        
-            self.fieldLengths[fieldNumber] = position 
-                    
-
-    def addPosition(self, field, text, position):
-        self.termBuffer.set(field, text)
-
-        ti = self.postingTable.get(self.termBuffer, None)
-        
-        if ti is not None:
-            freq = ti.freq
-            ti.positions.append(position)
-            ti.freq = freq + 1
-        else:
-            trm = term.Term(field, text, False)
-            self.postingTable[trm] = Posting(trm, position)
-
-
-    def sortPostingTable(self):
-        arr = self.postingTable.values()
-        arr.sort()
-        return arr
-
-
-    def writePostings(self, postings, segment):
-        freq = None
-        prox = None
-        tis = None
-
-        try:
-            freq = self.directory.createFile(segment + '.frq')
-            prox = self.directory.createFile(segment + '.prx')
-
-            tis = term.TermInfosWriter(self.directory,
-                                                  segment,
-                                                  self.fieldInfos)
-            ti = term.TermInfo()
-
-            for posting in postings:
-                # print 'writing', posting, posting.term
-                # Add entry to the dictionary with pointers to prox and freq files
-                ti.set(1, freq.getFilePointer(), prox.getFilePointer())
-                tis.add(posting.term, ti)
-
-                # Add an entry to the freq file
-                f = posting.freq
-                if f == 1:                  # optimize freq == 1
-                    freq.writeVInt(1)       # set low bit of doc num
-                else:
-                    freq.writeVInt(0)       # the document number
-                    freq.writeVInt(f)       # frequency in doc
-
-                lastPosition = 0
-                positions = posting.positions
-
-                for position in positions:
-                    prox.writeVInt(position - lastPosition)
-                    lastPosition = position
-                    
-        finally:
-            if freq is not None:
-                freq.close()
-            if prox is not None:
-                prox.close()
-            if tis is not None:
-                tis.close()
-
-
-    def writeNorms(self, doc, segment):
-        for field in doc.fields():
-            if field.isIndexed:
-                fieldNumber = self.fieldInfos.fieldNumber(field.name())
-                norm = self.directory.createFile(segment +
-                                                 '.f' + str(fieldNumber))
-                try:
-                    norm.writeByte(similarity.normInt(self.fieldLengths[fieldNumber]))
-                finally:
-                    norm.close()
-
-
-class Posting(object):
-
-    def __init__(self, t, position):
-        self.term = t
-        self.freq = 1
-        self.positions = array('i',[1])
-        self.positions[0] = position
-
-    def __repr__(self):
-        s = '<Posting:'
-        s += str(self.term) + '>'
-        return s
-
-    def __cmp__(self, other):
-        return cmp(self.term, other.term)
--- a/MoinMoin/support/lupy/index/field.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,173 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-from MoinMoin.support.lupy import document
-
-class FieldInfo(object):
-
-    def __init__(self, na, tk, nu):
-        self.name = na
-        self.isIndexed = tk
-        self.number = nu
-
-
-class FieldInfos(object):
-        
-    def __init__(self, d=None, name=None):
-        self.byNumber = []
-        self.byName = {}
-        if d is None and name is None:
-            self.addString('',False)
-        else:
-            input = d.openFile(name)
-            try:
-                self.read(input)
-            finally:
-                input.close()
-
-    def add(self, doc):
-        """Adds field info for a Document"""
-        for field in doc.fields():
-            self.addString(field.name(), field.isIndexed)
-
-    def addString(self, name, isIndxd):
-        fi = self.fieldInfo(name)
-        if fi is None:
-            self.addInternal(name, isIndxd)
-        elif fi.isIndexed is not isIndxd:
-            fi.isIndexed = True
-
-    def addFieldInfos(self, other):
-        """Merges in information from another FieldInfos"""
-        for i in range(len(other)):
-            fi = other.fieldInfoInt(i)
-            self.addString(fi.name, fi.isIndexed)
-
-    def addInternal(self, name, isIndexed):
-        fi = FieldInfo(name, isIndexed, len(self.byNumber))
-
-        self.byNumber.append(fi)
-        self.byName[name]=fi
-
-    def fieldNumber(self, fieldName):
-        fi = self.fieldInfo(fieldName)
-        if fi is not None:
-            return fi.number
-        else:
-            return -1
-
-    def fieldInfo(self, fieldName):
-        return self.byName.get(fieldName, None)
-
-    def fieldName(self, fieldNumber):
-        return self.byNumber[fieldNumber].name
-
-    def fieldInfoInt(self, fieldNumber):
-        return self.byNumber[fieldNumber]
-
-    def __len__(self):
-        return len(self.byNumber)
-
-    def writeDir(self, d, name):
-        output = d.createFile(name)
-        try:
-            self.write(output)
-        finally:
-            output.close()
-
-    def write(self, output):
-        output.writeVInt(len(self))
-
-        for i in range(len(self)):
-            fi = self.fieldInfoInt(i)
-            output.writeString(fi.name)
-            if fi.isIndexed:
-                output.writeByte(1)
-            else:
-                output.writeByte(0)
-
-    def read(self, input):
-        size = input.readVInt()
-        for i in range(size):
-            self.addInternal(input.readString(), (input.readByte() != 0))
-
-    def fieldNames(self):
-        # Experimental for auto-queries
-        return self.byName.keys()
-
-class FieldsWriter(object):
-
-    def __init__(self, d, segment, fn):
-        self.fieldInfos = fn
-        self.fieldsStream = d.createFile(segment + '.fdt')
-        self.indexStream = d.createFile(segment + '.fdx')
-
-
-    def addDocument(self, doc):
-        self.indexStream.writeLong(self.fieldsStream.getFilePointer())
-        storedCount = 0
-        for field in doc.fields():
-            if field.isStored:
-                storedCount += 1
-
-        self.fieldsStream.writeVInt(storedCount)
-
-        for field in doc.fields():
-            if field.isStored:
-                self.fieldsStream.writeVInt(self.fieldInfos.fieldNumber(field.name()))
-    
-                bits = 0
-                if field.isTokenized:
-                    bits |= 1
-                self.fieldsStream.writeByte(bits)
-    
-                self.fieldsStream.writeString(field.stringValue())
-
-
-    def close(self):
-        self.fieldsStream.close()
-        self.indexStream.close()
-
-                                           
-class FieldsReader(object):
-
-    def __init__(self, d, segment, fn):
-        self.fieldInfos = fn
-
-        self.fieldsStream = d.openFile(segment + '.fdt')
-        self.indexStream = d.openFile(segment + '.fdx')
-                                      
-        self.sze = self.indexStream.length / 8
-
-
-    def close(self):
-        self.fieldsStream.close()
-        self.indexStream.close()
-
-
-    def size(self):
-        return self.sze
-
-
-    def doc(self, n):
-        self.indexStream.seek(n * 8L)
-        position = self.indexStream.readLong()
-        self.fieldsStream.seek(position)
-
-        doc = document.Document()
-        numFields = self.fieldsStream.readVInt()
-        for i in range(numFields):
-            fieldNumber = self.fieldsStream.readVInt()
-            fi = self.fieldInfos.fieldInfoInt(fieldNumber)
-
-            bits = self.fieldsStream.readByte()
-            tokenized = ((bits & 1) != 0)
-
-            doc.add(document.Field(fi.name, self.fieldsStream.readString(),
-                          True, fi.isIndexed, tokenized))
-
-        return doc
-
-    
--- a/MoinMoin/support/lupy/index/indexwriter.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,228 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-import sys
-
-from MoinMoin.support.lupy import store
-from MoinMoin.support.lupy.index import segmentmerger, segment, documentwriter
-
-class IndexWriter(object):
-
-    def __init__(self, path, create=False, analyzer=None):
-        if path is None:
-            if create is True:
-                self.directory = store.RAMDirectory()
-            else:
-                self.directory = path
-        else:
-            self.directory = store.FSDirectory(path, create)
-            
-        self.infoStream = None
-        self.analyzer = analyzer
-        self.maxMergeDocs = sys.maxint
-        self.mergeFactor = 20 # Never < 2
-        self.segmentInfos = segment.SegmentInfos()
-        self.ramDirectory = store.RAMDirectory()
-        # self.writeLock = open("write.lock", "wb")
-        # locker.lock(self.writeLock, locker.LOCK_EX)
-        
-        if create is True:
-            self.segmentInfos.write(self.directory)
-        else:
-            self.segmentInfos.read(self.directory)
-
-
-    def close(self):
-        self.flushRamSegments()
-        self.ramDirectory.close()
-        # self.writeLock.close()
-        self.directory.close()
-
-
-    def docCount(self):
-        count = 0
-        for si in self.segmentInfos:
-            count += si.docCount
-        return count
-
-
-    def addDocument(self, doc):
-        dw = documentwriter.DocumentWriter(self.ramDirectory, self.analyzer)
-        segmentName = self.newSegmentName()
-        dw.addDocument(segmentName, doc)
-        self.segmentInfos.append(segment.SegmentInfo(segmentName, 1, self.ramDirectory))
-        self.maybeMergeSegments()
-
-
-    def newSegmentName(self):
-        res = '_' + str(self.segmentInfos.counter)
-        self.segmentInfos.counter += 1
-        return res
-
-
-    def optimize(self):
-        self.flushRamSegments()
-        while ((len(self.segmentInfos) > 1) or (len(self.segmentInfos) == 1 and
-                (segmentmerger.SegmentReader.hasDeletions(self.segmentInfos[0]) or
-                 self.segmentInfos[0].dir != self.directory))):
-            minSegment = (len(self.segmentInfos) - self.mergeFactor)
-            if minSegment < 0:
-                self.mergeSegments(0)
-            else:
-                self.mergeSegments(minSegment)
-
-
-    def addIndexes(self, dirs):
-        """Merges all segments from an array of indexes into this index.
-        
-        This may be used to parallelize batch indexing.  A large document
-        collection can be broken into sub-collections.  Each sub-collection can be
-        indexed in parallel, on a different thread, process or machine.  The
-        complete index can then be created by merging sub-collection indexes
-        with this method.
-        
-        After this completes, the index is optimized."""
-        #### UNTESTED ####
-        self.optimize()
-        for d in dirs:
-            sis = segment.SegmentInfos()
-            sis.read(d)
-            for j in range(len(sis)):
-                self.segmentInfos.append(sis[j])
-        self.optimize()
-
-
-    def flushRamSegments(self):
-        """Merges all RAM-resident segments."""
-        
-        sis = self.segmentInfos
-        minSegment = len(sis) - 1
-        docCount = 0
-
-        while minSegment >= 0 and ((sis[minSegment]).dir == self.ramDirectory):
-            docCount += sis[minSegment].docCount
-            minSegment -= 1
-
-        if (minSegment < 0 or (docCount + sis[minSegment].docCount) > self.mergeFactor or
-            not (sis[len(sis)-1].dir == self.ramDirectory)):
-            minSegment += 1
-
-        if minSegment >= len(sis):
-            return
-        self.mergeSegments(minSegment)
-
-
-    def maybeMergeSegments(self):
-        """Incremental segment merger"""
-        
-        targetMergeDocs = self.mergeFactor
-        while targetMergeDocs <= self.maxMergeDocs:
-            # Find segment smaller than the current target size
-            minSegment = len(self.segmentInfos)
-            mergeDocs = 0
-            minSegment -= 1
-            while minSegment >= 0:
-                si = self.segmentInfos[minSegment]
-                if si.docCount >= targetMergeDocs:
-                    break
-                mergeDocs += si.docCount
-                minSegment -= 1
-            if mergeDocs >= targetMergeDocs:    #found a merge to do
-                self.mergeSegments(minSegment + 1)
-            else:
-                break
-            targetMergeDocs *= self.mergeFactor       # increase target size
-            
-
-    def mergeSegments(self, minSegment):
-        """Pops segments off of segmentInfos stack down to minSegment,
-        merges them, and pushes the merged index onto the top of the
-        segmentInfos stack"""
-        
-        mergedName = self.newSegmentName()
-        mergedDocCount = 0
-        merger = segmentmerger.SegmentMerger(self.directory, mergedName)
-        segmentsToDelete = []
-
-        for i in range(minSegment, len(self.segmentInfos)):
-            si = self.segmentInfos[i]
-            reader = segmentmerger.SegmentReader(si)
-            merger.add(reader)
-            if reader.directory is self.directory or reader.directory is self.ramDirectory:
-                segmentsToDelete.append(reader)
-            mergedDocCount += si.docCount
-        merger.merge()
-
-        self.segmentInfos = self.segmentInfos[:minSegment]
-        self.segmentInfos.append(segment.SegmentInfo(mergedName,
-                                                         mergedDocCount,
-                                                         self.directory))
-
-        # TODO some locking here
-        self.segmentInfos.write(self.directory)     # commit before deleting
-        self.deleteSegments(segmentsToDelete)      # delete now-unused segments
-        
-
-    def deleteSegments(self, segs):
-        """Some operating systems (e.g. Windows) don't permit a file to be deleted
-        while it is opened for read (e.g. by another process or thread).  So we
-        assume that when a delete fails it is because the file is open in another
-        process, and queue the file for subsequent deletion."""
-        
-        deletable = []
-
-        self.deleteFilesList(self.readDeleteableFiles(), deletable) # try to delete deletable
-
-        for reader in segs:
-            if reader.directory is self.directory:
-                self.deleteFilesList(reader.files(), deletable)     # try to delete our files
-            else:
-                self.deleteFilesDir(reader.files(), reader.directory)  # delete, eg, RAM files
-            self.writeDeleteableFiles(deletable)                # note files we can't delete
-            
-
-    def deleteFilesDir(self, files, dir):
-        for file in files:
-            dir.deleteFile(file)
-
-
-    def deleteFilesList(self, files, deletable):
-        for file in files:
-            try:
-                self.directory.deleteFile(file)
-            except OSError:
-	        # this occurs on windows where sometimes
-		# win reports a file to be in use
-		# in reality it is windows that is fiddling
-		# with the file and locking it temporarily
-                if self.directory.fileExists(file):
-		    # schedule the file for later deletion
-                    deletable.append(file)
-
-
-    def readDeleteableFiles(self):
-        result = []
-        if not self.directory.fileExists('deletable'):
-            return result
-        input = self.directory.openFile('deletable')
-        try:
-            i = input.readInt()
-            while i > 0:
-                result.append(input.readString())
-                i -= 1
-        finally:
-            input.close()
-        return result
-
-
-    def writeDeleteableFiles(self, files):
-        output = self.directory.createFile('deletable.new')
-        try:
-            output.writeInt(len(files))
-            for file in files:
-                output.writeString(file)
-        finally:
-            output.close()
-        self.directory.renameFile('deletable.new','deletable')
--- a/MoinMoin/support/lupy/index/segment.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,167 +0,0 @@
-# -*- test-case-name: lupy.test -*- 
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-from MoinMoin.support.lupy.index import term
-
-#import copy #broken, see comments at top of this file:
-from MoinMoin.support import copy
-
-class SegmentTermEnum:
-
-    def __init__(self, i, fis, isi):
-        self.input = i
-        self.fieldInfos = fis
-        self.size = self.input.readInt()
-        self.isIndex = isi
-
-        self.indexPointer = 0
-        self.position = -1
-        self.prev = None
-        self.prevTxt = ''
-        self.term = term.Term('','')
-        self.trmInfo = term.TermInfo()
-    
-    
-    def clone(self):
-        """Return a copy of self.
-        """
-        
-        # TODO: implement as __copy__
-        clone = copy.copy(self)
-        clone.input = self.input.clone()
-       
-        clone.trmInfo = term.TermInfo()
-        clone.trmInfo.setTo(self.trmInfo)
-        #clone.prevTxt = self.term.text()
-        return clone
-
-
-    def close(self):
-        self.input.close()
-
-
-    def docFreq(self):
-        return self.trmInfo.docFreq
-
-
-    def freqPointer(self):
-        return self.trmInfo.freqPointer
-
-
-    def next(self):
-        self.position += 1
-        
-        if self.position > self.size -1:
-            self.position += 1
-            self.term = None
-            raise StopIteration
-
-        self.prev = self.term
-        self.term = self.readTerm()
-
-        self.trmInfo.docFreq = self.input.readVInt()
-        self.trmInfo.freqPointer += self.input.readVLong()
-        self.trmInfo.proxPointer += self.input.readVLong()
-
-        if self.isIndex:
-            self.indexPointer += self.input.readVLong()
-            
-        return self.term, self.indexPointer
-
-    def __iter__(self):
-        return self
-
-    def proxPointer(self):
-        return self.trmInfo.proxPointer
-
-
-    def readTerm(self):
-        # this bit is a mite tricky. in the java version they use a
-        # buffer for reading and just use 'start' as the offset for
-        # putting the read string into the buffer; when strings with
-        # common prefixes were read in, the offset would preserve the
-        # prefix. So here we just remember the last string and slice
-        # the common prefix from it.        
-        start = self.input.readVInt()        
-        self.prevTxt = txt = self.prevTxt[:start] + self.input.readString()        
-        fi = self.input.readVInt()
-        fld = self.fieldInfos.fieldName(fi)        
-        t = term.Term(fld,txt,False)
-        return t
-
-
-    def seek(self, pointer, p, t, ti):
-        self.input.seek(pointer)
-        self.position = p
-        self.term = t
-        self.prev = None
-        self.trmInfo.setTo(ti)
-        self.prevTxt = self.term.text()
-
-    def termInfo(self, ti=None):
-        if ti is None:
-            nti = term.TermInfo()
-            nti.setTo(self.trmInfo)
-            return nti
-        else:
-            ti.setTo(self.trmInfo)
-
-    def __cmp__(a, b):
-        return cmp(a.term, b.term)
-
-
-class SegmentInfo(object):
-
-    def __init__(self, name, docCount, d):
-        self.name = name
-        self.docCount = docCount
-        self.dir = d
-
-
-class SegmentInfos(list):
-
-    def __init__(self, lst = None):
-        self.counter = 0
-        if lst is not None:
-            self.extend(lst)
-    
-    def __getslice__(self, lo, hi):
-        res = SegmentInfos(list.__getslice__(self, lo, hi))
-        res.counter = self.counter
-        return res
-    
-    def read(self, directory):
-        input = directory.openFile('segments')
-        try:
-            self.counter = input.readInt()      # read counter
-            i = input.readInt()
-            while i > 0:                        # read segment infos
-                si = SegmentInfo(input.readString(),
-                                             input.readInt(),
-                                             directory)
-                self.append(si)
-                i -= 1
-        finally:
-            input.close()
-
-    def write(self, directory):
-        output = directory.createFile('segments.new')
-        try:
-            output.writeInt(self.counter)
-            output.writeInt(len(self))
-            for si in self:
-                output.writeString(si.name)
-                output.writeInt(si.docCount)
-        finally:
-            output.close()
-
-        # Install new segment info
-        directory.renameFile('segments.new','segments')
-        
-    def __repr__(self):
-        return 'SegInfo' + list.__repr__(self)
-        
-
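
The removed SegmentTermEnum.readTerm above rebuilds each term from a shared-prefix length plus a suffix, and TermInfosWriter.writeTerm in term.py (removed further below) produces that encoding. The following is a minimal standalone sketch of the same scheme in plain Python; the names encode_terms/decode_terms are illustrative only and are not part of lupy or of this changeset.

    def encode_terms(terms):
        """Encode a sorted list of strings as (shared_prefix_len, suffix) pairs."""
        encoded = []
        prev = ''
        for t in terms:
            # length of the prefix shared with the previous term
            start = 0
            while start < min(len(prev), len(t)) and prev[start] == t[start]:
                start += 1
            encoded.append((start, t[start:]))
            prev = t
        return encoded

    def decode_terms(encoded):
        """Rebuild the strings, mirroring readTerm's prevTxt[:start] + suffix."""
        decoded = []
        prev = ''
        for start, suffix in encoded:
            prev = prev[:start] + suffix
            decoded.append(prev)
        return decoded

    if __name__ == '__main__':
        words = ['segment', 'segments', 'term', 'terminfo']
        pairs = encode_terms(words)
        # [(0, 'segment'), (7, 's'), (0, 'term'), (4, 'info')]
        assert decode_terms(pairs) == words
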
--- a/MoinMoin/support/lupy/index/segmentmerger.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1078 +0,0 @@
-# -*- test-case-name: lupy.test -*-
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-import sys
-
-from array import array
-
-from  MoinMoin.support.lupy.util import BitVector
-
-from MoinMoin.support.lupy.index import field, term, segment
-
-#import copy #broken, see comments at top of this file:
-from MoinMoin.support import copy
-
-from bisect import insort
-import os
-
-class IndexReader(object):
-    
-    """IndexReader is an abstract class, providing an interface for
-    accessing an index. Search of an index is done entirely through this abstract
-    interface, so that any subclass which implements it is searchable.
-
-    Concrete subclasses of IndexReader are usually constructed with a call to L{lupy.search.indexsearcher.open}C{(path)}.
-
-    For efficiency, in this API documents are often referred to via document
-    numbers, non-negative integers which each name a unique document in the index.
-    These document numbers are ephemeral--they may change as documents are added
-    to and deleted from an index. Clients should thus not rely on a given document
-    having the same number between sessions. """
-
-    def __init__(self, d):
-        self.directory = d
-
-    def indexExists(self, d):
-        """Returns True if an index exists at the specified directory."""
-        return self.directory.fileExists('segments')
-
-    def isLocked(self):
-        # return self.directory.fileExists('write.lock')
-        return False
-
-    def lastModified(self, d):
-        """Returns the time the index in this directory was last modified."""
-        return self.directory.fileModified('segments')
-    
-    def lastModifiedString(self, d):
-        return self.lastModified(d)
-    
-
-    #def unlock(self, directory):
-    #    """Forcibly unlocks the index in the named directory.
-    #    
-    #    Caution: this should only be used by failure recovery code,
-    #    when it is known that no other process nor thread is in fact
-    #    currently accessing this index."""
-    #    
-    #    directory.deleteFile('write.lock')
-    #    directory.deleteFile('commit.lock')
-        
-
-    def close(self):
-        """Closes files associated with this index.
-        Also saves any new deletions to disk.
-        No other methods should be called after this has been called."""
-        
-        self.doClose()
-        
-
-    def doClose(self):
-        pass
-
-
-    def delete(self, docNum):
-        
-        """Deletes the document numbered C{docNum}.  Once a document
-        is deleted it will not appear in TermDocs or TermPositions
-        enumerations.  Attempts to read its field with the L{document}
-        method will result in an error.  The presence of this document
-        may still be reflected in the C{docFreq} statistic, though
-        this will be corrected eventually as the index is further
-        modified.  """
-        self.doDelete(docNum)
-        
-
-    def deleteTerm(self, term):
-        """ Deletes all documents containing C{term}.
-        This is useful if one uses a document field to hold a unique ID string for
-        the document.  Then to delete such a document, one merely constructs a
-        term with the appropriate field and the unique ID string as its text and
-        passes it to this method.  Returns the number of documents deleted.
-        """
-        docs = self.termDocsTerm(term)
-        try:        
-            return len([self.delete(doc) for doc,freq in docs])
-        finally:
-            docs.close()
-
-    
-    
-    def termDocs(self):
-        """Returns an unpositioned TermDocs enumerator.
-        """
-    
-    
-    def termDocsTerm(self, term):
-        """ Returns an enumeration of all the documents which contain
-        C{term}. For each document, the document number, the frequency of
-        the term in that document is also provided, for use in search scoring.
-        Thus, this method implements the mapping:
-
-        Term S{->} <docNum, freq>*
-
-        The enumeration is ordered by document number.  Each document number
-        is greater than all that precede it in the enumeration."""
-        
-        termDocs = self.termDocs()
-        termDocs.seekTerm(term)
-        return termDocs
-    
-    
-    def termPositionsTerm(self, term):
-
-        """Returns an enumeration of all the documents which contain
-        C{term}.  For each document, in addition to the document
-        number and frequency of the term in that document, a list of
-        all of the ordinal positions of the term in the document is
-        available.  Thus, this method implements the mapping:
-    
-        M{Term S{->} <docNum, freq, <pos(1), pos(2), ... , pos(freq-1)>>*}
-
-        This positional information facilitates phrase and proximity searching.
-        
-        The enumeration is ordered by document number.  Each document
-        number is greater than all that precede it in the
-        enumeration."""
-        
-        termPositions = self.termPositions()
-        termPositions.seekTerm(term)
-        return termPositions
-        
-class SegmentTermDocs(object):
-    
-    def __init__(self, parent):
-        self.parent = parent
-        self.freqStream = parent.freqStream.clone()
-        self.deletedDocs = parent.deletedDocs
-
-        self.docu = 0
-        self.frq = 0
-        
-    def close(self):
-        self.freqStream.close()
-
-    def __iter__(self):
-        return self
-
-    def next(self):
-        while True:
-            if self.freqCount == 0:
-                raise StopIteration
-            
-            docCode = self.freqStream.readVInt()
-            self.docu += docCode >> 1
-            if (docCode & 1):
-                self.frq = 1
-            else:
-                self.frq = self.freqStream.readVInt()
-
-            self.freqCount -= 1
-            
-            if self.deletedDocs is None or (not self.deletedDocs.get(self.docu)):
-                return self.docu, self.frq
-            self.skippingDoc()
-            
-
-    def read(self):
-        return list(self)
-
-    def skippingDoc(self):
-        pass
-
-    def seekTerm(self, term):
-        ti = self.parent.tis.getTerm(term)
-        self.seekTi(ti)
-
-
-    def seekTi(self, ti):
-        if ti is None:
-            self.freqCount = 0
-        else:
-            self.freqCount = ti.docFreq
-            self.docu = 0
-            self.freqStream.seek(ti.freqPointer)
-            
-
-class SegmentTermPositions(SegmentTermDocs):
-
-    def __init__(self, p):
-        self.proxCount = 0
-        self.position = 0
-        SegmentTermDocs.__init__(self, p)
-
-        self.proxStream = self.parent.proxStream.clone()
-
-    def close(self):
-        SegmentTermDocs.close(self)
-        self.proxStream.close()
-
-
-    def next(self):
-        #generator for accessing positions in the current doc
-        #kinda lame since it utterly breaks after next iteration
-        def nextPosition(freq):
-            for i in range(freq):
-                self.proxCount -= 1
-                self.position += self.proxStream.readVInt()
-                yield self.position
-
-        #skip unused positions
-        for i in range(self.proxCount):
-
-            self.proxStream.readVInt()            
-
-        self.doc, self.frq = SegmentTermDocs.next(self)
-        self.proxCount = self.frq
-        self.position = 0
-        return self.doc, self.frq, nextPosition(self.frq)
-
-
-    def skippingDoc(self):
-        # skip all positions
-        for f in range(self.frq, 0, -1):
-            self.proxStream.readVInt()    
-
-    def seekTi(self, ti):
-        SegmentTermDocs.seekTi(self, ti)
-
-        if ti is not None:
-            self.proxStream.seek(ti.proxPointer)
-        else:
-            self.proxCount = 0           
-            
-    def __repr__(self):
-        s = '<stp>' + str(self.position)
-        return s
-
-class SegmentMergeInfo(object):
-
-    def __init__(self, b, te, r):
-        self.base = b
-        self.reader = r
-        self.termEnum = te
-        self.term = te.term
-        self.docMap = None
-        self.postings = SegmentTermPositions(r)
-
-        if self.reader.deletedDocs is not None:
-            # build an array that maps document numbers around deletions
-            deletedDocs = self.reader.deletedDocs
-            maxDoc = self.reader.maxDoc()
-            self.docMap = [0] * maxDoc
-            j = 0
-            for i in range(maxDoc):
-                if deletedDocs.get(i):
-                    self.docMap[i] = -1
-                else:
-                    self.docMap[i] = j
-                    j += 1
-            
-
-    def close(self):
-        self.termEnum.close()
-        self.postings.close()
-
-
-    def advance(self):
-        #I don't see a reasonable way out of this one.
-        try:
-            self.term, self.indexPointer= self.termEnum.next()
-            self.trmInfo = self.termEnum.termInfo()
-            return True
-        except StopIteration: 
-            self.term = None
-            return False
-
-    def __repr__(self):
-        return '<SegMergInfo' + str(self.term) +'>'
-
-    def __lt__(a, b):
-        if a.term == b.term:
-            return a.base < b.base
-        else:
-            return a.termEnum < b.termEnum 
-
-
-    
-class SegmentMerger(object):
-
-    def __init__(self, dir, name):
-        self.directory = dir
-        self.segment = name
-        self.freqOutput = None
-        self.proxOutput = None
-        self.termInfosWriter = None
-        self.readers = []
-        self.termInfo = term.TermInfo()
-        self.smis = []
-        
-    def add(self, reader):
-        self.readers.append(reader)
-
-        
-    def appendPostings(self, smis, n):
-        lastDoc = 0
-        df = 0          # number of docs with term
-
-        for i in range(n):
-            smi = smis[i]
-            postings = smi.postings
-            base = smi.base
-            docMap = smi.docMap
-            smi.termEnum.termInfo(self.termInfo)
-            postings.seekTi(self.termInfo)
-
-            for doc, freq, nextPos in postings:
-                if docMap is None:
-                    # no deletions
-                    d = base + doc
-                else:
-                    # re-map around deletions
-                    d = base + docMap[postings.doc]
-                if d < lastDoc:
-                    raise RuntimeError, 'docs out of order'
-
-                # use low bit to flag freq = 1
-                docCode = (d - lastDoc) << 1
-                lastDoc = d
-    
-                if freq == 1:
-                    # write doc & freq=1
-                    self.freqOutput.writeVInt(docCode | 1)
-                else:
-                    # write doc
-                    self.freqOutput.writeVInt(docCode)
-                    # write frequency in doc
-                    self.freqOutput.writeVInt(freq)
-    
-                lastPosition = 0
-                for position in nextPos:
-                    self.proxOutput.writeVInt(position - lastPosition)
-                    lastPosition = position
-    
-                df += 1
-
-        return df
-
-
-    def merge(self):
-        try:
-            self.mergeFields()
-            self.mergeTerms()
-            self.mergeNorms()
-        finally:
-            for reader in self.readers:
-                reader.close()
-
-
-    def mergeFields(self):
-        # merge field names
-        self.fieldInfos = field.FieldInfos()
-        for reader in self.readers:
-            self.fieldInfos.addFieldInfos(reader.fieldInfos)
-        self.fieldInfos.writeDir(self.directory, self.segment + '.fnm')
-
-        # merge field values
-        fieldsWriter = field.FieldsWriter(self.directory,
-                                                 self.segment,
-                                                 self.fieldInfos)
-
-        try:
-            for reader in self.readers:
-                deletedDocs = reader.deletedDocs
-                maxDoc = reader.maxDoc()
-                for j in range(maxDoc):
-                    if deletedDocs is None or not deletedDocs.get(j):
-                        # skip deleted docs
-                        fieldsWriter.addDocument(reader.document(j))
-        finally:
-            fieldsWriter.close()
-                                                 
-        
-
-    def mergeNorms(self):
-        for i in range(len(self.fieldInfos)):
-            fi = self.fieldInfos.fieldInfoInt(i)
-            if fi.isIndexed:
-                output = self.directory.createFile(self.segment + '.f' + str(i))
-                try:
-                    for reader in self.readers:
-                        deletedDocs = reader.deletedDocs
-                        input = reader.normStream(fi.name)
-                        maxDoc = reader.maxDoc()
-                        try:
-                            for k in range(maxDoc):
-                                if input is None:
-                                    norm = 0
-                                else:
-                                    norm = input.readByte()
-                                output.writeByte(norm)
-                        finally:
-                            if input is not None:
-                                input.close()
-                finally:
-                    output.close()
-
-
-    def mergeTermInfo(self, smis, n):
-        freqPointer = self.freqOutput.getFilePointer()
-        proxPointer = self.proxOutput.getFilePointer()
-
-        # Append posting data
-        df = self.appendPostings(smis, n)
-
-        if df > 0:
-            # add an entry to the dictionary with pointers to prox and freq files
-            self.termInfo.set(df, freqPointer, proxPointer)
-            self.termInfosWriter.add(smis[0].term, self.termInfo)
-
-
-
-    def mergeTermInfos(self):
-        smis = self.smis 
-        base = 0
-
-
-        for reader in self.readers:
-            termEnum = reader.terms()
-            smi = SegmentMergeInfo(base, termEnum, reader)
-            base += reader.numDocs()
-            if smi.advance():
-                insort(smis, smi)
-            else:
-                smi.close()
-
-        match = [0] * len(self.readers)
-        while len(smis) > 0:
-            # pop matching terms
-            matchSize = 0
-            match[matchSize] = smis.pop(0)
-            matchSize += 1
-            term = match[0].term
-            top =  smis and smis[0] or None
-            
-            while top is not None and cmp(term,top.term) == 0:
-                match[matchSize] = smis.pop(0)
-                matchSize += 1
-                top =  smis and smis[0] or None
-                    
-            # add new TermInfo
-            self.mergeTermInfo(match, matchSize)
-            
-            while matchSize > 0:
-                matchSize -= 1
-                smi = match[matchSize]
-                if smi.advance():
-                    insort(smis, smi)
-                else:
-                    smi.close()
-
-
-    def mergeTerms(self):
-        try:
-            self.freqOutput = self.directory.createFile(self.segment + '.frq')
-            self.proxOutput = self.directory.createFile(self.segment + '.prx')
-            self.termInfosWriter = term.TermInfosWriter(self.directory,
-                                                                  self.segment,
-                                                                  self.fieldInfos)
-            self.mergeTermInfos()
-        finally:
-            if self.freqOutput is not None:
-                self.freqOutput.close()
-            if self.proxOutput is not None:
-                self.proxOutput.close()
-            if self.termInfosWriter is not None:
-                self.termInfosWriter.close()
-            for smi in self.smis:
-                smi.close()
-    
-    def segmentReader(self, i):
-        return self.readers[i]
-
-    
-class SegmentReader(IndexReader):
-    
-    # Class methods
-    def hasDeletions(cls, si):
-        return si.dir.fileExists(si.name + '.del')
-    
-    hasDeletions = classmethod(hasDeletions)
-    
-
-    # instance methods
-    def __init__(self, si, closeDir=False):
-        self.directory = si.dir
-        self.closeDirectory = closeDir
-        self.segment = si.name
-        self.nrms = {}
-        self.deletedDocsDirty = False
-
-        self.fieldInfos = field.FieldInfos(self.directory,
-                                                self.segment + '.fnm')
-        self.fieldsReader = field.FieldsReader(self.directory,
-                                                      self.segment,
-                                                      self.fieldInfos)
-
-        self.tis = TermInfosReader(self.directory,
-                                                   self.segment,
-                                                   self.fieldInfos)
-
-        if SegmentReader.hasDeletions(si):
-            self.deletedDocs = BitVector(self.directory,
-                                                   self.segment + '.del')
-        else:
-            self.deletedDocs = None
-
-        # makes sure that all index files have been read or are kept open
-        # so that if an index update removes them we'll still have them
-        self.freqStream = self.directory.openFile(self.segment + '.frq')
-        self.proxStream = self.directory.openFile(self.segment + '.prx')
-
-        self.openNorms()
-
-
-    def closeNorms(self):
-        for v in self.nrms.values():
-            norm = v
-            v.inStream.close()
-
-
-    def docFreq(self, t):
-        ti = self.tis.getTerm(t)
-        if ti is None:
-            return 0
-        else:
-            return ti.docFreq
-
-
-    def doClose(self):
-        if self.deletedDocsDirty:
-            self.deletedDocs.write(self.directory, self.segment + ".tmp")
-            self.directory.renameFile(self.segment + ".tmp",
-                                      self.segment + ".del")
-            self.deletedDocsDirty = False
-
-        self.fieldsReader.close()
-        self.tis.close()
-
-        if self.freqStream is not None:
-            self.freqStream.close()
-        if self.proxStream is not None:
-            self.proxStream.close()
-
-        self.closeNorms()
-
-        if self.closeDirectory:
-            self.directory.close()
-
-
-    def document(self, n):
-        if self.isDeleted(n):
-            raise Exception, 'attempt to access deleted document'
-        return self.fieldsReader.doc(n)
-
-
-    def doDelete(self, docNum):
-        if self.deletedDocs is None:
-            self.deletedDocs = BitVector(self.maxDoc())
-        self.deletedDocsDirty = True
-        self.deletedDocs.set(docNum)
-
-
-    def files(self):
-        suffix = ['.fnm','.fdx','.fdt','.tii','.tis','.frq','.prx']
-        files = map((lambda x: self.segment + x), suffix)
-
-        if self.directory.fileExists(self.segment + '.del'):
-            files.append(self.segment + '.del')
-            
-        for i in range(len(self.fieldInfos)):
-            fi = self.fieldInfos.fieldInfoInt(i)
-            if fi.isIndexed:
-                files.append(self.segment + '.f' + str(i))
-                
-        return files
-
-
-    def isDeleted(self, n):
-        return (self.deletedDocs is not None and self.deletedDocs.get(n))
-
-
-    def maxDoc(self):
-        return self.fieldsReader.size()
-    
-
-    def normsField(self, field):
-        norm = self.nrms.get(field, None)
-        if norm is None:
-            return None
-        if norm.bytes is None:
-            bytes = array('B',[0x00]*self.maxDoc())
-            self.norms(field, bytes, 0)
-            norm.bytes = bytes
-
-        return norm.bytes
-    
-
-    def norms(self, field, bytes, offset):
-        normStream = self.normStream(field)
-        if normStream is None:
-            return
-        try:
-            normStream.readBytes(bytes, offset, self.maxDoc())
-        finally:
-            normStream.close()
-
-
-    def normStream(self, field):
-        norm = self.nrms.get(field, None)
-        if norm is None:
-            return None
-        # Cloning????
-        result = norm.inStream.clone()
-        result.seek(0)
-        return result
-
-
-    def numDocs(self):
-        n = self.maxDoc()
-        if self.deletedDocs is not None:
-            n -= self.deletedDocs.count()
-        return n
-
-    def openNorms(self):
-        for i in range(len(self.fieldInfos)):
-            fi = self.fieldInfos.fieldInfoInt(i)
-            if fi.isIndexed:
-                self.nrms[fi.name]=Norm(self.directory.openFile(
-                    (self.segment + '.f' + str(fi.number))))
-                
-
-    def termDocs(self):
-        return SegmentTermDocs(self)
-        
-
-
-    def termPositions(self):
-        return SegmentTermPositions(self)
-    
-
-    def terms(self, t = None):
-        return self.tis.terms(t)
-
-    def fieldNames(self):
-        # Experimental for auto-queries
-        # Return a sorted list of all the field names
-        fNames = self.fieldInfos.fieldNames()
-        if not fNames:
-            return []
-        # Remove the field with no name
-        fNames.remove('')
-        return fNames
-        
-
-
-class Norm(object):
-
-    def __init__(self, inStream):
-        self.inStream = inStream
-        self.bytes = None
-
-            
-class SegmentsReader(IndexReader):
-
-    def __init__(self, directory, r):
-        IndexReader.__init__(self, directory)
-        self.readers = r
-        self.maxiDoc = 0
-        self.normsCache = {}
-        self.numiDocs = -1
-        self.starts = [0]
-        
-        i = 0
-        for reader in self.readers:
-            self.maxiDoc += reader.maxDoc()
-            self.starts.append(self.maxiDoc)
-
-    def docFreq(self, t):
-        total = 0
-        for r in self.readers:
-            total += r.docFreq(t)
-        return total
-
-
-    def doClose(self):
-        for r in self.readers:
-            r.close()
-
-
-    def document(self, n):
-        # find segment num
-        i = self.readerIndex(n)
-        # dispatch to segment reader
-        return self.readers[i].document(n - self.starts[i])    
-
-
-    def doDelete(self, n):
-        # invalidate cache
-        self.numiDocs = -1
-        # find seg num
-        i = self.readerIndex(n)
-        # dispatch to seg reader
-        self.readers[i].doDelete(n - self.starts[i])
-
-
-    def isDeleted(self, n):
-        # find segment num
-        i = self.readerIndex(n)
-        # dispatch to segment reader
-        return self.readers[i].isDeleted(n - self.starts[i])
-
-
-    def maxDoc(self):
-        return self.maxiDoc
-
-
-    def normsField(self, field):
-        bytes = self.normsCache.get(field, None)
-        if bytes is not None:
-            # cache hit
-            return bytes
-
-        bytes = array('B',[0x00] * self.maxDoc())
-        for i in range(len(self.readers)):
-            self.readers[i].norms(field, bytes, self.starts[i])
-        # update cache
-        self.normsCache[field]=bytes
-        return bytes
-
-
-    #def numDocs(self):
-    #    # check cache
-    #    if numiDocs == -1:
-    #        # cache miss - recompute
-    #        n = 0
-    #        for r in self.readers:
-    #            # sum from readers
-    #            n += r.numDocs()
-    #        self.numiDocs = n
-    #    return self.numiDocs
-
-
-    def readerIndex(self, n):
-        # Search starts array for first element less than n
-        lo = 0
-        hi = len(self.readers) - 1
-
-        while hi >= lo:
-            mid = (lo + hi) >> 1
-            midValue = self.starts[mid]
-            if n < midValue:
-                hi = mid - 1
-            elif n > midValue:
-                lo = mid + 1
-            else:
-                return mid
-        return hi
-
-
-    def termDocs(self):
-        return SegmentsTermDocs(self.readers, self.starts)
-
-
-    def termPositions(self):
-        return SegmentsTermPositions(self.readers, self.starts)
-
-    def terms(self, t = None):
-        return SegmentsTermEnum(self, t)
-    
-    def fieldNames(self):
-        # Experimental for auto-queries
-        if self.readers:
-            return self.readers[0].fieldInfos.fieldNames()
-        else:
-            return []
-
-class SegmentsTermEnum(segment.SegmentTermEnum):
-
-    def __init__(self, segmentsreader, term=None):
-        self.enums = [sr.terms(term) for sr in segmentsreader.readers]
-        self.prev = None
-        min = self.enums[0]
-        for enum in self.enums:
-            if enum.term is not None and enum < min:
-                min = enum
-        self.term = min.term
-
-    def close(self):
-        for e in self.enums: e.close()
-
-    def next(self):
-        min = self.enums[0]
-        for enum in self.enums:
-            if enum.term is not None and enum<min:
-                min = enum
-        if min.term is None:
-            raise StopIteration
-        else:
-            self.prev = self.term
-            self.term = min.term
-            try:
-                min.next()
-            except StopIteration:
-                pass
-
-
-class SegmentsTermDocs(object):
-
-    def __init__(self, r, s):
-        self.readers = r
-        self.starts = s
-
-        self.base = 0
-        self.pointer = 0
-        self.current = None
-        self.term = None
-        
-        self.segTermDocs = [None] * len(r)
-
-
-    def close(self):
-        for segtdoc in self.segTermDocs:
-            if segtdoc is not None:
-                segtdoc.close()
-
-    def freq(self):
-        return self.current.frq
-    frq = property(freq) # what can i say? API in transition
-
-    def __iter__(self):
-        def x():
-            if self.current is not None:
-                for item in self.current:
-                    yield item
-            for ptr, reader in list(enumerate(self.readers))[self.pointer:]:
-                self.pointer = ptr
-                self.base = self.starts[self.pointer]
-                self.current = self.termDocsInt(self.pointer)
-                for item in self.current:
-                    yield (item[0]+self.base,) + item[1:]
-        return x()
-
-
-    def read(self):
-        dfs = []
-        while True:
-            while self.current is None:
-                if self.pointer < len(self.readers):
-                    # try next segment
-                    self.base = self.starts[self.pointer]
-                    self.current = self.termDocsInt(self.pointer)
-                    self.pointer += 1
-                else:
-                    return dfs
-            segmentDFs = self.current.read()
-            if segmentDFs:
-                b = self.base
-                for i, (d, f) in enumerate(segmentDFs):
-                    segmentDFs[i] = d + b, f
-                dfs.extend(segmentDFs)
-            else:
-                self.current = None
-
-
-    def seekTerm(self, term):
-        self.term = term
-        self.base = 0
-        self.pointer = 0
-        self.current = None
-
-    def termDocsInt(self, i):
-        if self.term is None:
-            return None
-        result = self.segTermDocs[i]
-        if result is None:
-            result = self.termDocsReader(self.readers[i])
-            self.segTermDocs[i] = result
-        result.seekTerm(self.term)
-        return result
-
-
-    def termDocsReader(self, reader):
-        return reader.termDocs()
-
-
-
-class SegmentsTermPositions(SegmentsTermDocs):
-        
-    def termDocsReader(self, reader):
-        return reader.termPositions()
-
-
-    #def nextPosition(self):
-    #    return self.current.nextPosition()
-
-class TermInfosReader(object):
-
-    def __init__(self, d, seg, fis):
-        self.directory = d
-        self.segment = seg
-        self.fieldInfos = fis
-        
-        self.indexTerms = None
-
-        self.enum = segment.SegmentTermEnum(
-            self.directory.openFile(self.segment + '.tis'),
-            self.fieldInfos,
-            False)
-        
-        self.sze = self.enum.size
-        self.readIndex()
-
-
-    def close(self):
-        if self.enum is not None:
-            self.enum.close()
-
-
-
-    def getInt(self, position):
-        if self.sze == 0:
-            return None
-
-        if (self.enum is not None and self.enum.term is not None and
-            position > self.enum.position and
-            position < (self.enum.position + term.TermInfosWriter.INDEX_INTERVAL)):
-            # can avoid seek
-            return self.scanEnumToPosition(position)
-
-        # must seek
-        self.seekEnum(position / term.TermInfosWriter.INDEX_INTERVAL)
-        return self.scanEnumToPosition(position)
-
-
-    def getIndexOffset(self, term):
-        #TODO - use bisect module?
-  
-        lo = 0
-        hi = len(self.indexTerms) - 1
-        
-        while hi >= lo:
-            mid = (lo + hi) >> 1
-            delta = cmp(term, self.indexTerms[mid])
-            if delta < 0:
-                hi = mid - 1
-            elif delta > 0:
-                lo = mid + 1
-            else:
-                return mid
-                
-        return hi
-    
-
-    def getTerm(self, t):
-        if self.sze == 0:
-            return None
-
-        # Optimize sequential access: first try scanning
-        # cached enum w/o seeking
-
-        if (self.enum.term is not None and
-            ((self.enum.prev is not None and cmp(t,self.enum.prev) > 0) or
-             cmp(t,self.enum.term) >= 0)):
-            # term is at or past current
-            enumOffset = (self.enum.position/term.TermInfosWriter.INDEX_INTERVAL)+1
-
-            if (len(self.indexTerms) == enumOffset or
-                cmp(t, self.indexTerms[enumOffset]) < 0):
-                # but before end of block
-                # no need to seek
-                return self.scanEnum(t)
-
-        # random-access: must seek
-        self.seekEnum(self.getIndexOffset(t))
-        return self.scanEnum(t)
-                                          
-
-
-    def getPosition(self, term):
-        if self.sze == 0:
-            return -1
-
-        indexOffset = self.getIndexOffset(term)
-        self.seekEnum(indexOffset)
-
-        while term > self.enum.term:
-            try:
-                self.enum.next()
-            except StopIteration:
-                break
-
-        if self.enum.term is not None and term == self.enum.term:
-            return self.enum.position
-        else:
-            return -1
-
-
-    def readIndex(self):
-        indexEnum = segment.SegmentTermEnum(
-            self.directory.openFile(self.segment + '.tii'),
-            self.fieldInfos,
-            True)
-
-        try:
-            indexSize = indexEnum.size
-
-            self.indexTerms = []
-            self.indexInfos = []
-            self.indexPointers = []
-
-            for term, indexPointer in indexEnum:
-                self.indexTerms.append(indexEnum.term)
-                self.indexInfos.append(indexEnum.termInfo())
-                self.indexPointers.append(indexEnum.indexPointer)
-
-        finally:
-            indexEnum.close()
-
-
-    def scanEnumToPosition(self, position):
-        # renamed so the term-based scanEnum below does not shadow it
-        while self.enum.position < position:
-            try:
-                self.enum.next()
-            except StopIteration:
-                return None
-        return self.enum.term
-
-
-    def scanEnum(self, term):
-        # Scans within block for matching term.
-        t = self.enum.term
-        while (cmp(term, t) > 0):
-            try:
-                #ugh ugh it is 7am make it stop
-                t = self.enum.next()[0]
-            except StopIteration:
-                break
-        if (self.enum.term is not None and cmp(term, self.enum.term) == 0):
-            return self.enum.termInfo()
-        else:
-            return None
-
-
-    def seekEnum(self, indexOffset):
-        self.enum.seek(self.indexPointers[indexOffset],
-                       (indexOffset * term.TermInfosWriter.INDEX_INTERVAL) - 1,
-                       self.indexTerms[indexOffset], self.indexInfos[indexOffset])
-
-    def terms(self, term = None):
-        if term is None:
-            # Returns an enumeration of all the Terms and TermInfos in the set
-            if (self.enum.position != -1):
-                # if not at start
-                # reset to start
-                self.seekEnum(0)
-        else:
-            self.getTerm(term)
-            
-        res = self.enum.clone()
-        return res
-    
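
The removed appendPostings above writes each posting list as document-number gaps shifted left by one bit, using the low bit to flag freq == 1, and SegmentTermDocs.next reverses that decoding. Below is a minimal standalone sketch of that encoding; encode_postings/decode_postings are illustrative names, not lupy APIs, and variable-length integer I/O is left out.

    def encode_postings(postings):
        """Encode (doc, freq) pairs the way the removed appendPostings does:
        doc numbers as gaps, shifted left one bit, low bit set when freq == 1."""
        out = []
        last_doc = 0
        for doc, freq in postings:
            doc_code = (doc - last_doc) << 1
            last_doc = doc
            if freq == 1:
                out.append(doc_code | 1)
            else:
                out.append(doc_code)
                out.append(freq)
        return out

    def decode_postings(codes):
        """Invert the encoding, mirroring SegmentTermDocs.next."""
        postings = []
        doc = 0
        it = iter(codes)
        for doc_code in it:
            doc += doc_code >> 1
            if doc_code & 1:
                freq = 1
            else:
                freq = next(it)
            postings.append((doc, freq))
        return postings

    if __name__ == '__main__':
        data = [(3, 1), (7, 2), (12, 1)]
        codes = encode_postings(data)   # [7, 8, 2, 11]
        assert decode_postings(codes) == data
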
--- a/MoinMoin/support/lupy/index/term.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,157 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-class Term(object):
-    
-    def __init__(self, fld, txt, intern=False):
-        self.set(fld, txt)
-
-    def __cmp__(self, other):
-        """Compares two terms, returning an integer which is less than zero iff this
-        term belongs after the argument, equal zero iff this term is equal to the
-        argument, and greater than zero iff this term belongs after the argument.
-
-        The ordering of terms is first by field, then by text."""
-
-        if self.fld == other.fld:
-            # fields are interned
-            return cmp(self.txt, other.txt)
-        else:
-            return cmp(self.fld, other.fld)
-
-    def __hash__(self):
-        return self._hash
-    
-    def field(self):
-        return self.fld
-    
-    def readObject(self, inp):
-        inp.defaultReadObject()
-
-    def set(self, fld, txt):
-        self.fld = fld
-        self.txt = txt
-        self._hash = hash(fld + txt)
-
-    def text(self):
-        return self.txt
-
-    def __repr__(self):
-        return 'Term<'+self.fld.encode('utf8')+':'+self.txt.encode('utf8')+'>'
-
-class TermInfo(object):
-
-    def __init__(self):
-        self.docFreq = 0
-        self.freqPointer = 0
-        self.proxPointer = 0
-
-    def set(self, df, fp, pp):
-        self.docFreq = df
-        self.freqPointer = fp
-        self.proxPointer = pp
-
-    def setTo(self, ti):
-        self.docFreq = ti.docFreq
-        self.freqPointer = ti.freqPointer
-        self.proxPointer = ti.proxPointer
-
-    def __repr__(self):
-        return '<TermInfo:d:' + str(self.docFreq)+ ' f:' + str(self.freqPointer) +\
-               ' p:' + str(self.proxPointer) + '>'
-
-
-class TermInfosWriter(object):
-    INDEX_INTERVAL = 128
-
-
-    def __init__(self, d, seg, fis, isIndex = False):
-        
-        self.initialize(d, seg, fis, isIndex)
-        
-        self.size = 0
-        self.lastIndexPointer = 0
-        self.lastTerm = Term('','')
-        self.lastTi = TermInfo()
-        
-        if isIndex is False:
-            self.other = TermInfosWriter(d, seg, fis, True)
-            self.other.other = self
-
-            
-    def initialize(self, d, seg, fis, isi):
-        self.fieldInfos = fis
-        self.isIndex = isi
-        if isi is True:
-            ext = '.tii'
-        else:
-            ext = '.tis'
-            
-        self.output=d.createFile(seg + ext)
-        # leave space for size
-        self.output.writeInt(0)
-
-
-    def stringDifference(self, s1, s2):
-        prefixLength = min(len(s1), len(s2))
-        for i in range(prefixLength):
-            if s1[i] != s2[i]:
-                return i
-        
-        return prefixLength
-
-
-    def add(self, term, ti):
-        if not self.isIndex and term <= self.lastTerm:
-            raise Exception, "term out of order: " + str(term) + str(self.lastTerm)
-        if ti.freqPointer < self.lastTi.freqPointer:
-            raise Exception, "freqPointer out of order"
-        if ti.proxPointer < self.lastTi.proxPointer:
-            raise Exception, "proxPointer out of order"
-
-        if (not self.isIndex and self.size % self.INDEX_INTERVAL == 0):
-            # add an index term
-            self.other.add(self.lastTerm, self.lastTi)
-
-        # write term
-        self.writeTerm(term)
-        # write doc freq
-        self.output.writeVInt(ti.docFreq)
-        # write pointers
-        self.output.writeVLong(ti.freqPointer - self.lastTi.freqPointer)
-        self.output.writeVLong(ti.proxPointer - self.lastTi.proxPointer)
-
-        if self.isIndex:
-            self.output.writeVLong(self.other.output.getFilePointer() - self.lastIndexPointer)
-            self.lastIndexPointer = self.other.output.getFilePointer()
-
-        self.lastTi.setTo(ti)
-        self.size += 1
-
-
-    def close(self):
-        self.output.seek(0)
-        self.output.writeInt(self.size)
-        self.output.close()
-
-        if self.isIndex is not True:
-            self.other.close()
-
-
-    def writeTerm(self, term):
-        a, b = self.lastTerm.text(), term.text()
-        start = self.stringDifference(a, b)
-        delta = term.text()[start:]
-        # write shared prefix length
-        self.output.writeVInt(start)
-        # write delta chars
-        self.output.writeString(delta)
-        # write field num
-        i = self.fieldInfos.fieldNumber(term.field())
-        self.output.writeVInt(i)
-        self.lastTerm = term
-
-
-
--- a/MoinMoin/support/lupy/index/terminfo.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-class TermInfo(object):
-
-    def __init__(self):
-        self.docFreq = 0
-        self.freqPointer = 0
-        self.proxPointer = 0
-
-    def set(self, df, fp, pp):
-        self.docFreq = df
-        self.freqPointer = fp
-        self.proxPointer = pp
-
-    def setTo(self, ti):
-        self.docFreq = ti.docFreq
-        self.freqPointer = ti.freqPointer
-        self.proxPointer = ti.proxPointer
-
-    def __repr__(self):
-        return '<TermInfo:d:' + str(self.docFreq)+ ' f:' + str(self.freqPointer) +\
-               ' p:' + str(self.proxPointer) + '>'
-
-    
--- a/MoinMoin/support/lupy/indexer.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,262 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-"""A simple interface to indexing and searching.
-"""
-import os, sys, re
-
-from MoinMoin.support.lupy.index.indexwriter import IndexWriter
-from MoinMoin.support.lupy.index.documentwriter import standardTokenizer
-from MoinMoin.support.lupy.index.term import Term
-
-from MoinMoin.support.lupy import document
-
-from MoinMoin.support.lupy.search import indexsearcher
-from MoinMoin.support.lupy.search.term import TermQuery
-from MoinMoin.support.lupy.search.phrase import PhraseQuery
-from MoinMoin.support.lupy.search.boolean import BooleanQuery
-
-
-class Index:
-
-    def __init__(self, name, create=False, analyzer=None):
-        """
-        @param name: Name of the directory for this index.
-        @param create: Whether to create this directory or not.
-        @type create: boolean
-        """
-        
-        self.name = name
-        self.analyzer = analyzer or standardTokenizer
-        # Create the index if we need to. From here on we assume
-        # that the index exists
-        self.indexer = IndexWriter(self.name, create, self.analyzer)
-        # Remember the default merge factor
-        self.mergeFactor = self.indexer.mergeFactor
-        # Clean up
-        self.indexer.close()
-        self.indexer = self.searcher = None
-        
-    def index(self, **kw):
-        """Add a document to the index.
-        
-        **kw contains the name and values of each Field in the
-        Document that we are creating.
-
-        If the key in **kw starts with '_' the field will be created
-        as a Keyword. If it starts with '__', it is created as a
-        stored Text field (e.g. tokenized and stored), otherwise it
-        will be created as a Text field. The leading '_' are removed
-        before field creation.
-
-        Text fields will have their value tokenized before
-        indexing. The value is not stored in the index.  This is the
-        usual type of field that you need for plain text.
-
-        Keyword fields will not have their value tokenized.  The value
-        is stored in the index and is returned with search hits on the
-        Document. If you wanted to store the path to a document along
-        with each document, you would use a Keyword field. The path
-        would not be tokenized and its value would be returned in the
-        query results, so you could easily open and display the file.
-        """
-        self._setupIndexer()
-        
-        # create document
-        d = document.Document()
-
-        # TODO - Please find another way of defining fields
-        # than magic field names!!!
-
-        # add a file field containing the path to this file
-        for key, value in kw.items():
-            if key[:2] == '__':
-                key = key[2:]
-                # Tokenized and stored
-                f = document.Text(key, value, True)
-            elif key[0] == '_':
-                # Not tokenized and stored
-                key = key[1:]
-                # keyword
-                f = document.Keyword(key, value)
-            else:
-                # Tokenized and not stored
-                f = document.Text(key, value, False)
-            d.add(f)
-        self.indexer.addDocument(d)
-
-    def _setupIndexer(self):
-        if self.searcher is not None:
-            self.searcher.close()
-            self.searcher = None
-        if self.indexer is None:
-            self.indexer = IndexWriter(self.name, False, self.analyzer)
-            self.indexer.mergeFactor = self.mergeFactor
-
-    def _setupSearcher(self):
-        if self.indexer is not None:
-            self.indexer.close()
-            self.indexer = None
-        if self.searcher is None:
-            self.searcher = indexsearcher.IndexSearcher(self.name)
-
-    def delete(self, **kw):
-        "Delete the first document containing the specified term. See also L{deleteAll}."
-        # Not very efficient for bulk deletes
-        # Use deleteAll for bulk deletes
-        self._setupSearcher()
-        if len(kw) != 1:
-            raise RuntimeError, 'one and only one field for the moment'
-        field, value = kw.items()[0]
-        t = Term(field, value)
-        self.searcher.reader.deleteTerm(t)
-        
-    def deleteAll(self, **kw):
-        "Remove all documents containing this field and value."
-        self.close()
-        reader = indexsearcher.open(self.name)
-        if len(kw) != 1:
-            raise RuntimeError, 'one and only one field for the moment'
-        field, values = kw.items()[0]
-        for value in values:
-            t = Term(field, value)
-            reader.deleteTerm(t)
-        # commit the deletes
-        reader.close()
-
-    def close(self):
-        # Indexer and Searchers are different
-        # and we have to open the right kind
-        # for the operation we are performing.
-        # The actual creation is done in the index and find
-        # methods. Here we close whatever is open.
-        if self.searcher is not None:
-            self.searcher.close()
-            self.searcher = None
-        if self.indexer is not None:
-            self.indexer.close()
-            self.indexer = None
-
-    def flush(self):
-       """Flush outstanding indexes to disk.
-
-       This makes sure we are searching the latest stuff.
-       """
-       if self.indexer is not None:
-           self.indexer.flushRamSegments()
-
-    def optimize(self):
-        """Merge all on-disk segments into a single segment. Saves space and can speed up queries."""
-        self._setupIndexer()
-        self.indexer.optimize()
-
-    def parse(self, field, qString):
-        if qString.startswith('"'):
-            qString = qString.strip('"')
-            #qWords = qString.strip('"').split()
-            qWords = self._tokenize(qString)
-            return self.phraseSearch(field, qWords)
-        else:
-            qWords = self._tokenize(qString)
-            if len(qWords) == 1:
-                return self.termSearch(field, qWords[0])
-            else:
-                return self.boolSearch(field, qWords)
-
-    def _tokenize(self, qString):
-        return list(self.analyzer(qString))
-
-    def find(self, qStr):
-        """Perform a search in any field in this index.
-
-        If the search string is enclosed in double quotes, a phrase
-        search will be run; otherwise, the search will be for
-        documents containing all words specified."""
-        
-        self._setupSearcher()
-            
-        fields = self.searcher.fieldNames()
-        if not fields:
-            return []
-        all = [self.parse(field, qStr) for field in fields]
-        if len(all) == 1:
-            # simple case
-            return self.searcher.search(all[0])
-        
-        q = BooleanQuery()
-        for query in all:
-            # OR all of the field queries
-            q.add(query, False, False)
-        hits = self.searcher.search(q)
-        return hits
-
-    def findInField(self, **kw):
-        """Search only in a single field."""
-        # eg index.findInField(text='flute')
-        if len(kw) != 1:
-            raise RuntimeError, 'one and only one field for the moment'
-        self._setupSearcher()
-        field, query = kw.items()[0]
-        q = self.parse(field, query)
-        hits = self.searcher.search(q)
-        return hits
-    
-    def termSearch(self, field, term):
-        "Search for a single C{term} in a C{field}."
-        t = Term(field, term)
-        q = TermQuery(t)
-        return q
-
-    def phraseSearch(self, field, words):
-        "Search for a phrase (given as a list of words) in C{field}."
-        q = PhraseQuery()
-        for word in words:
-            t = Term(field, word)
-            q.add(t)  
-        return q
-            
-    def boolSearch(self, field, ands=[], ors=[], nots=[]):
-        """Build a simple boolean query.
-
-        Each word in C{ands} is equiv to +word
-        Each word in C{ors} is equiv to word
-        Each word in C{nots} is equiv to -word
-
-        E.g. C{boolSearch(['spam'], ['eggs'], ['parrot', 'cheese'])} is
-        equiv to C{+spam eggs -parrot -cheese} in Google/Lucene syntax.
-        """
-        q = BooleanQuery()
-
-        for a in ands:
-            t = Term(field, a)
-            tq = TermQuery(t)
-            q.add(tq, True, False)
-            
-        for a in ors:
-            t = Term(field, a)
-            tq = TermQuery(t)
-            q.add(tq, False, False)
-            
-        for a in nots:
-            t = Term(field, a)
-            tq = TermQuery(t)
-            q.add(tq, False, True)
-        
-        return q
-            
-    def printHits(self, hits):
-        if len(hits) == 0:
-            print 'Nothing found!'
-        else:
-            for i in range(len(hits)):
-                print hits.doc(i), hits.score(i)
-
-    def setMergeFactor(self, anInt):
-        "Set how many documents will be processed before the indexes will be merged. Never less than 2."
-        # Never less than 2
-        if anInt >= 2:
-            self.mergeFactor = anInt
-        
-          
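
The Index class removed above was lupy's convenience facade. The sketch below reconstructs its intended use purely from the docstrings above; the index path, page name and field values are made up, and the import stops working once this changeset removes the module.

    # Hypothetical usage of the removed lupy Index facade (pre-changeset tree only).
    from MoinMoin.support.lupy.indexer import Index

    idx = Index('/tmp/demo-index', create=True)

    # '__text'    -> stored Text field (tokenized and stored)
    # '_pagename' -> Keyword field (not tokenized, returned with hits)
    # 'body'      -> plain Text field (tokenized, not stored)
    idx.index(_pagename='FrontPage', __text='Welcome to the wiki', body='some page body')
    idx.flush()

    hits = idx.find('"welcome to"')       # quoted string -> phrase search
    hits = idx.findInField(body='wiki')   # restrict the search to one field
    idx.printHits(hits)
    idx.close()
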
--- a/MoinMoin/support/lupy/search/__init__.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-"""indexing classes"""
--- a/MoinMoin/support/lupy/search/boolean.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,211 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-import itertools
-import similarity
-import traceback
-
-class BooleanQuery:
-    """A Query that matches documents matching boolean combinations of
-    other queries, typically L{lupy.search.term.TermQuery}s or L{lupy.search.phrase.PhraseQuery}s."""
-    
-
-    def __init__(self):
-        """Constructs an empty boolean query."""
-        
-        self.clauses = []
-        self.boost = 1.0
-
-    def addClause(self, clause):
-        """Adds a BooleanClause to this query."""
-        self.clauses.append(clause)
-
-
-    def add(self, query, required, prohibited):
-        """Adds a clause to a boolean query.  Clauses may be:
-        C{required} which means that documents which I{do not}
-        match this sub-query will I{not} match the boolean query;
-        C{prohibited} which means that documents which I{do}
-        match this sub-query will I{not} match the boolean query; or
-        neither, in which case matched documents are neither prohibited from
-        nor required to match the sub-query.
-        
-        It is an error to specify a clause as both C{required} and
-        C{prohibited}."""
-        
-        self.clauses.append(BooleanClause(query,
-                                          required,
-                                          prohibited))
-        
-
-    def normalize(self, norm):
-        for c in self.clauses:
-            if not c.prohibited:
-                c.query.normalize(norm)
-
-    def scorer(self, reader):
-        # optimize zero-term case
-        if len(self.clauses) == 1:
-            # just return term scorer
-            c = self.clauses[0]
-            if not c.prohibited:
-                return c.query.scorer(reader)
-
-        result = BooleanScorer()
-
-        for c in self.clauses:
-            subScorer = c.query.scorer(reader)
-            if subScorer is not None:
-                result.add(subScorer, c.required, c.prohibited)
-            elif c.required:
-                return None
-
-        return result
-            
-
-    def sumOfSquaredWeights(self, searcher):
-        sum = 0.0
-        
-        for c in self.clauses:
-            if not c.prohibited:
-                # sum sub-query weights
-                sum += c.query.sumOfSquaredWeights(searcher)
-            else:
-                # allow complex queries to initialize themself
-                c.query.sumOfSquaredWeights(searcher)
-        return sum
-
-
-    def toString(self, field):
-        """Prints a user-readable version of this query"""
-
-        buffer = ''
-
-        for c in self.clauses:
-            if c.prohibited:
-                buffer += '-'
-            elif c.required:
-                buffer += '+'
-
-            subQuery = c.query
-            if isinstance(subQuery, BooleanQuery):
-                # wrap sub-bools in parens
-                buffer += '('
-                buffer += c.query.toString(field)
-                buffer += ')'
-            else:
-                buffer += c.query.toString(field)
-            
-        return buffer
- 
-class BooleanClause(object):
-    """A clause in a BooleanQuery"""
-
-    def __init__(self, q, r, p):
-        self.query = q
-        self.required = r
-        self.prohibited = p
-    
-class BooleanScorer:
-    
-    def __init__(self):
-        self.coordFactors = None
-        self.maxCoord = 1
-        self.nextMask = 1
-        self.prohibitedMask = 0
-        self.requiredMask = 0
-        self.scorers = []        
-        self.currentDoc = 0
-        self.validList = []
-        self.table = {}
-        
-    def add(self, scorer, required, prohibited):
-        mask = 0
-        if required or prohibited:
-            if self.nextMask == 0:
-                raise Exception, 'More than 32 required/prohibited clauses in a query.'
-            mask = self.nextMask
-            self.nextMask = self.nextMask << 1
-        else:
-            # neither required nor prohibited, so no mask bit is assigned
-            mask = 0
-            
-        if not prohibited:
-            self.maxCoord += 1
-            
-        if prohibited:
-            # Update prohibited mask
-            self.prohibitedMask |= mask
-        elif required:
-            # Update required mask
-            self.requiredMask |= mask
-            
-        self.scorers.append(SubScorer(scorer, required, prohibited, mask))
-        
-        
-    def computeCoordFactors(self):
-        self.coordFactors = []
-        for i in range(self.maxCoord):
-            self.coordFactors.append(similarity.coord(i, self.maxCoord))
-            
-
-    def collect(self, doc, score, mask):
-        bucket = self.table.get(doc, None)
-        if bucket is None:
-            #doc, score, bits, coord
-            bucket = [-1, 0, 0, 0]
-            self.table[doc] = bucket            
-        if bucket[0] != doc:
-            # invalid doc
-            # initialize fields
-            bucket[:] = [doc, score, mask, 1]            
-            self.validList.append(bucket)
-        else:
-            # valid bucket
-            # increment score
-            bucket[1] += score
-            # add bits in mask
-            bucket[2] |= mask
-            # increment coord
-            bucket[3] += 1 # XXX
-            #print doc, score, mask, bucket
-
-            
-    def score(self, maxDoc):
-        if self.coordFactors is None:
-            self.computeCoordFactors()
-        for t in self.scorers:
-            #print "SCORER %r" % t.scorer
-            for d,score in t.scorer.score(maxDoc):
-                #print "DOCUMENT %r %r" % (d, score)
-                self.collect(d,score,t.mask)
-        return self.collectHits()
-    
-    def collectHits(self):        
-        for bucket in self.validList:
-            doc, score, bits, coord = bucket
-            if (bits & self.prohibitedMask) == 0 and (bits & self.requiredMask) == self.requiredMask:
-                # if prohibited and required check out
-                # add to results
-                #print "CollectHits:", doc, score, self.coordFactors, coord
-                try:
-                    scorecf = score * self.coordFactors[coord]
-                except IndexError, err: # XXX ugly way to avoid it crashing 8(
-                    scorecf = 0.0
-                yield (doc, scorecf)
-        del self.validList[:]
-            
-                
-class SubScorer(object):
-    
-    def __init__(self, scorer, required, prohibited, mask):
-      self.scorer = scorer
-      self.required = required
-      self.prohibited = prohibited
-      self.mask = mask
-    
-    
-    
-    
--- a/MoinMoin/support/lupy/search/camelcase.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2005 Florian
-# Festi. This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-from term import TermQuery
-from boolean import BooleanQuery, BooleanScorer
-from phrase import PhraseQuery
-from MoinMoin.support.lupy.index.term import Term
-
-class CamelCaseQuery(TermQuery):
-    """
-    XXX write new comment
-    A Query that matches documents that contain words
-       the term starts with. This is useful for CamelCase
-       words. You need to filter the results to make sure
-       the camel case words are really contained within the
-       document.
-    """
-    def sumOfSquaredWeights(self, searcher):
-        self.query = BooleanQuery()
-        self.reader = searcher.reader
-        self.splitToWords(self.term, self.reader, [])
-        return self.query.sumOfSquaredWeights(searcher)
-
-    def scorer(self, reader):
-        return self.query.scorer(reader)
-    
-    def _add_phrase(self, terms):
-        phrase = PhraseQuery()
-        for term in terms:
-            phrase.add(term)
-        self.query.add(phrase, False, False)
-        
-    def splitToWords(self, term, reader, terms):
-        text = term.text()
-        field = term.field()
-        for l in xrange(2, len(text)+1):
-            prefix = text[:l]
-            ts = reader.terms(Term(field, prefix))
-            if ((ts.term.text()==prefix and
-                ts.term.field()==field)):
-                t = terms[:]
-                t.append(ts.term)
-                self.splitToWords(Term(field, text[l:]), reader, t)
-        else:
-            ts = reader.terms(term)
-
-        # check for end words
-        if len(text):
-            return
-            max_length = len(text) + 3
-            while ts.term.text().startswith(text):
-                if (len(ts.term.text()) < max_length and
-                    ts.term.field()==field):
-                    self._add_phrase(terms+[ts.term])
-                try:
-                    ts.next()
-                except StopIteration:
-                    break
-        else:
-            self._add_phrase(terms)
--- a/MoinMoin/support/lupy/search/fuzzy.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,92 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2005 Florian
-# Festi. This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-from term import TermQuery
-from boolean import BooleanScorer
-from MoinMoin.support.lupy.index.term import Term
-
-def min(*l):
-    m = l[0]
-    for v in l:
-        if v<m: m = v
-    return m
-
-class FuzzyQuery(TermQuery):
-    """Port of the Lucene FuzzyQuery
-    Still untested, use at your own risk...
-    """
-    WORD_SIZE = 50
-
-    def __init__(self, term, similarity, prefix_length):
-        TermQuery.__init__(self, term)
-        #self.term = term
-        self.prefix = term.text()[:prefix_length]
-        self.text = term.text()[len(self.prefix):]
-        self.min_similarity = similarity
-        self.d = []
-        for i in xrange(self.WORD_SIZE):
-            self.d.append([0]* self.WORD_SIZE) 
-
-    def scorer(self, reader):
-        prefix = self.prefix
-        lprefix = len(prefix)
-        field = self.term.field()
-        terms = []
-        
-        ts = reader.terms(Term(field, self.prefix))
-        scorer = BooleanScorer()
-
-        while True:
-            text = ts.term.text()
-            if not text.startswith(prefix):
-                break
-            sim = self.similarity(text[lprefix:])
-            if (ts.term.field()==field and
-                sim > self.min_similarity):
-                tq = TermQuery(ts.term)
-                tq.weight=1.0
-                scorer.add(tq.scorer(reader), False, False)
-                terms.append(ts.term)
-            try:
-                ts.next()
-            except StopIteration:
-                break
-            
-        if terms is None:
-            return None
-        
-        return scorer
-
-    def initialize_array(self, n, m):
-        d = self.d
-        if len(d)<n+1:
-            l = len(d[0])
-            for i in xrange(len(d), n+1):
-                d.append([0] * l)
-        if len(d[0])<m+1:
-            l = [0] * (m - len(d[0]) + 1)
-            for i in xrange(len(d)):
-                d[i].extend(l)
-
-        for i in xrange(n+1): d[i][0] = i
-        for i in xrange(m+1): d[0][i] = i
-        
-
-    def similarity(self, target):
-        n = len(self.text)
-        m = len(target)
-        d = self.d
-
-        self.initialize_array(n, m)
-
-        for i in xrange(n):
-            s_i = self.text[i]
-            for j in xrange(m):
-                if s_i != target[j]:
-                    d[i+1][j+1] = min(d[i][j+1], d[i+1][j], d[i][j]) + 1
-                else:
-                    d[i+1][j+1] = min(d[i][j+1]+1, d[i+1][j]+1, d[i][j])
-        return 1.0 - (d[n][m]/ float(len(self.prefix) + min(m,n)))
-
--- a/MoinMoin/support/lupy/search/hits.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,98 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-class Hits(object):
-    """A ranked list of documents, used to hold search results."""
-    def __init__(self, s, q, f):
-        """Initialize scoreDocs and totalHits.
-        """
-        self.query = q
-        self.searcher = s
-        self.filter = f
-        self.hitDocs = []
-        self._cache = []
-        self.maxDocs = 200
-        self.length = 0
-        # retrieve 100 initially
-        self.getMoreDocs(50)        
-        
-    def __len__(self):
-        return self.length
-
-    def __getitem__(self, indexOrSlice):
-        # NB - Does not handle hits[:-1]
-        # there has to be a better way than isinstance
-        if isinstance(indexOrSlice, int):
-            return self.doc(indexOrSlice)
-        else:
-            slyce = indexOrSlice
-            start = slyce.start or 0
-            stop = min(slyce.stop or len(self), len(self))
-            step = slyce.step or 1
-            return [self[i] for i in range(start, stop, step)] 
-            
-    def doc(self, n):
-        if n > len(self.hitDocs):
-            self.getMoreDocs(n)
-        elif n >= self.length:
-            raise IndexError, 'Not a valid hit number ' + str(n)
-        hitDoc = self.hitDocs[n]
-        
-        # update LRU cache of documents
-        # remove from list, if there
-        if hitDoc in self._cache:
-            self._cache.remove(hitDoc)
-        # add to front of list
-        self._cache.insert(0,hitDoc)
-
-        if len(self._cache) > self.maxDocs:
-            oldLast = self._cache[-1]
-            del self._cache[-1]
-            # let doc get gc'd
-            oldLast['doc'] = None
-
-        if hitDoc['doc'] is None:
-            # cache miss: read document
-            hitDoc['doc'] = self.searcher.doc(hitDoc['id'])
-
-        return hitDoc['doc']
-
-    def getMoreDocs(self, minDoc):
-        """Tries to add new documents to hitDocs.
-        Ensures that the hit numbered C{minDoc} has been retrieved.
-        """
-        minDoc = max(len(self.hitDocs), minDoc)
-        
-        # double number retrieved
-        n = minDoc * 2
-        
-        topDocs = self.searcher.search(self.query, self.filter, n)
-        scoreDocs = topDocs.scoreDocs
-        self.length = topDocs.totalHits
-        
-        scoreNorm = 1.0
-        if self.length > 0 and scoreDocs[0].score > 1.0:
-            scoreNorm  = 1.0 / scoreDocs[0].score
-            
-        if len(scoreDocs) < self.length:
-            end = len(scoreDocs)
-        else:
-            end = self.length
-            
-        for i in range(len(self.hitDocs),end):
-            self.hitDocs.append({'score': scoreDocs[i].score * scoreNorm, 'id': scoreDocs[i].doc, 'doc': None})
-                
-    def score(self, n):
-        """ Returns the score for the C{n}th document in the set.
-        """
-        return self.hitDocs[n]['score']
-
-    def __repr__(self):
-        s=  '<' + str(len(self)) + ' Hit'
-        if len(self) == 1:
-            s += '>'
-        else:
-            s += 's>'
-        return s
--- a/MoinMoin/support/lupy/search/indexsearcher.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,155 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-import math, itertools
-
-import similarity, hits
-
-from bisect import insort
-from MoinMoin.support.lupy.index import segment, segmentmerger
-from MoinMoin.support.lupy import store
-
-def openDir(directory):
-    infos = segment.SegmentInfos()
-    infos.read(directory)
-    if len(infos) == 1:       # index is optimized
-        return segmentmerger.SegmentReader(infos[0], True)
-    elif len(infos) == 0:
-        readers = []
-    else:
-        readers = [segmentmerger.SegmentReader(info,False) for info in infos[:-1]]
-        readers.append(segmentmerger.SegmentReader(infos[-1],True))
-    return segmentmerger.SegmentsReader(directory, readers)
-
-def open(path):
-    """Returns an IndexReader reading the index in an FSDirectory in
-    the named path."""
-
-    return openDir(store.getDirectory(path, False))
-
-
-class IndexSearcher:
-
-    """The base class for search implementations.
-    Implements search over a single index.
-    
-    Subclasses may implement search over multiple indices, and over
-    indices on remote servers."""
-    
-    def __init__(self, dirOrPath):
-        """Creates a searcher searching the provided index.
-        """
-        if isinstance(dirOrPath, basestring):
-            self.reader = open(dirOrPath)
-        else:
-            self.reader = openDir(dirOrPath)
-        
-    def close(self):
-        """Frees resources associated with this Searcher."""
-        self.reader.close()
-
-    def docFreq(self, term):
-        return self.reader.docFreq(term)
-
-    def maxDoc(self):
-        return self.reader.maxDoc()
-
-    def doc(self, i):
-        """For use by L{lupy.search.hits.Hits}."""
-        return self.reader.document(i)
-
-    def searchAll(self, query, filter):
-        """Lower-level search API.
-
-        Returns a generator that yields all non-zero scoring documents
-        for this query that pass the filter.
-
-        Applications should only use this if they need I{all} of the
-        matching documents.  The high-level search API
-        (L{search(Query)}) is usually more efficient, as it skips
-        non-high-scoring hits.
-
-         - C{query} to match documents
-         - C{filter} if non-null, a bitset used to eliminate some documents
-        """
-        scorer = getScorer(query, self, self.reader)
-        if filter is not None:
-            bits = filter.bits(reader)
-
-        if scorer is None:
-            return
-        
-        return itertools.imap(lambda doc, score: doc,
-                              itertools.ifilter(lambda doc, score: score > 0 and (bits is None or bits.get(doc)),
-                                                scorer.score(self.reader.maxDoc())))
-            
-    def search(self, query, filter=None, nDocs=None):
-        
-        """Search this index for documents matching C{query} and
-        (optionally) passing the C{filter} bitvector. If C{nDocs} is
-        specified then only the top C{nDocs} hits will be returned."""
-        
-        if nDocs is None:
-            return hits.Hits(self, query, filter)
-        
-        scorer = getScorer(query, self, self.reader)
-        if scorer is None:
-            return TopDocs(0, [])
-
-        if filter is not None:
-            bits = filter.bits(reader)
-        else:
-            bits = None
-
-        scoreDocs = []
-        totalHits = [0]
-        minScore = 0.0
-
-        for doc, scr in scorer.score(self.reader.maxDoc()):        
-            if scr > 0.0 and (bits is None or bits.get(doc)):
-                # ignore zeroed buckets and docs not in bits
-                totalHits[0] += 1
-                if scr >= minScore:
-                    # update hit queue
-                    insort(scoreDocs, ScoreDoc(doc, scr))
-                    if len(scoreDocs) > nDocs:
-                        # if hit queue overfull
-                        # remove lowest in hit queue
-                        scoreDocs.pop()
-                        # reset minimum score
-                        minScore = scoreDocs[0].score
-                
-        return TopDocs(totalHits[0], scoreDocs)
-
-    def fieldNames(self):
-        # Experimental for auto queries
-        return self.reader.fieldNames()
-
-
-def getScorer(query, searcher, reader):
-    sum = query.sumOfSquaredWeights(searcher)
-    norm = 1.0/(math.sqrt(sum) or 1.0)
-    query.normalize(norm)
-    return query.scorer(reader)
-
-class ScoreDoc(object):
-  
-    def __init__(self, d, s):
-        self.doc = d
-        self.score = s
-
-    def __lt__(a, b):
-        if a.score == b.score:
-            return a.doc > b.doc
-        else:
-            return a.score < b.score
-
-
-class TopDocs(object):
-
-    def __init__(self, th, sds):
-        self.totalHits = th
-        self.scoreDocs = sds
-        
--- a/MoinMoin/support/lupy/search/phrase.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,232 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-
-from bisect import insort
-from MoinMoin.support.lupy.search import term, similarity
-import sys
-
-class PhraseQuery:
-    """A query that matches documents containing a particular
-    sequence of terms. This may be combined with other terms
-    with a L{lupy.search.boolean.BooleanQuery}.
-    """
-
-    def __init__(self):
-        """Constructs an empty phrase query."""
-        
-        self.idf = 0.0
-        self.slop = 0
-        self.terms = []
-        self.weight = 0.0
-        self.boost = 1.0
-
-    def add(self, term):
-        """Adds a term to the end of the query phrase."""
-        if len(self.terms) == 0:
-            self.field = term.field()
-
-        elif term.field() != self.field:
-            raise Exception, 'All phrase terms must be in the same field: ' + str(term)
-
-        self.terms.append(term)
-
-
-    def getSlop(self):
-        """Returns the slop.  See setSlop()."""
-        return self.slop
-
-
-    def normalize(self, norm):
-        # normalize for query
-        self.weight *= norm
-        # factor from document
-        self.weight *= self.idf
-
-
-    def scorer(self, reader):
-        # optimize zero-term case
-        if len(self.terms) == 0:
-            return None
-
-        # optimize one-term case
-        if len(self.terms) == 1:
-            t = self.terms[0]
-            docs = reader.termDocsTerm(t)
-            if docs is None:
-                return None
-            return term.TermScorer(docs, reader.normsField(t.field()), self.weight)
-
-        tps = [] 
-        
-        for t in self.terms:
-            p = reader.termPositionsTerm(t)
-            if p is None:
-                # I am not sure how this is ever reached?
-                return None
-            tps.append(p)
-
-        if self.slop == 0:
-            return ExactPhraseScorer(tps, reader.normsField(self.field),
-                                     self.weight)
-        else:
-            return SloppyPhraseScorer(tps, reader.norms(self.field),
-                                      self.weight)
-
-
-    def sumOfSquaredWeights(self, searcher):
-        # sum term IDFs
-        for term in self.terms:
-            self.idf += similarity.idfTerm(term, searcher)
-            
-        self.weight = self.idf * self.boost
-        # square term weights
-        return self.weight * self.weight
-
-
-    def toString(self, f):
-        """Prints a user-readable version of this query"""
-
-        buffer = ''
-        if not self.field == f :
-            buffer += f + ':'
-        buffer += '\\'
-
-        for term in self.terms[:-1]:
-            buffer += term.text() + ' '
-            
-        buffer += self.terms[-1].text() + '\\'
-
-        if self.slop != 0:
-            buffer += '~' + str(self.slop)
-
-        if self.boost != 1.0:
-            buffer += '^' + str(self.boost)
-
-        return buffer
-
-
-class PhraseScorer:
-    
-    def __init__(self, tps, n, w):
-        self.norms = n
-        self.weight = w
-        
-        self.pps = [PhrasePositions(tp, i) for i, tp in enumerate(tps)]
-        self.pps.sort()
-                        
-    def phraseQuery(self):
-        """Subclass responsibility"""
-
-    def score(self, end):
-        # find doc w/ all the terms
-        while self.pps[-1].doc < end:
-            while self.pps[0].doc < self.pps[-1].doc:
-                self.pps[0].advance()
-                while self.pps[0].doc < self.pps[-1].doc:
-                    self.pps[0].advance()
-                self.pps.append(self.pps.pop(0))
-                if self.pps[-1].doc >= end:
-                    return
-                
-            # found doc with all terms
-            # check for phrase
-            freq = self.phraseFreq()
-            
-            if freq > 0.0:
-                # compute score
-                score = similarity.tf(freq) * self.weight
-                # normalize
-                score *= similarity.normByte(self.norms[self.pps[0].doc])
-                # add to results
-                yield (self.pps[0].doc, score)
-            # resume scanning
-            self.pps[-1].advance()
-                
-                
-        
-
-class ExactPhraseScorer(PhraseScorer):
-    
-    def phraseFreq(self):
-        for pp in self.pps:
-            pp.firstPosition()
-        self.pps.sort()
-        freq = 0.0
-        
-        init = 0
-        # the 'init' bits are to simulate a do-while loop :-/
-        while init == 0 or self.pps[-1].nextPosition():
-            while self.pps[0].position < self.pps[-1].position:
-                # scan forward in first
-                init2 = 0
-                while init2 == 0 or self.pps[0].position < self.pps[-1].position:
-                    if not self.pps[0].nextPosition():
-                        return freq
-                    init2 = 1
-                    
-                self.pps.append(self.pps.pop(0))
-            # all equal: a match
-            freq += 1
-            init = 1
-            
-        return freq
-        
-
-class PhrasePositions(object):
-
-    def __init__(self, t, o):
-        self.tp = t
-        self.offset = o
-        
-        self.position = 0
-        self.count = 0
-        self.doc = 0
-        self.tpiter = iter(t)
-        self.advance()
-        
-        
-    def firstPosition(self):
-        self.count = self.tp.frq
-        self.nextPosition()
-        
-        
-    def advance(self):
-        """Increments to next doc"""
-        
-        for doc, frq, nextPos in self.tpiter:
-            self.doc = doc
-            self.frq = frq
-            self._nextPos = nextPos
-            self.position = 0
-            return
-        else:
-            # close stream
-            self.tp.close()
-            # sentinel value
-            self.doc = sys.maxint
-            return
-        
-        
-    def nextPosition(self):
-        if self.count > 0:
-            self.count -= 1
-            # read subsequent positions
-            self.position = self._nextPos.next() - self.offset
-            return True
-        else:
-            self.count -= 1
-            return False
-        
-                
-    def __repr__(self):
-        res = '<pp>d:' + str(self.doc) + ' p:' + str(self.position) + ' o:' + str(self.offset)
-        return res
-
-    def __lt__(this, that):
-        if this.doc == that.doc:
-            return this.position < that.position
-        else:
-            return this.doc < that.doc
--- a/MoinMoin/support/lupy/search/prefix.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,44 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2005 Florian
-# Festi. This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-from term import TermQuery
-from boolean import BooleanQuery
-from MoinMoin.support.lupy.index.term import Term
-
-class PrefixQuery(TermQuery):
-    """A Query that matches documents that contains the term and terms
-    that start with the term and have upto max_addon additional chars.
-    This allows to have better matching especially if no stemming is used"""
-    def __init__(self, term, max_addon=10000):
-        TermQuery.__init__(self, term)
-        self.term = term
-        self.max_length = len(term.text()) + max_addon
-        self.weight = 0.0
-        self.boost = 1.0
-                        
-    def sumOfSquaredWeights(self, searcher):
-        self.query = BooleanQuery()
-        reader = searcher.reader
-
-        text = self.term.text()
-        field = self.term.field()
-
-        ts = reader.terms(self.term)
-
-        while True:
-            if not ts.term.text().startswith(text):
-                break
-            if ((len(ts.term.text()) <= self.max_length) and
-                ts.term.field()==field):
-                self.query.add(TermQuery(ts.term), False, False)
-            try:
-                ts.next()
-            except StopIteration:
-                break
-
-        return  self.query.sumOfSquaredWeights(searcher)
-
-    def scorer(self, reader):
-        return self.query.scorer(reader)
--- a/MoinMoin/support/lupy/search/regularexpression.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2005 Florian
-# Festi. This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-import re
-
-def re_prefix(regex):
-    """
-    return the literal string at the beginning of regex that will always match
-    Assumes the regex is a valid regular expression!!!
-    """
-    match = re.search(r"[[({\\.$*+?|]", regex)
-    if not match: return regex
-
-    if regex.find("|") != -1:
-        # XXXX use string or RE to group non special chars
-        # States
-        plain = 0
-        escape = 1
-        charset = 2
-        charsetfirst = 3
-        charsetescape = 4
-            
-        state = plain
-        parenthesis = 0
-        for c in regex:
-            if state == plain:
-                if c == "\\": state = escape
-                elif c == "(": parenthesis += 1
-                elif c == ")": parenthesis -= 1
-                elif c == "[": state = charsetfirst
-                elif c == "|":
-                    if parenthesis == 0:
-                        # | on toplevel
-                        return ""
-            elif state == charset:
-                if c == "]": state = plain
-                elif c == "\\": state = charsetescape
-            elif state == charsetfirst:
-                if c == "\\": state = charsetescape
-                else: state = charset                
-            elif state == charsetescape: state = charset
-            elif state == escape:
-                state = plain
-
-    end = match.start()
-    if match.group() in "*{?": end -= 1 # RE element refers to the last char
-    return regex[:end]
-
-from term import TermQuery
-from boolean import BooleanQuery
-from MoinMoin.support.lupy.index.term import Term
-
-class RegularExpressionQuery(TermQuery):
-    """Matches all documents that contain a word match the
-    regular expression (RE) handed over as text of the term.
-    This query is reasonably fast if the RE starts with normal chars.
-    If the RE starts with RE special chars the whole index is searched!
-    The RE is MATCHED against the terms in the documents!
-    """
-    def sumOfSquaredWeights(self, searcher):
-        self.query = BooleanQuery()
-        reader = searcher.reader
-
-        needle = self.term.text()
-        prefix = re_prefix(needle)
-        reg_ex = re.compile(needle, re.U)
-        field = self.term.field()
-
-        ts = reader.terms(Term(field, prefix))
-
-        while True:
-            if reg_ex.match(ts.term.text()) and ts.term.field()==field:
-                self.query.add(TermQuery(ts.term), False, False)
-            if not ts.term.text().startswith(prefix):
-                break
-            try:
-                ts.next()
-            except StopIteration:
-                break
-        return  self.query.sumOfSquaredWeights(searcher)
-
-    def scorer(self, reader):
-        return self.query.scorer(reader)
-            
--- a/MoinMoin/support/lupy/search/similarity.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-import math
-
-
-NORM_TABLE = map(lambda x: x/255.0, range(0,256))
-
-def coord(overlap, maxOverlap):
-    return overlap/float(maxOverlap)
-
-def idf(docFreq, numDocs):
-    return math.log((numDocs/(docFreq + 1.0)) or 1.0) + 1.0
-
-def idfTerm(term, searcher):
-    """Use maxDoc() instead of numDocs() because its proportional to docFreq(),
-    i.e., when one is inaccurate, so is the other, and in the same way."""
-
-    return idf(searcher.docFreq(term), searcher.maxDoc())
-
-def normByte(normByte):
-    """Un-scales from the byte encoding of a norm into a float, i.e.,
-    approximately 1/sqrt(numTerms)."""
-
-    return NORM_TABLE[normByte & 0xFF]
-
-def normInt(numTerms):
-    """Computes the normalization byte for a document given the total number of
-    terms contained in the document.  These values are stored in an index and
-    used by the search code
-
-    Scales 1/sqrt(numTerms) into a byte, i.e. 256/sqrt(numTerms).
-    Math.ceil is used to ensure that even very long documents don't get a
-    zero norm byte, as that is reserved for zero-length documents and
-    deleted documents."""
-
-    if numTerms == 0:
-        return 0
-    return int((math.ceil(255.0 / math.sqrt(numTerms)))) & 0xFF
-
-
-def tf(freq):
-    return float(math.sqrt(freq))
-
-
--- a/MoinMoin/support/lupy/search/term.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-from itertools import islice
-import sys
-import similarity
-
-class TermQuery:
-    """A Query that matches documents containing a term.
-    This may be combined with other terms with a L{lupy.search.boolean.BooleanQuery}."""
-
-    def __init__(self, t):
-        """Constructs a query for the term B{t}."""
-        
-        self.term = t
-        self.idf = 0.0
-        self.weight = 0.0
-        self.boost = 1.0
-
-    def normalize(self, norm):
-        # normalize for query
-        self.weight *= norm
-        # factor from document
-        self.weight *= self.idf
-        
-        
-    def scorer(self, reader):
-        termDocs = reader.termDocsTerm(self.term)
-        if termDocs is None:
-            return None
-        
-        return TermScorer(termDocs,
-                          reader.normsField(self.term.field()),
-                          self.weight)
-    
-
-    def sumOfSquaredWeights(self, searcher):
-        self.idf = similarity.idfTerm(self.term, searcher)
-        self.weight = self.idf * self.boost
-        # square term weights
-        return self.weight * self.weight
-
-
-    def toString(self, field):
-        """Prints a user-readable version of this query"""
-
-        buffer = ''
-        if not self.term.field() == field:
-            buffer += self.term.field() + ':'
-
-        buffer += self.term.text()
-
-        if self.boost != 1.0:
-            buffer += '^' + str(self.boost)
-
-        return buffer
-    
-    
-
-class TermScorer:
-
-    """Scorer for L{TermQuery}s."""
-
-    SCORE_CACHE_SIZE = 32
-
-
-    def __init__(self, td, n, w):
-        self.termDocs = td
-        self.norms = n
-        self.weight = w
-        self.scoreCache = [similarity.tf(i) * self.weight for i in range(self.SCORE_CACHE_SIZE)]
-        #self.docs, self.freqs = zip(*list(islice(self.termDocs, 128)))
-        
-    def score(self, end):
-        
-        for d, f in self.termDocs.read():
-            if d >= end:
-                break
-            if f < self.SCORE_CACHE_SIZE:
-                score = self.scoreCache[f]
-            else:
-                # cache miss
-                score = similarity.tf(f) * self.weight
-
-            # normalize for field
-            score *= similarity.normByte(self.norms[d])
-            # collect score
-            yield (d, score)
-        else:
-            # close stream
-            self.termDocs.close()
-            # set to sentinel value
-            self.doc = sys.maxint
-
--- a/MoinMoin/support/lupy/store.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,308 +0,0 @@
-"""Interface to directories and files, both in memory and on disk"""
-from array import array
-import weakref
-import os, stat, struct
-
-from StringIO import StringIO
-
-
-DIRECTORIES = weakref.WeakValueDictionary()
-
-
-def getDirectory(path, gen):
-    dir = DIRECTORIES.get(path, None)
-
-    if dir is None:
-        dir = FSDirectory(path, gen)
-        DIRECTORIES[path]=dir
-    elif gen is True:
-        dir.create()
-
-    return dir
-
-class FSDirectory:
-
-    def __init__(self, path, gen):
-        self.directory = path
-        if gen is True:
-            self.create()
-        DIRECTORIES[path]=self
-
-    def fpath(self, fname):
-        return os.path.join(self.directory, fname)
-
-    def create(self):
-        path = self.directory
-        if not os.path.exists(path):
-            os.mkdir(path)
-        try:
-            files = os.listdir(path)
-        except IOError:
-            files = []
-            
-        for file in files:
-            os.remove(os.path.join(path,file))
-
-    def fileExists(self, name):
-        return os.path.exists(self.fpath(name))
-
-    def fileModified(self, name, path=None):
-        return os.stat(self.fpath(name))[stat.ST_MTIME]
-    
-    def fileLength(self, name):
-        return os.stat(self.fpath(name))[stat.ST_SIZE]
-    
-    def deleteFile(self, name):
-        os.remove(self.fpath(name))
-
-    def renameFile(self, frm, to):
-        if os.path.exists(self.fpath(to)):
-            os.remove(self.fpath(to))
-
-        os.rename(self.fpath(frm),self.fpath(to))
-
-    def createFile(self, name):
-        #print "creating " + name
-        f = FileStream(self.fpath(name), 'wb')
-        f._name = name
-        return f
-    def openFile(self, name):
-        #print "opening " + name
-        f = FileStream(self.fpath(name), 'rb')
-        f._name = name
-        return f
-    def close(self):
-        pass
-        #del(DIRECTORIES[self.directory])
-        # breaks if object is used several times
-        # and should not be needed as DIRECTORIES is a weakref dict
-
-    def __str__(self):
-        return 'FSDirectory:' + self.directory
-
-class RAMDirectory:
-    
-    def __init__(self):
-        self.files = {}
-
-    
-    def list(self):
-        return self.files.keys()
-
-
-    def fileExists(self, name):
-        return (self.files.get(name, None) is not None)
-
-
-    def fileModified(self, name):
-        file = self.files[name]
-        return file.lastModified
-    
-
-    def fileLength(self, name):
-        file=self.files[name]
-        return len(file)
-    
-
-    def deleteFile(self, name):
-        del(self.files[name])
-
-
-    def renameFile(self, name, newName):
-        file = self.files[name]
-        del(self.files[name])
-        self.files[newName]=file
-
-
-    def createFile(self, name):
-        #print "creating RAM file " + name
-        file = RAMStream()
-        file._name = name
-        self.files[name]=file
-        return file
-
-
-    def openFile(self, name):
-        x = self.files[name]
-        #print "opening RAM file " + name
-        x.seek(0)
-        return x
-    
-    def makeLock(self, name):
-        """TBC"""
-
-
-    def close(self):
-        """Do nothing"""
-
-class Stream(object):
-    
-    def writeByte(self, b):
-        self.write(chr(b))
-
-    def writeBytes(self, b, length):
-        b[:length].tofile(self._getfile())
-
-    def writeInt(self, i):        
-        self.write(struct.pack("!I",i))
-
-    def writeVInt(self, i):
-        while (i & ~0x7F) != 0:
-            self.writeByte((i & 0x7F) | 0x80)
-            i = i >> 7
-        self.writeByte(i)
-
-    writeVLong = writeVInt
-    
-    def writeLong(self, i):
-        self.writeInt((i >> 32) & 0xFFFFFFFF)
-        self.writeInt(i & 0xFFFFFFFF)
-        
-    def writeString(self, s):
-        length = len(s)
-        self.writeVInt(length)
-        #print "WRITING: %r" % s
-        self.write(s.encode("utf8"))
-                
-    def getFilePointer(self):
-        return self.tell()
-
-    def readByte(self):
-        return ord(self.read(1))
-
-    def readBytes(self, b, offset, len):
-        a = array('B')
-        a.fromfile(self._getfile(), len)
-        b[offset:offset+len] = a
-        
-    def readInt(self):
-        return struct.unpack("!I",self.read(4))[0]
-        
-
-    def readVInt(self):
-        b = self.readByte()
-        i = b & 0x7F
-
-        shift = 7
-        while b & 0x80 != 0:
-            b = self.readByte()
-            i |= (b & 0x7F) << shift
-            shift += 7
-        return i
-
-
-    def readLong(self):
-        return(self.readInt() << 32 | (self.readInt() & 0xFFFFFFFFL))
-    
-
-    def readVLong(self):
-        b = self.readByte()
-        i = b & 0x7F
-
-        shift = 7
-        while b & 0x80 != 0:
-            b = self.readByte()
-            i |= (b & 0x7FL) << shift
-            shift += 7
-
-        return i
-    
-
-    def readString(self):
-        length = self.readVInt()
-        return self.readChars(length)
-    
-    def readChars(self, length): 
-        buffer = []
-        for i in range(length): 
-            b = self.readByte() 
-            if (b & 0x80) == 0: 
-                buffer.append(unichr(b & 0x7F))
-            elif (b & 0xE0) != 0xE0: 
-                tmpInt = (((b & 0x1F) << 6)|(self.readByte() & 0x3F)) 
-                buffer.append(unichr(tmpInt))
-            else: 
-                buffer.append(unichr((((b & 0x0f) << 12) |  
-                             ((self.readByte() & 0x3F) << 6) | 
-                             (self.readByte() & 0x3F))))
-        x =  u''.join(buffer)
-        #print "READING: %r" % x
-        return x
-        
-class FileStream(Stream):
-
-    def __init__(self, name, mode='rb', clone=0):
-        if not clone:
-            self.f = file(name, mode)
-            self.length = os.stat(name).st_size
-            self.isClone = 0
-            self._position = 0
-        else:
-            self.f = name
-            self.isClone = 1
-
-    def close(self):
-        pass
-        #print "!!!@#! Closing " + self._name
-        if not self.isClone:
-            self.f.close()
-
-    def seek(self, pos):
-        self._position = pos
-        self.f.seek(pos)
-
-    def tell(self):
-        return self._position
-
-    def read(self, n):
-        p = self.f.tell()
-        if p != self._position:
-            #print "!!!position mismatch in %s (at %s, wants to be at %s)" % (self._name, p, self._position)
-            self.seek(self._position)
-        s = self.f.read(n)
-        self._position += len(s)
-        return s
-    
-
-    def write(self, v):
-        p = self.f.tell()
-        if p != self._position:
-            #print "!!!position mismatch in %s (at %s, wants to be at %s)" % (self._name, p, self._position)
-            self.seek(self._position)
-        self.f.write(v)
-        self._position += len(v)
-        
-
-    def clone(self):
-        g = FileStream(self.f, clone=1)
-        g._name = self._name + " <clone>"
-        g._position = self._position        
-        return g
-
-    def _getfile(self):
-        return self.f
-    
-    def __getattr__(self, attr):
-        return getattr(self.f, attr)
-
-class RAMStream(Stream, StringIO):
-    def __init__(self, *args):
-        StringIO.__init__(self, *args)
-        self.isClone = 0
-        
-    def close(self):
-        pass
-        
-    def _getfile(self):
-        return self
-
-    def get_size(self):
-        return len(self.getvalue())
-    length = property(get_size)
-
-    def clone(self):
-        r = RAMStream(self.getvalue())
-        r._name = self._name + " <clone>"
-        r.isClone = 1
-        r.seek(self.tell())
-        return r
--- a/MoinMoin/support/lupy/util.py	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-# This module is part of the Lupy project and is Copyright 2003 Amir
-# Bakhtiar (amir@divmod.org). This is free software; you can redistribute
-# it and/or modify it under the terms of version 2.1 of the GNU Lesser
-# General Public License as published by the Free Software Foundation.
-
-from array import array
-import os
-
-# Table of bits/byte
-BYTE_COUNTS = array('B',[
-    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8])
-
-class BitVector(object):
-
-    def __init__(self, dirOrInt, name = None):
-        # some low fi type dispatch
-        if name is None:
-            # create a new vector of dirOrInt length
-            self.len = dirOrInt
-            self.bits = array('B', ([0x00]*((self.len >> 3) + 1)))
-            self.bcount = -1
-        else:
-            # read a BitVector from a file
-            input = dirOrInt.openFile(name)
-            try:
-                self.len = input.readInt()      # read size
-                self.bcount = input.readInt()          # read count
-                self.bits = array('B', [0x00]*((self.len >> 3) + 1)) # allocate bits
-                input.readBytes(self.bits, 0, len(self.bits))
-            finally:
-                input.close()
-            
-
-    def init__(self, n):
-        self.len = n
-        self.bits = array('B', ([0x00]*((self.len >> 3) + 1)))
-
-
-    def clear(self, bit):
-        # Set value of bit to zero
-        self.bits[bit >> 3] &= ~(1 << (bit & 7))
-        self.bcount = -1
-
-
-    def count(self):
-        """Returns the total number of one bits in this vector.
-        This is efficiently computed and cached, so that, if the
-        vector is not changed, no recomputation is done for
-        repeated calls."""
-
-        if self.bcount == -1:
-            c = 0
-            for b in self.bits:
-                c += BYTE_COUNTS[b & 0xFF]    # sum bits per byte
-
-            self.bcount = c
-
-        return self.bcount
-
-
-    def get(self, bit):
-        # Returns True if bit is one and False if it is zero
-        return(self.bits[bit >> 3] & (1 << (bit & 7)) != 0)
-    
-
-    def set(self, bit):
-        # Sets the value of bit to one
-        self.bits[bit >> 3] |= 1 << (bit & 7)
-        self.bcount = -1
-
-        
-    def __len__(self):
-        return self.len
-
-
-    def write(self, d, name):
-        output = d.createFile(name)
-        try:
-            output.writeInt(len(self))    # write size
-            output.writeInt(self.count())   # write count
-            output.writeBytes(self.bits, len(self.bits))
-        finally:
-            output.close()
-            
-def sibpath(path, sibling):
-    """Return the path to a sibling of a file in the filesystem.
-
-    This is useful in conjunction with the special __file__ attribute
-    that Python provides for modules, so modules can load associated
-    resource files.
-    """
-    return os.path.join(os.path.dirname(os.path.abspath(path)), sibling)
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xapwrap/__init__.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,2 @@
+""" xapwrap version 0.3.1 """
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xapwrap/document.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,319 @@
+"""
+    xapwrap.document - Pythonic wrapper around Xapian's Document API
+"""
+import string
+import datetime
+import re
+import cPickle
+import xapian
+
+MAX_KEY_LEN = 240 # this comes from xapian's btree.h, Btree::max_key_len
+# NOTE: xapian's btree.h file says that it's actually 252, but due to
+# xapian's implementation details, the actual limit is closer to 245
+# bytes. See http://thread.gmane.org/gmane.comp.search.xapian.cvs/329
+# for more info, especially the second message.
+
+# The limit described above only holds true assuming keys that do not
+# contain any NULL bytes. Since xapian internally escapes \0 bytes,
+# xapian sees the key length as (2*N + 2) where N is the number of
+# embedded NULL characters.
+
+INTER_FIELD_POSITION_GAP = 100
+
+UNICODE_ENCODING = "UTF-8" # XXX this should not be hardcoded on module level
+UNICODE_ERROR_POLICY = "replace"
+
+class StandardAnalyzer:
+    WORD_RE = re.compile('\\w{1,%i}' % MAX_KEY_LEN, re.U)
+
+    def tokenize(self, unknownText):
+        originalText = cleanInputText(unknownText, True)
+        # we want to perform lower() and the re search using a unicode
+        # object. if we try to perform those operations on a regular
+        # string object that happens to represent unicode text encoded
+        # with UTF-8, we'll get garbage, or at least an
+        # OS/libc/$LC_CTYPE dependent result
+        text = originalText.lower()
+        for match in self.WORD_RE.finditer(text):
+            # we yield unicode ONLY
+            yield match.group()
+
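+# A small illustration (editor's note, not part of the original module):
+# StandardAnalyzer().tokenize('Hello World') yields the unicode tokens
+# u'hello' and u'world', since cleanInputText() decodes byte strings to
+# unicode before the lowercasing and regexp matching above.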
+
+class TextField(object):
+    __slots__ = ('name', 'text', 'prefix')
+
+    def __init__(self, name, text = '', prefix = False):
+        if name and not text:
+            assert not prefix  # it makes no sense to use a prefixed
+                               # field without a name
+            self.text = name
+            self.name = ''
+        else:
+            self.name = name
+            self.text = text
+        self.prefix = prefix
+
+    def __len__(self):
+        return len(self.text)
+
+class SortKey(object):
+    __slots__ = ('name', 'value', 'index', 'flattener')
+
+    def __init__(self, name, value, index = None, flattener = None):
+        self.name = name
+        self.value = value
+        self.index = index
+        assert (name is None) ^ (index is None)
+        self.flattener = flattener
+
+class Value(SortKey):
+    pass
+
+class Term(object):
+    __slots__ = ('value')
+
+    def __init__(self, value):
+        self.value = value
+
+    def __len__(self):
+        return len(self.value)
+
+class Keyword(object):
+    __slots__ = ('name', 'value')
+
+    def __init__(self, name, value):
+        self.name = name
+        self.value = value
+
+    def __len__(self):
+        return len(self.value)
+
+
+class Document:
+    """
+    @ivar keywords: sequence of Keyword objects
+    @ivar sortFields: sequence of SortKey objects
+    @ivar textFields: sequence of TextField objects
+
+    @cvar analyzerFactory: factory object for constructing analyzers
+    @cvar _picklerProtocol: protocol used in pickling data attributes
+    @cvar _noObject: dummy object used to indicate that there is no
+    data attribute
+    @cvar source: this is an optional argument to point at the
+    original text/object that this document represents
+    """
+    _noObject = object()
+    _picklerProtocol = -1
+    analyzerFactory = StandardAnalyzer
+
+    # XXX TODO: add a fromXapianDoc classmethod that can be used by
+    # indices when returning documents from the db
+
+    def __init__(self, textFields = (), sortFields = (), keywords = (),
+                 terms = (), values = (), uid = None, data = _noObject, source = None):
+        """
+        sortFields and values are really the same thing as far as
+        xapian is concerned. We differentiate them in the hope of
+        making the API easier to understand.
+        """
+        for fields in ('textFields', 'sortFields', 'keywords', 'terms', 'values'):
+            arg = vars()[fields]
+            if not isinstance(arg, (list, tuple)):
+                arg = (arg,)
+            setattr(self, fields, list(arg))
+            # copy the list so we can modify without affecting the original
+        self.uid = uid
+        self.data = data
+        self.source = source
+        # sortFields and values are really the same thing as far as xapian is concerned
+        self.sortFields += self.values
+
+    def __len__(self):
+        length = 0
+        for fieldList in (self.textFields, self.keywords):
+            length += sum(map(len, fieldList))
+
+        if self.data != self._noObject:
+            length += len(cPickle.dumps(self.data, self._picklerProtocol))
+
+        return length
+
+    def toXapianDocument(self, indexValueMap, prefixMap=None):
+        d = xapian.Document()
+        position = 1
+        analyzer = self.analyzerFactory()
+
+        # add text fields
+        for field in self.textFields:
+            for token in analyzer.tokenize(field.text):
+                # the xapian swig bindings don't like unicode objects, so we
+                # encode terms to UTF-8 before indexing. this is fine as
+                # long as all data that goes into the db (whether for
+                # indexing or search) is converted to UTF-8 string and all
+                # data coming from the db (.get_value(), .get_data()) is
+                # decoded as UTF-8.
+                token = token.encode(UNICODE_ENCODING, UNICODE_ERROR_POLICY)
+                # the tokenizer cannot guarantee that token length is
+                # below MAX_KEY_LEN since the regexp is done with
+                # unicode and the result is later converted to UTF-8. In
+                # the process, the string length could expand, so we
+                # need to check here as well.
+                d.add_posting(checkKeyLen(token), position)
+                position += 1
+            position += INTER_FIELD_POSITION_GAP
+
+            if field.prefix:
+                prefix = field.name
+                for token in analyzer.tokenize(field.text):
+                    # token is unicode, but gets converted to UTF-8
+                    # by makePairForWrite:
+                    term = makePairForWrite(prefix, token, prefixMap)
+                    d.add_posting(term, position)
+                    position += 1
+                position += INTER_FIELD_POSITION_GAP
+
+        # add keyword fields
+        for field in self.keywords:
+            term = makePairForWrite(field.name, field.value, prefixMap)
+            d.add_term(term)
+
+        # add non positional terms
+        for term in self.terms:
+            d.add_term(term.value)
+
+        # add sort keys
+        for field in self.sortFields:
+            self.addSortField(d, field, indexValueMap)
+
+        # serialize and add the data object if present
+        if self.data is not self._noObject:
+            dataStr = cPickle.dumps(self.data, self._picklerProtocol)
+            d.set_data(dataStr)
+
+        return d
+
+    def addSortField(self, doc, field, indexValueMap):
+        if field.index is None:
+            valueIndex = indexValueMap.get(field.name, None)
+            if valueIndex is None:
+                from index import NoIndexValueFound
+                raise NoIndexValueFound(field.name, indexValueMap)
+        else:
+            valueIndex = field.index
+        assert isinstance(valueIndex, int)
+
+        if field.flattener:
+            flatValue = field.flattener(field.value)
+        else:
+            flatValue = self.flatten(field.value)
+        # xapian has no limit on value length
+        cleanValue = cleanInputText(flatValue)
+        doc.add_value(valueIndex, cleanValue)
+
+    _flatteners = {}
+
+    def flatten(self, value):
+        t = type(value)
+        if t == str:
+            return value
+        elif t in self._flatteners:
+            flattener = self._flatteners[t]
+            flatVal = flattener(value)
+            return flatVal
+        else:
+            raise ValueError("Cannot flatten %r into a string. Perhaps you "
+                             "should register a flattener for type %r."
+                             % (value, type(value)))
+
+    def registerFlattener(klass, typeToFlatten, flattener):
+        if typeToFlatten in klass._flatteners:
+            raise ValueError("A sort field flattener for type %s has already"
+                             "been registered (%s) but you are attempting to"
+                             "register a new flattener: %s"
+                             % (typeToFlatten, klass._flatteners[typeToFlatten],
+                                flattener))
+        assert callable(flattener)
+        klass._flatteners[typeToFlatten] = flattener
+    registerFlattener = classmethod(registerFlattener)
+
+    def unregisterFlattener(klass, typeToFlatten):
+        if typeToFlatten in klass._flatteners:
+            del klass._flatteners[typeToFlatten]
+    unregisterFlattener = classmethod(unregisterFlattener)
+
+# common flatteners:
+
+def flattenNumeric(value, numDigits = 10):
+    return ''.join(('%', str(numDigits), '.d')) % value
+
+Document.registerFlattener(int, flattenNumeric)
+
+def flattenLong(value):
+    return flattenNumeric(value, numDigits=20)
+
+Document.registerFlattener(long, flattenLong)
+
+def flattenDate(value):
+    return value.isoformat()
+
+for dt in (datetime.date, datetime.time, datetime.datetime):
+    Document.registerFlattener(dt, flattenDate)
+
+def flattenUnicode(value):
+    return value.encode(UNICODE_ENCODING)
+
+Document.registerFlattener(unicode, flattenUnicode)
+
+
+def cleanInputText(unknownText, returnUnicode = False):
+    if isinstance(unknownText, str):
+        originalText = unknownText.decode(UNICODE_ENCODING, UNICODE_ERROR_POLICY) # XXX hardcoded UTF-8, make param XXX
+    elif isinstance(unknownText, unicode):
+        originalText = unknownText
+    else:
+        raise ValueError("Only strings and unicode objects can be indexed.")
+    # be very careful about lowercasing the text here: since the API we
+    # expose to higher levels doesn't allow searchup.py to call
+    # findInField directly, searches for INDEXERVERSION:4 have to be
+    # sent as regular queries. lowercasing all queries here will break
+    # keyword searches.
+    if returnUnicode:
+        return originalText
+    else:
+        return originalText.encode(UNICODE_ENCODING, UNICODE_ERROR_POLICY)
+
+
+def makePairForWrite(prefix, token, prefixMap=None):
+    # prefixes must be uppercase; if the prefix given to us is a str
+    # that happens to be UTF-8 encoded, bad things will happen when we
+    # uppercase it, so we convert everything to unicode first
+    if isinstance(prefix, str):
+        prefix = prefix.decode(UNICODE_ENCODING, UNICODE_ERROR_POLICY)
+    if isinstance(token, str):
+        token = token.decode(UNICODE_ENCODING, UNICODE_ERROR_POLICY) # XXX hardcoded UTF-8, make param
+
+    if prefixMap is None:
+        prefix = prefix.upper()
+    else: # we have a map, so first translate it using the map (e.g. 'title' -> 'S')
+        prefix = prefixMap.get(prefix, prefix.upper())
+
+    result = '%s%s%s' % (prefix, prefix[0] == 'X' and ':' or '', token)
+    # since return value is going into the db, it must be encoded as UTF-8
+    result = result.encode(UNICODE_ENCODING, UNICODE_ERROR_POLICY)
+    return checkKeyLen(result)
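+# Illustrative note (editor's sketch, not part of the original module):
+# makePairForWrite('title', u'hello', {'title': 'S'}) returns 'Shello',
+# while makePairForWrite('title', u'hello') falls back to upper-casing the
+# field name and returns 'TITLEhello'.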
+
+def checkKeyLen(s):
+    if not s:
+        return ' '
+    numNullBytes = s.count('\0') + 1
+    xapianLen = numNullBytes + len(s) + 1 # that last one is for the
+                                          # terminating \0
+    if xapianLen < MAX_KEY_LEN:
+        return s
+    else:
+        # doing nothing seems preferable to mangling an overly large
+        # token that we don't know how to handle. we use a space
+        # instead of an empty string because xapian doesn't like
+        # getting empty strings added as terms
+        return ' '
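+# Editor's note (worked example, not part of the original module): for
+# s = 'a\0b', checkKeyLen() computes numNullBytes = 2 and
+# xapianLen = 2 + 3 + 1 = 6, well below MAX_KEY_LEN, so the token is
+# returned unchanged; an oversized token is replaced by a single space
+# rather than being truncated.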
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/support/xapwrap/index.py	Sat Jun 10 16:52:04 2006 +0200
@@ -0,0 +1,1030 @@
+# Copyright (c) 2005 Divmod Inc. See LICENSE file for details.
+"""
+Xapwrap provides an improved interface to the Xapian text indexing
+library (see http://www.xapian.org/ for more information on
+Xapian). Xapwrap provides a layered approach offering ample
+opportunities for customization.
+
+Example
+-------
+::
+
+    from xapwrap import SmartIndex, Document, TextField, SortKey
+    from datetime import date
+
+    idx = SmartIndex('/tmp/index', True)
+    d1 = Document(TextField('hi there bob'),
+                  sortFields = [SortKey('date', date(2004, 1, 1)),
+                                SortKey('author', 'Bob'),
+                                SortKey('size', 450)])
+    idx.index(d1)
+    idx.close()
+
+    idx = SmartIndex('/tmp/index')
+    print idx.search('there', 'date', sortAscending = True)
+
+
+
+Indices
+-------
+
+Important methods for C{ReadOnlyIndex}:
+ __init__(self, *pathnames)
+ close(self)
+ configure(self, prefixMap = None, indexValueMap = None)
+ flush(self)
+ search(self, query, sortKey = None,
+        startingIndex = 0, batchSize = MAX_DOCS_TO_RETURN,
+        sortIndex = None, sortAscending = True,
+        sortByRelevence = False)
+ count(self, query)
+ checkIndex(self, maxID)
+ get_doccount(self)
+
+Important methods for C{Index}:
+ (all methods in ReadOnlyIndex)
+ __init__(self, pathname, create)
+ index(self, doc)
+ add_document(self, doc)
+ replace_document(self, uid, doc)
+ delete_document(self, uid)
+
+C{SmartIndex} and C{SmartReadOnlyIndex} define the same methods as their
+dumb counterparts.
+
+The primary way to interact with a Xapian index is to use either the
+C{Index} or C{ReadOnlyIndex} class. In addition to offering read only
+access without the inconvenience of lock files, C{ReadOnlyIndex} offers
+the ability to merge several Xapian indices into one super index with
+only a small performance impediment.
+
+In addition to C{Index} and C{ReadOnlyIndex}, Xapwrap also offers
+C{SmartIndex} and C{SmartReadOnlyIndex} classes. These classes
+automatically store and manage the index value map and the prefix map in
+the index. There are two caveats to using them, however. First, one
+cannot index documents that have a xapian ID of 1. Secondly, when using
+C{SmartReadOnlyIndex} to combine multiple indices together, the indices
+must have consistent value index maps. Indices where all documents have
+the same index value map are always consistent. The problem only emerges
+when indices can have different types of documents with different sets
+of sort keys. More specifically, the problem can only emerge if one
+indexes documents in such a way that sort keys are added to different
+indices in different orders.
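+
+A minimal sketch of combining two on-disk indices (the paths are
+illustrative; imports work as in the example at the top of this
+docstring)::
+
+    idx = ReadOnlyIndex('/var/idx/mail', '/var/idx/wiki')
+    for hit in idx.search('xapian', batchSize = 10):
+        print hit['uid'], hit['score']
+    idx.close()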
+
+
+Documents
+---------
+
+In order to add new data to an index, one asks a C{Index} or
+C{SmartIndex} instance to index a C{Document} instance. Documents take a
+sequence of text fields, a sequence of sort keys and a sequence of
+keywords as constructor arguments. They also take optional universal
+identifiers and an arbitrary serializable object. The first three
+sequences can be created using the C{TextField}, C{SortKey}, and
+C{Keyword} classes defined below. C{TextField} instances contain a chunk
+of text and an optional name as well as a boolean indicating whether the
+field is to be prefixed. Prefixed fields are effectively indexed twice:
+after being indexed normally, each token is indexed again with the field
+name. This allows the user to perform fielded searches and is primarily
+useful for small text fields, such as the subject of an email or a list
+of author names. C{Keyword} instances denote individual prefixed tokens
+that are indexed with no positional information. C{SortKey} instances
+denote arbitrary fields that are used for sorting documents. They
+include a sort field name and the sort key value. Since Xapian only
+accepts strings as sort keys, sort key values must be flattened into
+strings before entering the index.
+
+Xapwrap defines flattener functions that automatically flatten integer,
+date, time, and datetime instances into strings that sort properly. You
+can define your own flatteners for custom data types by using the
+C{registerFlattener} class method of the C{Document} class.
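+
+A sketch of a custom flattener and its use (C{Money} and C{flattenMoney}
+are made-up names; the zero padded string only has to sort the way the
+values should)::
+
+    class Money(object):
+        def __init__(self, cents):
+            self.cents = cents
+
+    def flattenMoney(value):
+        return '%020d' % value.cents
+
+    Document.registerFlattener(Money, flattenMoney)
+
+    d = Document(TextField('invoice 42 for bob'),
+                 sortFields = [SortKey('amount', Money(4200))],
+                 keywords = [Keyword('status', 'paid')])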
+
+
+Error Handling
+--------------
+Internal Xapian error conditions should generate normal python
+exceptions defined in this file that inherit from xapwrap.XapianError.
+
+
+Logging
+-------
+Xapwrap will use twisted's logging facilities if available. In any
+event, a custom logging function can be supplied by setting xapwrap.log.
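+
+For example (a sketch; whether you assign to xapwrap.log or
+xapwrap.index.log depends on how the package is imported)::
+
+    def myLog(*args):
+        print ' '.join(map(str, args))
+
+    import xapwrap.index
+    xapwrap.index.log = myLog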
+
+
+Future Work
+-----------
+Xapwrap currently does not support stemming or stop words, although a
+future version will.
+
+"""
+import cPickle, sets, glob, os
+import xapian
+from document import makePairForWrite, StandardAnalyzer, Document, SortKey, Keyword
+from document import UNICODE_ENCODING, UNICODE_ERROR_POLICY
+
+try:
+    from atop.tpython import FilesystemLock
+except ImportError:
+    from os import symlink, readlink, remove as rmlink
+    import errno
+
+    class FilesystemLock:
+        """A mutex.
+
+        This relies on the filesystem property that creating
+        a symlink is an atomic operation and that it will
+        fail if the symlink already exists.  Deleting the
+        symlink will release the lock.
+
+        @ivar name: The name of the file associated with this lock.
+        @ivar clean: Indicates whether this lock was released cleanly by its
+        last owner.  Only meaningful after C{lock} has been called and returns
+        True.
+        """
+
+        clean = None
+        locked = False
+
+        def __init__(self, name):
+            self.name = name
+
+        def lock(self):
+            """Acquire this lock.
+
+            @rtype: C{bool}
+            @return: True if the lock is acquired, false otherwise.
+
+            @raise: Any exception os.symlink() may raise, other than
+            EEXIST.
+            """
+            try:
+                pid = readlink(self.name)
+            except (OSError, IOError), e:
+                if e.errno != errno.ENOENT:
+                    raise
+                self.clean = True
+            else:
+                if not hasattr(os, 'kill'):
+                    return False
+                try:
+                    os.kill(int(pid), 0)
+                except (OSError, IOError), e:
+                    if e.errno != errno.ESRCH:
+                        raise
+                    rmlink(self.name)
+                    self.clean = False
+                else:
+                    return False
+
+            symlink(str(os.getpid()), self.name)
+            self.locked = True
+            return True
+
+        def unlock(self):
+            """Release this lock.
+
+            This deletes the directory with the given name.
+
+            @raise: Any exception os.readlink() may raise, or
+            ValueError if the lock is not owned by this process.
+            """
+            pid = readlink(self.name)
+            if int(pid) != os.getpid():
+                raise ValueError("Lock %r not owned by this process" % (self.name,))
+            rmlink(self.name)
+            self.locked = False
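+
+    # Typical use of this fallback lock (a sketch; the path is made up):
+    #     l = FilesystemLock('/tmp/myindex/xapian_lock')
+    #     if l.lock():
+    #         try:
+    #             pass # ... work with the locked resource ...
+    #         finally:
+    #             l.unlock()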
+
+try:
+    from twisted.python.log import msg as log
+except ImportError:
+    def log(*args):
+        pass
+
+
+# max number of bytes that can be indexed without forcing an index
+# flush. this limits memory consumption
+MAX_DATA_INDEXED_BETWEEN_FLUSHES = 200 * 1000
+
+MAX_DOCS_TO_RETURN = 1000 * 1000
+
+XAPIAN_LOCK_FILENAME = "db_lock"
+XAPWRAP_LOCK_FILENAME = "xapian_lock"
+
+# Xapian error handling is somewhat weak: all errors trigger either an
+# IOError, a RuntimeError, or a ValueError. The exception's args
+# attribute is a singleton tuple containing an explanation
+# string. Possible errors include 'DatabaseCorruptError: Quartz metafile
+# /tmp/foo/meta is invalid: magic string not found.' and
+# 'DatabaseLockError: Unable to acquire database write lock
+# /tmp/foo/db_lock'. Instead of looking inside exception error strings
+# everywhere, I made a wrapper for xapian database operations that
+# catches exceptions and translates them into the more meaningful
+# exceptions shown below.
+
+class XapianError(StandardError):
+    pass
+class XapianRuntimeError(XapianError):
+    pass
+class XapianLogicError(XapianError):
+    pass
+class XapianDatabaseError(XapianError):
+    pass
+
+class XapianAssertionError(XapianLogicError):
+    pass
+class InvalidOperationError(XapianLogicError):
+    pass
+class InvalidArgumentError(XapianLogicError):
+    pass
+class UnimplementedError(XapianLogicError):
+    pass
+
+class DocNotFoundError(XapianRuntimeError):
+    pass
+class RangeError(XapianRuntimeError):
+    pass
+class InternalError(XapianRuntimeError):
+    pass
+class FeatureUnavalableError(XapianRuntimeError):
+    pass
+class XapianNetworkError(XapianRuntimeError):
+    pass
+
+class NetworkTimeoutError(XapianNetworkError):
+    pass
+
+class DatabaseCorruptionError(XapianDatabaseError):
+    pass
+class DatabaseCreationError(XapianDatabaseError):
+    pass
+class DatabaseOpeningError(XapianDatabaseError):
+    pass
+class DatabaseLockError(XapianDatabaseError):
+    pass
+class DatabaseModifiedError(XapianDatabaseError):
+    pass
+
+# these exceptions are not Xapian errors
+class UnknownDatabaseError(XapianError):
+    pass
+
+class NoIndexValueFound(XapianError):
+    pass
+
+class InconsistantIndex(XapianError):
+    pass
+
+class InconsistantIndexCombination(XapianError):
+    pass
+
+
+def makeTranslatedMethod(methodName):
+    def translatedMethod(self, *args, **kwargs):
+        try:
+            return getattr(self.db, methodName)(*args, **kwargs)
+        except (IOError, RuntimeError, ValueError), e:
+            errorMsg = e.args[0]
+            for subString, exceptionClass in self.exceptionStrMap.iteritems():
+                if subString in errorMsg:
+                    raise exceptionClass(e)
+            else:
+                raise UnknownDatabaseError(e)
+        except:
+            raise
+    return translatedMethod
+
+class ExceptionTranslater:
+    def __init__(self, db):
+        self.db = db
+
+    def openIndex(klass, readOnly, *args, **kwargs):
+        try:
+            if readOnly:
+                assert len(kwargs) == 0
+                # assume all args are db paths
+                db = xapian.Database(args[0])
+                for path in args[1:]:
+                    db.add_database(xapian.Database(path))
+                return klass(db)
+            else:
+                return klass(xapian.open(*args, **kwargs))
+        except (IOError, RuntimeError, ValueError), e:
+            errorMsg = e.args[0]
+            for subString, exceptionClass in klass.exceptionStrMap.iteritems():
+                if subString in errorMsg:
+                    raise exceptionClass(e)
+            else:
+                raise UnknownDatabaseError(e)
+        except Exception, e:
+            raise UnknownDatabaseError(e)
+
+    openIndex = classmethod(openIndex)
+
+    # possible exceptions are taken from the list at
+    # http://www.xapian.org/docs/apidoc/html/errortypes_8h.html
+    exceptionStrMap = {
+        # exceptions whose names differ between xapwrap and Xapian
+        'DatabaseCorruptError': DatabaseCorruptionError,
+        'AssertionError': XapianAssertionError,
+        'DatabaseCreateError': DatabaseCreationError,
+
+        # exceptions translated with the same name
+        'DatabaseLockError': DatabaseLockError,
+        'DatabaseOpeningError': DatabaseOpeningError,
+        'DatabaseModifiedError': DatabaseModifiedError,
+        'FeatureUnavalableError': FeatureUnavalableError,
+        'DocNotFoundError': DocNotFoundError,
+        'InvalidOperationError': InvalidOperationError,
+        'InvalidArgumentError': InvalidArgumentError,
+        'UnimplementedError': UnimplementedError,
+        'NetworkError': XapianNetworkError,
+        'NetworkTimeoutError': NetworkTimeoutError,
+        'DatabaseError': XapianDatabaseError,
+        'InternalError': InternalError,
+        'RangeError': RangeError,
+        'RuntimeError': XapianRuntimeError,
+        'LogicError': XapianLogicError
+        }
+
+    get_doccount = makeTranslatedMethod('get_doccount')
+    add_document = makeTranslatedMethod('add_document')
+    replace_document = makeTranslatedMethod('replace_document')
+    delete_document = makeTranslatedMethod('delete_document')
+    flush = makeTranslatedMethod('flush')
+    term_exists = makeTranslatedMethod('term_exists')
+    reopen = makeTranslatedMethod('reopen')
+    begin_transaction = makeTranslatedMethod('begin_transaction')
+    commit_transaction = makeTranslatedMethod('commit_transaction')
+    cancel_transaction = makeTranslatedMethod('cancel_transaction')
+    get_lastdocid = makeTranslatedMethod('get_lastdocid')
+    get_avlength = makeTranslatedMethod('get_avlength')
+    get_termfreq = makeTranslatedMethod('get_termfreq')
+    get_collection_freq = makeTranslatedMethod('get_collection_freq')
+    get_doclength = makeTranslatedMethod('get_doclength')
+    get_document = makeTranslatedMethod('get_document')
+
+    postlist_begin = makeTranslatedMethod('postlist_begin')
+    postlist_end = makeTranslatedMethod('postlist_end')
+    termlist_begin = makeTranslatedMethod('termlist_begin')
+    termlist_end = makeTranslatedMethod('termlist_end')
+    positionlist_begin = makeTranslatedMethod('positionlist_begin')
+    positionlist_end = makeTranslatedMethod('positionlist_end')
+    allterms_begin = makeTranslatedMethod('allterms_begin')
+    allterms_end = makeTranslatedMethod('allterms_end')
+
+
+def makeProtectedDBMethod(method, setupDB = True):
+    def protectedMethod(self, *args, **kwargs):
+        if setupDB:
+            self.setupDB()
+        try:
+            return method(self, *args, **kwargs)
+##         # test that this works and doesn't recurse infinitely
+##         except DatabaseModifiedError:
+##             self.reopen()
+##             return protectedMethod(self, *args, **kwargs)
+        except XapianError, e:
+            #log("error encountered while performing xapian index operation %s: %s"
+            #    % (method.__name__, e))
+            self.close()
+            raise
+    return protectedMethod
+
+
+# there are lots of places below where we write code like:
+# enq = mset = None
+# try:
+#     enq = self.enquire(foo)
+#     mset = enq.get_mset(0, 10)
+#     return mset[0][flimflam]
+# except:
+#     del enq, mset
+#     raise
+
+# the purpose of this code is to ensure that no references to enquire
+# objects or msets will outlive the function call. msets and enquire
+# objects hold a reference to the xapian db, and thus prevent it from
+# being properly gc'd. if we fail to delete enq and mset on exception,
+# then they can be kept around for arbitrarily long periods of time as
+# part of the exception state
+
+
+# be extremely careful about keeping a db object in local scope;
+# once its there, an unhandled exception could create a traceback
+# containing a frame object that holds a copy of the locals dict,
+# including the db object. if that frame/traceback object is kept
+# around forever (which parts of twisted/quotient seem to do,
+# especially deferreds), then the db object will never be deleted
+# and the indexer lock will never go away.
+
+# in order to prevent that from happening, we maintain two invariants:
+
+# 1. the db is only accessed as an instance attribute and is never
+# copied into a local variable. i.e., we always say self.db and
+# never ever say db = self.db. this keeps the db object from ever
+# getting captured by a frame/traceback.
+
+# 2. the db is only accessed from within an exception handler that
+# calls self.close() in the event of *any* failure. this ensures
+# that the instance loses all references to the db on failure, so,
+# even if the instance object is captured by a frame object (or
+# something else), the db will already have been freed.
+
+
+class ReadOnlyIndex:
+    """
+    I represent a Xapian index that is read only by wrapping the
+    xapian.Database class. Because I provide read only access, I can be
+    used to combine several Xapian indices into one index with
+    performance only slightly lower than when using only one index.
+
+    @cvar DEFAULT_QUERY_COMBINER_OP: the operation used by the query parser to combine query terms
+
+    @cvar STEMMING_LANGUAGE: the language used by the query parser for
+    stemming. this is of little use since Xapwrap does not yet support
+    stemming when indexing.
+
+    @ivar names: a sequence of file names representing paths to Xapian
+    indices
+
+    Please use the configure method to modify C{prefixMap} and C{indexValueMap}
+
+    @ivar prefixMap: a map of prefixes used by named fields in the index
+    and the name they should be referred to by the query parser
+
+    @ivar indexValueMap: a map from sort field names to value integers
+
+    @ivar amountIndexedSinceLastFlush: the number of bytes indexed since
+    the last flush
+
+    The following instance attributes should never be modified or
+    accessed directly:
+
+    @ivar db: the xapian index object
+    @ivar qp: the xapian query parser object
+    @ivar _searchSessions: a map from query description string to
+    (enquire, lastIndexSortedBy)
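+
+    A typical configuration call looks roughly like this (a sketch; the
+    field names and value numbers are illustrative)::
+
+        idx = ReadOnlyIndex('/var/idx/wiki')
+        idx.configure(prefixMap = {'title': 'S'},
+                      indexValueMap = {'date': 2, 'dateREV': 3})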
+    """
+
+    DEFAULT_QUERY_COMBINER_OP = xapian.Query.OP_AND
+    STEMMING_LANGUAGE = 'none'
+
+    def __init__(self, *names):
+        if len(names) < 1:
+            raise ValueError("No index directory supplied to Index constructor")
+        self.names = names
+        self.db = None
+        self.qp = None
+        self._searchSessions = {}
+        self.prefixMap = {}
+        self.indexValueMap = {}
+        self.amountIndexedSinceLastFlush = 0
+
+    def setupDB(self):
+        # we hide the db so that methods always access it only through
+        # this method since db objects can be silently reaped when not
+        # in use. db objects consume 5 file descriptors.
+
+        if self.db is None:
+            self._setupDB()
+
+            #self.qp = xapian.QueryParser()
+            # this is vital: these options specify no language for
+            # stemming (""), disable stemming (False), and specify an
+            # empty stop word object (None). we need this because by
+            # default, xapian's query parser does english stemming
+            #s = xapian.Stem(self.STEMMING_LANGUAGE)
+            #self.qp.set_stemmer(s)
+
+            # we want query terms to be ANDed together by default
+            #self.qp.set_default_op(self.DEFAULT_QUERY_COMBINER_OP)
+            self._configure()
+
+            log("Index %s contains %s documents" %
+                (self.names, self.get_doccount()))
+
+    def _setupDB(self):
+        self.db = ExceptionTranslater.openIndex(True, *self.names)
+
+    def close(self):
+        log("closing xapian index %s" % self.names)
+        for query in self._searchSessions.keys():
+            del self._searchSessions[query]
+        self.qp = None
+        self.db = None
+
+    def _configure(self):
+        if 'uid' not in self.indexValueMap:
+            # this is a gross hack...
+            self.indexValueMap['uid'] = 0
+            self.indexValueMap['uidREV'] = 1
+        if self.qp is not None:
+            for k, v in self.prefixMap.iteritems():
+                # check for unicode encoding?
+                if v:
+                    V = v.upper()
+                else:
+                    V = k.upper()
+                self.qp.add_prefix(k, V)
+
+    def configure(self, prefixMap = None, indexValueMap = None):
+        if prefixMap is not None:
+            self.prefixMap = prefixMap
+        if indexValueMap is not None:
+            self.indexValueMap = indexValueMap
+        self._configure()
+
+    def get_doccount(self):
+        return self.db.get_doccount()
+    get_doccount = makeProtectedDBMethod(get_doccount)
+
+    def enquire(self, query):
+        searchSession = None
+        try:
+            searchSession = xapian.Enquire(self.db.db)
+            searchSession.set_query(query)
+            return searchSession
+        except:
+            del query, searchSession
+            raise
+    enquire = makeProtectedDBMethod(enquire)
+
+    def flush(self):
+        if self.db is not None:
+            self.db.flush()
+            self.amountIndexedSinceLastFlush = 0
+    flush = makeProtectedDBMethod(flush)
+
+    def search(self, query,
+               sortKey = None,
+               startingIndex = 0,
+               batchSize = MAX_DOCS_TO_RETURN,
+               sortIndex = None, sortAscending = True,
+               sortByRelevence = False,
+               valuesWanted = None):
+        """
+        Search an index.
+
+        @param valuesWanted: a list of value names whose values will be
+        returned as part of the result dictionary.
+        """
+
+        # TODO - allow a simple way to get Keywords out
+        self.setupDB()
+        if isinstance(query, (str, unicode)):
+            query = ParsedQuery(query)
+        elif not(isinstance(query, Query)):
+            raise ValueError("query %s must be either a string or a "
+                             "subclass of xapwrap.Query" % query)
+
+        q = query.prepare(self.qp)
+        # uggg. this mess is due to the fact that xapian Query objects
+        # don't hash in a sane way.
+        qString = q.get_description()
+
+        # the only thing we use sortKey for is to set sort index
+        if sortKey is not None:
+            sortIndex = self.indexValueMap[sortKey]
+
+        # once you call set_sorting on an Enquire instance, there is no
+        # way to re-sort it by relevance, so we have to open a new
+        # session instead.
+
+        # ignore sortAscending since there's no easy way to implement
+        # ascending relevancy sorts and it's tough to imagine a case
+        # where you'd want to see the worst results. in any event, the
+        # user can always sort by relevancy and go to the last page of
+        # results.
+
+        enq = mset = None
+        if qString not in self._searchSessions:
+            self._searchSessions[qString] = (self.enquire(q), None)
+        try:
+            enq, lastIndexSortedBy = self._searchSessions[qString]
+
+            # if we don't set sortIndex, the results will be returned
+            # sorted by relevance, assuming that we have never called
+            # set_sorting on this session
+            if sortByRelevence and lastIndexSortedBy is not None:
+                sortIndex = sortKey = None
+                if lastIndexSortedBy is not None:
+                    del self._searchSessions[qString]
+                    self._searchSessions[qString] = (self.enquire(q), None)
+                    enq, lastIndexSortedBy = self._searchSessions[qString]
+            if sortIndex is not None:
+                # It seems that we have the opposite definition of sort ascending
+                # from Xapian's, so we invert the ascending flag!
+                enq.set_sort_by_value(sortIndex, not sortAscending)
+
+            self._searchSessions[qString] = (enq, sortIndex)
+
+            mset = enq.get_mset(startingIndex, batchSize)
+            results = []
+            for m in mset:
+                thisResult = {}
+                thisResult['uid'] = m[xapian.MSET_DID]
+                thisResult['score'] = m[xapian.MSET_PERCENT]
+                if valuesWanted:
+                    xapDoc = m[4]
+                    valRes = {}
+                    for valName in valuesWanted:
+                        valueIndex = self.indexValueMap.get(valName, None)
+                        if valueIndex is None:
+                            raise NoIndexValueFound(valName, self.indexValueMap)
+                        valRes[valName] = xapDoc.get_value(valueIndex)
+                    thisResult['values'] = valRes
+                results.append(thisResult)
+            return results
+        except:
+            del enq, mset
+            raise
+    search = makeProtectedDBMethod(search)
+
+    def count(self, query):
+        enq = mset = None
+        try:
+            enq = self.enquire(query)
+            # get_matches_estimated does not return accurate results if
+            # given a small ending number like 0 or 1
+            mset = enq.get_mset(0, MAX_DOCS_TO_RETURN)
+            sizeEstimate = mset.get_matches_estimated()
+            return sizeEstimate, self.get_doccount()
+        except:
+            del enq, mset
+            raise
+    count = makeProtectedDBMethod(count)
+
+    def checkIndex(self, maxID):
+        """Compute a list of all UIDs less than or equal to maxID that
+        are not in the db.
+        """
+        # I had originally suspected that the performance hit of
+        # returning a huge list in the case of empty indexes would be
+        # substantial, but testing with a 120,000 msg index indicates
+        # that performance is fine and that the space overhead is quite
+        # reasonable. If that were not the case, this could be optimized
+        # by calculating the maximum document ID in the index and only
+        # scanning up to the minimum of maxID and the max ID in the
+        # index, assuming that we're using the same document IDs in the
+        # index as in atop.
+
+        missingUIDs = []
+        for uid in xrange(maxID + 1):
+            term = makePairForWrite('UID', str(uid))
+            if not self.db.term_exists(term):
+                missingUIDs.append(uid)
+        return missingUIDs
+    checkIndex = makeProtectedDBMethod(checkIndex)
+
+    def get_documents(self, uid):
+        """ return a list of remapped UIDs corresponding to the actual UID given
+        """
+        docTerm = makePairForWrite('UID', str(uid))
+        candidates = self.search(RawQuery(docTerm))
+        return [int(c['uid']) for c in candidates]
+
+    def get_document(self, uid):
+        # we cannot simply use db.get_document since doc ids get
+        # remapped when combining multiple databases
+        candidates = self.get_documents(uid)
+        if len(candidates) == 0:
+            raise DocNotFoundError(uid)
+        elif len(candidates) == 1:
+            return self._get_document(candidates[0])
+        else:
+            raise InconsistantIndex(
+                "Something has gone horribly wrong. I tried "
+                "retrieving document id %s but found %i documents "
+                "with that document ID term" % (uid, len(candidates)))
+
+    def _get_document(self, uid):
+        assert isinstance(uid, int)
+        return self.db.get_document(uid)
+    _get_document = makeProtectedDBMethod(_get_document)
+
+    def term_exists(self, term):
+        assert isinstance(term, str)
+        return self.db.term_exists(term)
+    term_exists = makeProtectedDBMethod(term_exists)
+
+    def get_lastdocid(self):
+        return self.db.get_lastdocid()
+    get_lastdocid = makeProtectedDBMethod(get_lastdocid)
+
+# XXX FIXME: we should consider deleting all searchSessions whenever we
+# add a document, or we should reopen the db
+
+
+class Index(ReadOnlyIndex):
+
+    def __init__(self, name, create = False, analyzer = None):
+        # XXX FIXME: we should really try opening the db here, so that
+        # any errors are caught immediately rather than waiting for the
+        # first time we try to do something...
+        ReadOnlyIndex.__init__(self, name)
+        self.name = name
+        if create:
+            self.flags = xapian.DB_CREATE_OR_OPEN
+        else:
+            self.flags = xapian.DB_OPEN
+        self.analyzer = analyzer or StandardAnalyzer()
+        self.lockFile = FilesystemLock(
+            os.path.join(self.name, XAPWRAP_LOCK_FILENAME))
+
+    def _setupDB(self):
+        """ really get a xapian database object """
+
+        # xapian expects directories! self.name should refer to a
+        # directory. if it doesn't exist, we'll make one.
+        if not os.path.exists(self.name):
+            os.mkdir(self.name)
+
+        # try to acquire a lock file
+        if not self.lockFile.lock():
+            owningPid = os.readlink(self.lockFile.name)
+            errorMsg = ("cannot acquire lock file for xapian index %s"
+                        "because it is owned by process %s" %
+                        (self.name, owningPid))
+            log(errorMsg)
+            raise DatabaseLockError(errorMsg)
+        xapLockFilePath = os.path.join(self.name, XAPIAN_LOCK_FILENAME)
+        if os.path.exists(xapLockFilePath):
+            log("Stale database lock found in %s. Deleting it now." % xapLockFilePath)
+            os.remove(xapLockFilePath)
+
+        # actually try to open a xapian DB
+        try:
+            try:
+                self.db = ExceptionTranslater.openIndex(False, self.name, self.flags)
+            except DatabaseCorruptionError, e:
+                # the index is trashed, so there's no harm in blowing it
+                # away and starting from scratch
+                log("Xapian index at %s is corrupted and will be destroyed"
+                    % self.name)
+                if self.lockFile.locked:
+                    self.lockFile.unlock()
+                for idxFname in glob.glob(os.path.join(self.name, '*')):
+                    os.remove(idxFname)
+                self.db = ExceptionTranslater.openIndex(False, self.name, self.flags)
+        finally:
+            if self.db is None and self.lockFile.locked:
+                self.lockFile.unlock()
+
+    def __del__(self):
+        self.close()
+
+    def close(self):
+        # this is important! the only way to get xapian to release the
+        # db lock is to call the db object's destructor. that won't
+        # happen until nobody is holding a reference to the db
+        # object. unfortunately, the query parser holds a reference to
+        # it, so the query parser must also go away. do not hold
+        # references to these objects anywhere but here.
+
+        # enquire objects and mset objects hold a reference to the db,
+        # so if any of them are left alive, the db will not be reclaimed
+
+        if self.db is not None:
+            ReadOnlyIndex.close(self)
+            # the islink test is needed in case the index directory has
+            # been deleted before close() was called.
+            if self.lockFile.locked and os.path.islink(self.lockFile.name):
+                self.lockFile.unlock()
+            # there is no point in checking if the lock file is still
+            # around right here: it will only be deleted when xapian's
+            # destructor runs, but python defers running destructors
+            # until after exception handling is complete. since this
+            # code will often get called from an exception handler, we
+            # have to assume that the lock file's removal will be
+            # delayed at least until after this method exits
+
+    def get_document(self, uid):
+        return self._get_document(uid)
+
+    # methods that modify db state
+
+    def index(self, doc):
+        self.setupDB()
+        if hasattr(doc, 'uid') and doc.uid:
+            uid = int(doc.uid)
+            doc.sortFields.append(SortKey('uid', uid))
+            doc.keywords.append(Keyword('uid', str(uid)))
+            xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
+            self.replace_document(uid, xapDoc)
+        else:
+            # We need to know the uid of the doc we're going to add
+            # before we add it so we can setup appropriate uid sorting
+            # values. But, another thread could potentially insert a
+            # document at that uid after we determine the last uid, but
+            # before we manage the insertion. Yay race conditions! So we
+            # try to add the document and then check that it ended up at
+            # the right uid. If it did not, we update it with the
+            # correct uid sort values.
+            uid = self.get_lastdocid() + 1
+            doc.sortFields.append(SortKey('uid', uid))
+            doc.keywords.append(Keyword('uid', str(uid)))
+            xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
+            newUID = self.add_document(xapDoc)
+            if newUID != uid:
+                doc.sortFields.append(SortKey('uid', newUID))
+                doc.keywords.append(Keyword('uid', str(newUID)))
+                xapDoc = doc.toXapianDocument(self.indexValueMap, self.prefixMap)
+                self.replace_document(newUID, xapDoc)
+
+            # a simpler alternative would be to add an empty document
+            # and then replace it. the problem with that strategy is
+            # that it kills performance since xapian performs an
+            # implicit flush when you replace a document that was added
+            # but not yet committed to disk.
+
+        self.amountIndexedSinceLastFlush += len(doc)
+        if self.amountIndexedSinceLastFlush > MAX_DATA_INDEXED_BETWEEN_FLUSHES:
+            self.flush()
+        return uid
+
+    def add_document(self, doc):
+        return self.db.add_document(doc)
+    add_document = makeProtectedDBMethod(add_document)
+
+    def replace_document(self, uid, doc):
+        return self.db.replace_document(uid, doc)
+    replace_document = makeProtectedDBMethod(replace_document)
+
+    def delete_document(self, docID):
+        return self.db.delete_document(docID)
+    delete_document = makeProtectedDBMethod(delete_document)
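+
+# A typical write path through the Index class (a sketch only; note that
+# TextField lives in xapwrap.document and is not imported by this module):
+#     idx = Index('/tmp/idx', create = True)
+#     uid = idx.index(Document(TextField('some text to index'),
+#                              keywords = [Keyword('author', 'bob')]))
+#     idx.flush()
+#     idx.close()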
+
+class Query:
+    pass
+
+class ParsedQuery(Query):
+    def __init__(self, queryString):
+        if isinstance(queryString, unicode):
+            queryString = queryString.encode(UNICODE_ENCODING, UNICODE_ERROR_POLICY)
+        # as of xapian 0.9.5 the query parser makes trouble with utf-8, but it
+        # also doesn't work with iso-8859-15, so we just live with ascii-only
+        # search for now... - a utf-8 fix seems to be planned for the near future!
+        self.queryString = queryString
+
+    def prepare(self, queryParser):
+        return queryParser.parse_query(self.queryString)
+
+class RawQuery(Query):
+    def __init__(self, queryString):
+        if isinstance(queryString, unicode):
+            queryString = queryString.encode('utf-8')
+
+        assert isinstance(queryString, str)
+        self.queryString = queryString
+
+    def prepare(self, queryParser):
+        return xapian.Query(self.queryString)
+
+class QObjQuery(Query):
+    def __init__(self, query):
+        assert isinstance(query, xapian.Query)
+        self.query = query
+
+    def prepare(self, queryParser):
+        return self.query
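+
+# Illustrative uses of the three query classes (a sketch):
+#   ParsedQuery('foo bar')                   - run through the query parser
+#   RawQuery(makePairForWrite('UID', '42'))  - a single verbatim term, as used
+#                                              by get_documents() above
+#   QObjQuery(xapian.Query('foo'))           - wrap an already built xapian.Query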
+
+class SmartIndex(Index):
+    documentFactory = Document
+
+    def __init__(self, *args, **kwargs):
+        Index.__init__(self, *args, **kwargs)
+        self.fetchState()
+
+    def saveState(self):
+        self.setupDB()
+        state = {'indexValueMap': self.indexValueMap,
+                 'prefixMap': self.prefixMap}
+        d = self.documentFactory(uid = 1, data = state)
+        self.index(d, checkID = False)
+        self.flush()
+
+    def fetchState(self):
+        self.setupDB()
+        if self.get_doccount() == 0:
+            # Don't rely on the try:except: for this case
+            self.saveState()
+        try:
+            doc = self.get_document(1)
+        except DocNotFoundError:
+            newState = {'indexValueMap': {}, 'prefixMap': {}}
+            self.saveState()
+        else:
+            dataStr = doc.get_data()
+            newState = cPickle.loads(dataStr)
+        self.indexValueMap.update(newState['indexValueMap'])
+        self.prefixMap.update(newState['prefixMap'])
+
+    def index(self, doc, checkID = True):
+        if hasattr(doc, 'uid') and doc.uid == 1 and checkID:
+            raise InvalidArgumentError(
+                "document UIDs must be greater than one when using SmartIndex")
+
+        docSortKeys = sets.Set([sk.name for sk in doc.sortFields if sk.name is not None])
+        indexSortKeys = sets.Set(self.indexValueMap.keys())
+        if not docSortKeys.issubset(indexSortKeys):
+            nextValueIndex = 1 + max(self.indexValueMap.itervalues())
+            # we sort the sortKeys in order to improve the odds that two
+            # indices that are indexed with the same documents in the
+            # same order will always end up with the same
+            # indexValueMaps, even if different versions of python are
+            # used with different hash functions
+            sortKeys = list(docSortKeys)
+            sortKeys.sort()
+            for sortKey in sortKeys:
+                if sortKey not in self.indexValueMap:
+                    assert nextValueIndex % 2 == 0
+                    self.indexValueMap[sortKey] = nextValueIndex
+                    self.indexValueMap[sortKey + 'REV'] = nextValueIndex + 1
+                    nextValueIndex += 2
+            self.saveState()
+
+        docKeywords = sets.Set([tf.name for tf in doc.textFields if tf.prefix] +
+                               [kw.name for kw in doc.keywords])
+        indexKeyWords = sets.Set(self.prefixMap.keys())
+        if not docKeywords.issubset(indexKeyWords):
+            for k in docKeywords - indexKeyWords:
+                self.prefixMap[k] = k.upper()
+            self.saveState()
+
+        return Index.index(self, doc)
+
+
+class SmartReadOnlyIndex(ReadOnlyIndex):
+
+    def __init__(self, *args, **kwargs):
+        ReadOnlyIndex.__init__(self, *args, **kwargs)
+        self.fetchState()
+
+    def fetchState(self):
+        stateDocIDs = self.get_documents(1)
+        stateDocs = map(self._get_document, stateDocIDs)
+        states = [cPickle.loads(s.get_data()) for s in stateDocs]
+
+        # should we issue a warning when the number of states that we
+        # retrieve is less than the number of indices we opened? the
+        # only problem is that some indices may be empty, but there's no
+        # easy way to check how many documents are in a subindex without
+        # opening it explicitly using xapian.Database and that seems
+        # rather expensive for this code path.
+
+        # merge all the states into a master state
+        master = {'prefixMap': self.prefixMap,
+                  'indexValueMap': self.indexValueMap}
+        # note that if there are conflicts, there is no guarantee on who
+        # will win, but it doesn't matter since we'll die on conflicts
+        # later anyway
+        for s in states:
+            for substate in ('prefixMap', 'indexValueMap'):
+                sub = s.get(substate, {})
+                mSub = master[substate]
+                for k, v in sub.iteritems():
+                    mSub[k] = v
+
+        # ensure that states are compatible (check for conflicts)
+        conflicts = []
+        for s in states:
+            for substate in ('prefixMap', 'indexValueMap'):
+                sub = s.get(substate, {})
+                mSub = master[substate]
+                for k, v in sub.iteritems():
+                    if k in mSub and mSub[k] != v:
+                        # we defer error reporting so that the user sees
+                        # as much info on the error as possible
+                        conflicts.append((substate, k, v, mSub[k]))
+
+        # the only way states can be incompatible is if two states have
+        # different values for the same keys in the same substate
+
+        if conflicts:
+            raise InconsistantIndexCombination(
+                "The SmartReadOnlyIndex opened on %s cannot recconcile "
+                "the following conflicts in the subindices' states:\n%s"
+                % (self.names,
+                   '\n'.join(["%s[%r] is %r in one index but %r in another"
+                              % c for c in conflicts])))
+
+        self.prefixMap = master['prefixMap']
+        self.indexValueMap = master['indexValueMap']
+
+    def search(self, query, sortKey = None,
+               startingIndex = 0,
+               batchSize = MAX_DOCS_TO_RETURN,
+               sortIndex = None, sortAscending = True,
+               sortByRelevence = False):
+        # if the appropriate index value string is not in
+        # self.indexValueMap, fetchState() before calling
+        # ReadOnlyIndex.search. if it still isn't there, let
+        # ReadOnlyIndex.search take care of throwing an error
+        if sortKey is not None and sortKey not in self.indexValueMap:
+            self.fetchState()
+        return ReadOnlyIndex.search(self, query, sortKey,
+                                    startingIndex, batchSize,
+                                    sortIndex, sortAscending,
+                                    sortByRelevence)
+
--- a/MoinMoin/wikiutil.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/MoinMoin/wikiutil.py	Sat Jun 10 16:52:04 2006 +0200
@@ -761,33 +761,57 @@
     else:
         return u'["%s"]' % pagename
 
+#############################################################################
+### mimetype support
+#############################################################################
+import mimetypes
+
+MIMETYPES_MORE = {
+ # OpenOffice 2.x & other open document stuff
+ '.odt': 'application/vnd.oasis.opendocument.text',
+ '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
+ '.odp': 'application/vnd.oasis.opendocument.presentation',
+ '.odg': 'application/vnd.oasis.opendocument.graphics',
+ '.odc': 'application/vnd.oasis.opendocument.chart',
+ '.odf': 'application/vnd.oasis.opendocument.formula',
+ '.odb': 'application/vnd.oasis.opendocument.database',
+ '.odi': 'application/vnd.oasis.opendocument.image',
+ '.odm': 'application/vnd.oasis.opendocument.text-master',
+ '.ott': 'application/vnd.oasis.opendocument.text-template',
+ '.ots': 'application/vnd.oasis.opendocument.spreadsheet-template',
+ '.otp': 'application/vnd.oasis.opendocument.presentation-template',
+ '.otg': 'application/vnd.oasis.opendocument.graphics-template',
+}
+for ext, mimetype in MIMETYPES_MORE.items():
+    mimetypes.add_type(mimetype, ext, True)
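+# After these registrations, mimetypes.guess_type('x.odt') should return
+# ('application/vnd.oasis.opendocument.text', None), for example.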
+
+MIMETYPES_sanitize_mapping = {
+    # this stuff is text, but got application/* for unknown reasons
+    ('application', 'docbook+xml'): ('text', 'docbook'),
+    ('application', 'x-latex'): ('text', 'latex'),
+    ('application', 'x-tex'): ('text', 'tex'),
+    ('application', 'javascript'): ('text', 'javascript'),
+}
+
+MIMETYPES_spoil_mapping = {} # inverse mapping of above
+for key, value in MIMETYPES_sanitize_mapping.items():
+    MIMETYPES_spoil_mapping[value] = key
+
+
 # mimetype stuff ------------------------------------------------------------
 class MimeType(object):
     """ represents a mimetype like text/plain """
-    sanitize_mapping = {
-        # this stuff is text, but got application/* for unknown reasons
-        ('application', 'docbook+xml'): ('text', 'docbook'),
-        ('application', 'x-latex'): ('text', 'latex'),
-        ('application', 'x-tex'): ('text', 'tex'),
-        ('application', 'javascript'): ('text', 'javascript'),
-    }
-    spoil_mapping = {} # inverse mapping of above
     
     def __init__(self, mimestr=None, filename=None):
         self.major = self.minor = None # sanitized mime type and subtype
         self.params = {} # parameters like "charset" or others
         self.charset = None # this stays None until we know for sure!
 
-        for key, value in self.sanitize_mapping.items():
-            self.spoil_mapping[value] = key
-
         if mimestr:
             self.parse_mimetype(mimestr)
         elif filename:
             self.parse_filename(filename)
     
     def parse_filename(self, filename):
-        import mimetypes
         mtype, encoding = mimetypes.guess_type(filename)
         if mtype is None:
             mtype = 'application/octet-stream'
@@ -844,13 +868,13 @@
             readable text, we will return some text/* mimetype, not application/*,
             because we need text/plain as fallback and not application/octet-stream.
         """
-        self.major, self.minor = self.sanitize_mapping.get((self.major, self.minor), (self.major, self.minor))
+        self.major, self.minor = MIMETYPES_sanitize_mapping.get((self.major, self.minor), (self.major, self.minor))
 
     def spoil(self):
         """ this returns something conformant to /etc/mime.type or IANA as a string,
             kind of inverse operation of sanitize(), but doesn't change self
         """
-        major, minor = self.spoil_mapping.get((self.major, self.minor), (self.major, self.minor))
+        major, minor = MIMETYPES_spoil_mapping.get((self.major, self.minor), (self.major, self.minor))
         return self.content_type(major, minor)
 
     def content_type(self, major=None, minor=None, charset=None, params=None):
--- a/docs/CHANGES	Sat Jun 10 16:45:05 2006 +0200
+++ b/docs/CHANGES	Sat Jun 10 16:52:04 2006 +0200
@@ -28,6 +28,33 @@
     and improving it and after having made a backup with some other, proven
     method. USE BOTH ON YOUR OWN RISK!
 
+Branch moin-1.6-xapian:
+  New Features:
+    * Added Xapian (see http://xapian.org/) based indexed search code.
+      Our implementation is still buggy; only use it if you want to help
+      debug it or to implement / test indexing filters (see
+      MoinMoin/filter/). To use this:
+      * Install xapian-core and xapian-bindings on your machine.
+        We used 0.9.4, but newer code should hopefully work, too.
+      * cfg.xapian_search = True
+      * Execute this to build the index:
+        $ moin ... index build   # indexes pages and attachments
+        $ moin ... index build --files=files.lst  # same plus a list of files
+        You should run those commands as the same user you use for your wiki;
+        usually this is the webserver user id, e.g.:
+        $ sudo -u www-data moin --config=... --wiki-url=wiki.example.org/ \
+               index build --files=files.lst
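+      * A minimal wikiconfig sketch for switching it on (only the
+        xapian_search line is specific to this feature; the import and the
+        Config class are the usual wikiconfig boilerplate):
+          from MoinMoin.multiconfig import DefaultConfig
+          class Config(DefaultConfig):
+              xapian_search = True   # enable the Xapian based indexed search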
+  ToDo:
+    * fix/improve query parsing (xapian_term member functions)
+    * fix/improve evaluation of search result
+    * maybe add some "xapian native query" mode (can we make it working without
+      the _moinSearch post-processing (not possible as it uses the same query))
+
+  Other Changes:
+    * Removed Lupy based indexed search code. If you were brave enough to
+      use cfg.lupy_search, you may want to try cfg.xapian_search instead.
+
+
 Version 1.6.current:
     This is the active development branch. All changes get done here and
     critical stuff gets committed with -m "... (backport to 1.5)" and then
--- a/docs/CHANGES.fpletz	Sat Jun 10 16:45:05 2006 +0200
+++ b/docs/CHANGES.fpletz	Sat Jun 10 16:52:04 2006 +0200
@@ -1,6 +1,3 @@
-Please use your CHANGES.$yourname for recording your changes you do while
-Google Summer of Code.
-
 Branch moin/1.6-xapian-fpletz
 =============================
 
@@ -8,10 +5,12 @@
     * ...
 
   ToDo:
-    * ...
+    * Manually parse prefixes (e.g. title:) in MoinMoin.Xapian.Index
+      right before searching
+    * Mockup the new search UI
 
   New Features:
-    * ...
+    * TBD
   
   Bugfixes (only stuff that is buggy in moin/1.6 main branch):
     * ...
@@ -25,9 +24,12 @@
 
 Diary
 =====
-Please make at least one entry per day (and commit it) about what your work was about.
 
-2006-05-29 ...
-2006-05-30 ...
-2006-05-31 ...
+2006-06-10 Changed xapian_term() functions to return xapian.Query objects
+but without touching the prefixes as we don't have a prefixMap yet. Will
+implement this in MoinMoin.Xapian.Index. AndExpression needed some more
+tweaking to use AND_NOT because Xapian doesn't provide a pure NOT. Should
+be no issue with OrExpression as _moinSearch handles this correctly.
 
+2006-06-11
+
--- a/docs/Lupy-0.2.1/LICENSE	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,504 +0,0 @@
-		  GNU LESSER GENERAL PUBLIC LICENSE
-		       Version 2.1, February 1999
-
- Copyright (C) 1991, 1999 Free Software Foundation, Inc.
-     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-[This is the first released version of the Lesser GPL.  It also counts
- as the successor of the GNU Library Public License, version 2, hence
- the version number 2.1.]
-
-			    Preamble
-
-  The licenses for most software are designed to take away your
-freedom to share and change it.  By contrast, the GNU General Public
-Licenses are intended to guarantee your freedom to share and change
-free software--to make sure the software is free for all its users.
-
-  This license, the Lesser General Public License, applies to some
-specially designated software packages--typically libraries--of the
-Free Software Foundation and other authors who decide to use it.  You
-can use it too, but we suggest you first think carefully about whether
-this license or the ordinary General Public License is the better
-strategy to use in any particular case, based on the explanations below.
-
-  When we speak of free software, we are referring to freedom of use,
-not price.  Our General Public Licenses are designed to make sure that
-you have the freedom to distribute copies of free software (and charge
-for this service if you wish); that you receive source code or can get
-it if you want it; that you can change the software and use pieces of
-it in new free programs; and that you are informed that you can do
-these things.
-
-  To protect your rights, we need to make restrictions that forbid
-distributors to deny you these rights or to ask you to surrender these
-rights.  These restrictions translate to certain responsibilities for
-you if you distribute copies of the library or if you modify it.
-
-  For example, if you distribute copies of the library, whether gratis
-or for a fee, you must give the recipients all the rights that we gave
-you.  You must make sure that they, too, receive or can get the source
-code.  If you link other code with the library, you must provide
-complete object files to the recipients, so that they can relink them
-with the library after making changes to the library and recompiling
-it.  And you must show them these terms so they know their rights.
-
-  We protect your rights with a two-step method: (1) we copyright the
-library, and (2) we offer you this license, which gives you legal
-permission to copy, distribute and/or modify the library.
-
-  To protect each distributor, we want to make it very clear that
-there is no warranty for the free library.  Also, if the library is
-modified by someone else and passed on, the recipients should know
-that what they have is not the original version, so that the original
-author's reputation will not be affected by problems that might be
-introduced by others.
-
-  Finally, software patents pose a constant threat to the existence of
-any free program.  We wish to make sure that a company cannot
-effectively restrict the users of a free program by obtaining a
-restrictive license from a patent holder.  Therefore, we insist that
-any patent license obtained for a version of the library must be
-consistent with the full freedom of use specified in this license.
-
-  Most GNU software, including some libraries, is covered by the
-ordinary GNU General Public License.  This license, the GNU Lesser
-General Public License, applies to certain designated libraries, and
-is quite different from the ordinary General Public License.  We use
-this license for certain libraries in order to permit linking those
-libraries into non-free programs.
-
-  When a program is linked with a library, whether statically or using
-a shared library, the combination of the two is legally speaking a
-combined work, a derivative of the original library.  The ordinary
-General Public License therefore permits such linking only if the
-entire combination fits its criteria of freedom.  The Lesser General
-Public License permits more lax criteria for linking other code with
-the library.
-
-  We call this license the "Lesser" General Public License because it
-does Less to protect the user's freedom than the ordinary General
-Public License.  It also provides other free software developers Less
-of an advantage over competing non-free programs.  These disadvantages
-are the reason we use the ordinary General Public License for many
-libraries.  However, the Lesser license provides advantages in certain
-special circumstances.
-
-  For example, on rare occasions, there may be a special need to
-encourage the widest possible use of a certain library, so that it becomes
-a de-facto standard.  To achieve this, non-free programs must be
-allowed to use the library.  A more frequent case is that a free
-library does the same job as widely used non-free libraries.  In this
-case, there is little to gain by limiting the free library to free
-software only, so we use the Lesser General Public License.
-
-  In other cases, permission to use a particular library in non-free
-programs enables a greater number of people to use a large body of
-free software.  For example, permission to use the GNU C Library in
-non-free programs enables many more people to use the whole GNU
-operating system, as well as its variant, the GNU/Linux operating
-system.
-
-  Although the Lesser General Public License is Less protective of the
-users' freedom, it does ensure that the user of a program that is
-linked with the Library has the freedom and the wherewithal to run
-that program using a modified version of the Library.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.  Pay close attention to the difference between a
-"work based on the library" and a "work that uses the library".  The
-former contains code derived from the library, whereas the latter must
-be combined with the library in order to run.
-
-		  GNU LESSER GENERAL PUBLIC LICENSE
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-  0. This License Agreement applies to any software library or other
-program which contains a notice placed by the copyright holder or
-other authorized party saying it may be distributed under the terms of
-this Lesser General Public License (also called "this License").
-Each licensee is addressed as "you".
-
-  A "library" means a collection of software functions and/or data
-prepared so as to be conveniently linked with application programs
-(which use some of those functions and data) to form executables.
-
-  The "Library", below, refers to any such software library or work
-which has been distributed under these terms.  A "work based on the
-Library" means either the Library or any derivative work under
-copyright law: that is to say, a work containing the Library or a
-portion of it, either verbatim or with modifications and/or translated
-straightforwardly into another language.  (Hereinafter, translation is
-included without limitation in the term "modification".)
-
-  "Source code" for a work means the preferred form of the work for
-making modifications to it.  For a library, complete source code means
-all the source code for all modules it contains, plus any associated
-interface definition files, plus the scripts used to control compilation
-and installation of the library.
-
-  Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope.  The act of
-running a program using the Library is not restricted, and output from
-such a program is covered only if its contents constitute a work based
-on the Library (independent of the use of the Library in a tool for
-writing it).  Whether that is true depends on what the Library does
-and what the program that uses the Library does.
-  
-  1. You may copy and distribute verbatim copies of the Library's
-complete source code as you receive it, in any medium, provided that
-you conspicuously and appropriately publish on each copy an
-appropriate copyright notice and disclaimer of warranty; keep intact
-all the notices that refer to this License and to the absence of any
-warranty; and distribute a copy of this License along with the
-Library.
-
-  You may charge a fee for the physical act of transferring a copy,
-and you may at your option offer warranty protection in exchange for a
-fee.
-
-  2. You may modify your copy or copies of the Library or any portion
-of it, thus forming a work based on the Library, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-    a) The modified work must itself be a software library.
-
-    b) You must cause the files modified to carry prominent notices
-    stating that you changed the files and the date of any change.
-
-    c) You must cause the whole of the work to be licensed at no
-    charge to all third parties under the terms of this License.
-
-    d) If a facility in the modified Library refers to a function or a
-    table of data to be supplied by an application program that uses
-    the facility, other than as an argument passed when the facility
-    is invoked, then you must make a good faith effort to ensure that,
-    in the event an application does not supply such function or
-    table, the facility still operates, and performs whatever part of
-    its purpose remains meaningful.
-
-    (For example, a function in a library to compute square roots has
-    a purpose that is entirely well-defined independent of the
-    application.  Therefore, Subsection 2d requires that any
-    application-supplied function or table used by this function must
-    be optional: if the application does not supply it, the square
-    root function must still compute square roots.)
-
-These requirements apply to the modified work as a whole.  If
-identifiable sections of that work are not derived from the Library,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works.  But when you
-distribute the same sections as part of a whole which is a work based
-on the Library, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote
-it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Library.
-
-In addition, mere aggregation of another work not based on the Library
-with the Library (or with a work based on the Library) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-  3. You may opt to apply the terms of the ordinary GNU General Public
-License instead of this License to a given copy of the Library.  To do
-this, you must alter all the notices that refer to this License, so
-that they refer to the ordinary GNU General Public License, version 2,
-instead of to this License.  (If a newer version than version 2 of the
-ordinary GNU General Public License has appeared, then you can specify
-that version instead if you wish.)  Do not make any other change in
-these notices.
-
-  Once this change is made in a given copy, it is irreversible for
-that copy, so the ordinary GNU General Public License applies to all
-subsequent copies and derivative works made from that copy.
-
-  This option is useful when you wish to copy part of the code of
-the Library into a program that is not a library.
-
-  4. You may copy and distribute the Library (or a portion or
-derivative of it, under Section 2) in object code or executable form
-under the terms of Sections 1 and 2 above provided that you accompany
-it with the complete corresponding machine-readable source code, which
-must be distributed under the terms of Sections 1 and 2 above on a
-medium customarily used for software interchange.
-
-  If distribution of object code is made by offering access to copy
-from a designated place, then offering equivalent access to copy the
-source code from the same place satisfies the requirement to
-distribute the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-  5. A program that contains no derivative of any portion of the
-Library, but is designed to work with the Library by being compiled or
-linked with it, is called a "work that uses the Library".  Such a
-work, in isolation, is not a derivative work of the Library, and
-therefore falls outside the scope of this License.
-
-  However, linking a "work that uses the Library" with the Library
-creates an executable that is a derivative of the Library (because it
-contains portions of the Library), rather than a "work that uses the
-library".  The executable is therefore covered by this License.
-Section 6 states terms for distribution of such executables.
-
-  When a "work that uses the Library" uses material from a header file
-that is part of the Library, the object code for the work may be a
-derivative work of the Library even though the source code is not.
-Whether this is true is especially significant if the work can be
-linked without the Library, or if the work is itself a library.  The
-threshold for this to be true is not precisely defined by law.
-
-  If such an object file uses only numerical parameters, data
-structure layouts and accessors, and small macros and small inline
-functions (ten lines or less in length), then the use of the object
-file is unrestricted, regardless of whether it is legally a derivative
-work.  (Executables containing this object code plus portions of the
-Library will still fall under Section 6.)
-
-  Otherwise, if the work is a derivative of the Library, you may
-distribute the object code for the work under the terms of Section 6.
-Any executables containing that work also fall under Section 6,
-whether or not they are linked directly with the Library itself.
-
-  6. As an exception to the Sections above, you may also combine or
-link a "work that uses the Library" with the Library to produce a
-work containing portions of the Library, and distribute that work
-under terms of your choice, provided that the terms permit
-modification of the work for the customer's own use and reverse
-engineering for debugging such modifications.
-
-  You must give prominent notice with each copy of the work that the
-Library is used in it and that the Library and its use are covered by
-this License.  You must supply a copy of this License.  If the work
-during execution displays copyright notices, you must include the
-copyright notice for the Library among them, as well as a reference
-directing the user to the copy of this License.  Also, you must do one
-of these things:
-
-    a) Accompany the work with the complete corresponding
-    machine-readable source code for the Library including whatever
-    changes were used in the work (which must be distributed under
-    Sections 1 and 2 above); and, if the work is an executable linked
-    with the Library, with the complete machine-readable "work that
-    uses the Library", as object code and/or source code, so that the
-    user can modify the Library and then relink to produce a modified
-    executable containing the modified Library.  (It is understood
-    that the user who changes the contents of definitions files in the
-    Library will not necessarily be able to recompile the application
-    to use the modified definitions.)
-
-    b) Use a suitable shared library mechanism for linking with the
-    Library.  A suitable mechanism is one that (1) uses at run time a
-    copy of the library already present on the user's computer system,
-    rather than copying library functions into the executable, and (2)
-    will operate properly with a modified version of the library, if
-    the user installs one, as long as the modified version is
-    interface-compatible with the version that the work was made with.
-
-    c) Accompany the work with a written offer, valid for at
-    least three years, to give the same user the materials
-    specified in Subsection 6a, above, for a charge no more
-    than the cost of performing this distribution.
-
-    d) If distribution of the work is made by offering access to copy
-    from a designated place, offer equivalent access to copy the above
-    specified materials from the same place.
-
-    e) Verify that the user has already received a copy of these
-    materials or that you have already sent this user a copy.
-
-  For an executable, the required form of the "work that uses the
-Library" must include any data and utility programs needed for
-reproducing the executable from it.  However, as a special exception,
-the materials to be distributed need not include anything that is
-normally distributed (in either source or binary form) with the major
-components (compiler, kernel, and so on) of the operating system on
-which the executable runs, unless that component itself accompanies
-the executable.
-
-  It may happen that this requirement contradicts the license
-restrictions of other proprietary libraries that do not normally
-accompany the operating system.  Such a contradiction means you cannot
-use both them and the Library together in an executable that you
-distribute.
-
-  7. You may place library facilities that are a work based on the
-Library side-by-side in a single library together with other library
-facilities not covered by this License, and distribute such a combined
-library, provided that the separate distribution of the work based on
-the Library and of the other library facilities is otherwise
-permitted, and provided that you do these two things:
-
-    a) Accompany the combined library with a copy of the same work
-    based on the Library, uncombined with any other library
-    facilities.  This must be distributed under the terms of the
-    Sections above.
-
-    b) Give prominent notice with the combined library of the fact
-    that part of it is a work based on the Library, and explaining
-    where to find the accompanying uncombined form of the same work.
-
-  8. You may not copy, modify, sublicense, link with, or distribute
-the Library except as expressly provided under this License.  Any
-attempt otherwise to copy, modify, sublicense, link with, or
-distribute the Library is void, and will automatically terminate your
-rights under this License.  However, parties who have received copies,
-or rights, from you under this License will not have their licenses
-terminated so long as such parties remain in full compliance.
-
-  9. You are not required to accept this License, since you have not
-signed it.  However, nothing else grants you permission to modify or
-distribute the Library or its derivative works.  These actions are
-prohibited by law if you do not accept this License.  Therefore, by
-modifying or distributing the Library (or any work based on the
-Library), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Library or works based on it.
-
-  10. Each time you redistribute the Library (or any work based on the
-Library), the recipient automatically receives a license from the
-original licensor to copy, distribute, link with or modify the Library
-subject to these terms and conditions.  You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties with
-this License.
-
-  11. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Library at all.  For example, if a patent
-license would not permit royalty-free redistribution of the Library by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Library.
-
-If any portion of this section is held invalid or unenforceable under any
-particular circumstance, the balance of the section is intended to apply,
-and the section as a whole is intended to apply in other circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system which is
-implemented by public license practices.  Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-  12. If the distribution and/or use of the Library is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Library under this License may add
-an explicit geographical distribution limitation excluding those countries,
-so that distribution is permitted only in or among countries not thus
-excluded.  In such case, this License incorporates the limitation as if
-written in the body of this License.
-
-  13. The Free Software Foundation may publish revised and/or new
-versions of the Lesser General Public License from time to time.
-Such new versions will be similar in spirit to the present version,
-but may differ in detail to address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the Library
-specifies a version number of this License which applies to it and
-"any later version", you have the option of following the terms and
-conditions either of that version or of any later version published by
-the Free Software Foundation.  If the Library does not specify a
-license version number, you may choose any version ever published by
-the Free Software Foundation.
-
-  14. If you wish to incorporate parts of the Library into other free
-programs whose distribution conditions are incompatible with these,
-write to the author to ask for permission.  For software which is
-copyrighted by the Free Software Foundation, write to the Free
-Software Foundation; we sometimes make exceptions for this.  Our
-decision will be guided by the two goals of preserving the free status
-of all derivatives of our free software and of promoting the sharing
-and reuse of software generally.
-
-			    NO WARRANTY
-
-  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
-WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
-EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
-OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
-KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
-LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
-THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
-WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
-AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
-FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
-LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
-RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
-FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
-SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES.
-
-		     END OF TERMS AND CONDITIONS
-
-           How to Apply These Terms to Your New Libraries
-
-  If you develop a new library, and you want it to be of the greatest
-possible use to the public, we recommend making it free software that
-everyone can redistribute and change.  You can do so by permitting
-redistribution under these terms (or, alternatively, under the terms of the
-ordinary General Public License).
-
-  To apply these terms, attach the following notices to the library.  It is
-safest to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least the
-"copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the library's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-Also add information on how to contact you by electronic and paper mail.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the library, if
-necessary.  Here is a sample; alter the names:
-
-  Yoyodyne, Inc., hereby disclaims all copyright interest in the
-  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
-
-  <signature of Ty Coon>, 1 April 1990
-  Ty Coon, President of Vice
-
-That's all there is to it!
-
-
--- a/docs/Lupy-0.2.1/README.txt	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-Lupy full text indexer r0.2.1
------------------------------
-
-**What is Lupy?**
-  Lupy is a port of the excellent Jakarta Lucene 1.2 into 
-  Python. 
-
-**What can I do with Lupy?**
-  Lupy is a full text indexer and search engine. It can be used to
-  index text documents such as web pages, source code, email, etc.
-
-**What is in this release?**
-  Most of Lucene 1.2 is in Lupy 0.2. Lupy supports text indexing
-  producing files that are binary compatible with Lucene. Index
-  creation, update and searching are supported.
-
-  This release supports TermQuery, PhraseQuery and BooleanQuery.
-
-**What is not in this release?**
-  There is no locking or synchronization.
-
-  The query parser has not been ported, nor all of the analysis/doc
-  parsing classes. Queries can be built using the basic building blocks.
-
-  Tokenization is done with a simple regexp; there is no stop-list
-  support, Porter stemming, StandardAnalyzer or German analyzer.
-
-  This release does not contain the following queries:
-  
-  - QueryParser
-  - MultiTermQuery
-  - FuzzyQuery
-  - WildCardQuery
-  - PrefixQuery
-  - RangeQuery
-  - Sloppy phrase queries
-
-  DateField has not been ported.
-
-  Merging of multiple multi-segment indices is not supported.
-
-**How do I get started?**
-  Look in the examples directory.
-
-  Most of the Lucene documentation is relevant to Lupy:
- 
-  - http://jakarta.apache.org/lucene
-  - http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html
-  - http://darksleep.com/lucene/
-
-**Performance**
-  Java is faster.
-
-
-**Acknowledgements**
-  Many thanks to Doug Cutting and the Jakarta Lucene team for building
-  and enhancing such a high quality piece of open source software.
-
-  Glyph Lefkowitz for serving as my language guru for Python and Java.
-
-  Allen Short did the refactoring for the 0.2 release.
-  
-  I hope you find what you are searching for ;-)
-  amir@divmod.org
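
The removed README above names the ported query classes (TermQuery, PhraseQuery,
BooleanQuery) but gives no example of combining them. The following is a
hypothetical sketch of building a query from those basic building blocks; the
module paths and call signatures are assumptions based on the Lucene 1.2
heritage and the bundled lupy/index and lupy/search modules, not verified
against the removed sources:

    # Hypothetical sketch only: module paths and signatures below are
    # assumptions inferred from the Lucene 1.2 port; see the Lupy examples
    # directory for the authoritative API.
    from lupy.index.term import Term
    from lupy.search.term import TermQuery
    from lupy.search.boolean import BooleanQuery
    from lupy.search.indexsearcher import IndexSearcher

    # Require "wiki" and forbid "java" in an indexed 'text' field
    # (the field name is a placeholder).
    query = BooleanQuery()
    query.add(TermQuery(Term('text', 'wiki')), True, False)   # required clause
    query.add(TermQuery(Term('text', 'java')), False, True)   # prohibited clause

    searcher = IndexSearcher('/path/to/index')   # placeholder index path
    hits = searcher.search(query)                # Hits-like result set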
--- a/docs/Lupy-0.2.1/releasenotes.txt	Sat Jun 10 16:45:05 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-0.2.0 Release notes
-
-This release brings major reorganization of the code, grouping classes
-into larger modules instead of the original Java style, as well as
-rewriting several of the classes to be more Pythonic, removing
-extraneous data structures and so forth; overall, the code has been
-reduced by 20%. The public interface, indexer.py, has not changed;
-other classes have not been changed significantly, other than being
-moved to new modules.
-
-Also, this release changes the interface for analyzers: they are now
-iterable objects that take one argument, the string to be tokenized,
-and produce tokens, rather than the analysis classes ported from
-Lucene. This improves performance while simplifying the code. If an
-analyzer is not specified, lupy.index.documentwriter.standardTokenizer
-is used. The regex used by that generator is re.compile("\\w+", re.U),
-and the tokens are downcased before being stored.
-
-Along with this improvement in tokenization comes better Unicode
-support; all text is now handled as Unicode strings.  There is a
-simple test for the indexing and retrieval of documents containing
-non-ASCII data.
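
The analyzer interface described in the removed release notes above is given
only in prose. Below is a minimal sketch of a compatible analyzer, mirroring
the documented standardTokenizer regex and assuming the downcasing happens in
the analyzer itself (the notes only say tokens are downcased before being
stored):

    # Minimal sketch of an analyzer matching the described interface:
    # called with the string to tokenize, it produces an iterable of tokens.
    import re

    _word_re = re.compile(r"\w+", re.U)   # same pattern as standardTokenizer

    def simple_analyzer(text):
        """Yield lower-cased word tokens from a unicode string."""
        for match in _word_re.finditer(text):
            yield match.group().lower()

    # Example (hypothetical input):
    #   list(simple_analyzer(u"Lupy indexes Unicode text"))
    #   -> [u'lupy', u'indexes', u'unicode', u'text']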
--- a/setup.py	Sat Jun 10 16:45:05 2006 +0200
+++ b/setup.py	Sat Jun 10 16:52:04 2006 +0200
@@ -216,9 +216,9 @@
         'MoinMoin.script.cli',
         'MoinMoin.script.export',
         'MoinMoin.script.import',
+        'MoinMoin.script.index',
         'MoinMoin.script.maint',
         'MoinMoin.script.migration',
-        'MoinMoin.script.lupy',
         'MoinMoin.script.old',
         'MoinMoin.script.old.migration',
         'MoinMoin.script.old.xmlrpc-tools',
@@ -226,9 +226,7 @@
         'MoinMoin.server',
         'MoinMoin.stats',
         'MoinMoin.support',
-        'MoinMoin.support.lupy',
-        'MoinMoin.support.lupy.index',
-        'MoinMoin.support.lupy.search',
+        'MoinMoin.support.xapwrap',
         'MoinMoin.theme',
         'MoinMoin.util',
         'MoinMoin.widget',