changeset 937:1da203931dc7

Merge with main.
author Alexander Schremmer <alex AT alexanderweb DOT de>
date Sun, 09 Jul 2006 22:39:15 +0200
parents d59c1af01ccb (current diff) 9dcfb8f36524 (diff)
children 8effe95df6f0
files MoinMoin/Xapian.py MoinMoin/search.py
diffstat 54 files changed, 3096 insertions(+), 2735 deletions(-)
--- a/MoinMoin/PageEditor.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/PageEditor.py	Sun Jul 09 22:39:15 2006 +0200
@@ -996,8 +996,8 @@
                 msg = msg + self._notifySubscribers(comment, trivial)
           
             if self.request.cfg.xapian_search:
-                from MoinMoin import Xapian
-                index = Xapian.Index(self.request)
+                from MoinMoin.search.Xapian import Index
+                index = Index(self.request)
                 # When we have automatic index building, we can add to
                 # the queue even if the index is missing.
                 if index.exists():
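
The hunk above only changes the import path. A minimal sketch of the resulting call site, assuming Index keeps the exists()/update_page() API of the old MoinMoin/Xapian.py removed below:

    # Hedged sketch of PageEditor's indexing hook after this merge.
    # Assumes Index retains exists() and update_page() from the old module.
    from MoinMoin.search.Xapian import Index

    def queue_index_update(request, page):
        if request.cfg.xapian_search:
            index = Index(request)
            # Until automatic index building exists, only queue updates
            # when a complete index is already present.
            if index.exists():
                index.update_page(page)
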
--- a/MoinMoin/Xapian.py	Sun Jul 09 15:31:02 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,765 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""
-    MoinMoin - xapian indexing search engine
-
-    @copyright: 2006 MoinMoin:ThomasWaldmann,
-                2006 MoinMoin:FranzPletz
-    @license: GNU GPL, see COPYING for details.
-"""
-debug = True
-
-import sys, os, re, codecs, errno, time
-from pprint import pprint
-
-import xapian
-from xapian import Query
-from MoinMoin.support.xapwrap import document as xapdoc
-from MoinMoin.support.xapwrap import index as xapidx
-from MoinMoin.parser.text_moin_wiki import Parser as WikiParser
-
-from MoinMoin.Page import Page
-from MoinMoin import config, wikiutil
-from MoinMoin.util import filesys, lock
-
-try:
-    # PyStemmer, snowball python bindings from http://snowball.tartarus.org/
-    from Stemmer import Stemmer
-    use_stemming = True
-except ImportError:
-    use_stemming = False
-
-class UnicodeQuery(xapian.Query):
-    def __init__(self, *args, **kwargs):
-        self.encoding = kwargs.get('encoding', config.charset)
-
-        nargs = []
-        for term in args:
-            if isinstance(term, unicode):
-                term = term.encode(self.encoding)
-            elif isinstance(term, list) or isinstance(term, tuple):
-                term = [t.encode(self.encoding) for t in term]
-            nargs.append(term)
-
-        xapian.Query.__init__(self, *nargs, **kwargs)
-
-
-##############################################################################
-### Tokenizer
-##############################################################################
-
-def getWikiAnalyzerFactory(language='en'):
-    return (lambda: WikiAnalyzer(language))
-
-class WikiAnalyzer:
-    singleword = r"[%(u)s][%(l)s]+" % {
-                     'u': config.chars_upper,
-                     'l': config.chars_lower,
-                 }
-
-    singleword_re = re.compile(singleword, re.U)
-    wikiword_re = re.compile(WikiParser.word_rule, re.U)
-
-    token_re = re.compile(
-        r"(?P<company>\w+[&@]\w+)|" + # company names like AT&T and Excite@Home.
-        r"(?P<email>\w+([.-]\w+)*@\w+([.-]\w+)*)|" +    # email addresses
-        r"(?P<hostname>\w+(\.\w+)+)|" +                 # hostnames
-        r"(?P<num>(\w+[-/.,])*\w*\d\w*([-/.,]\w+)*)|" + # version numbers
-        r"(?P<acronym>(\w\.)+)|" +          # acronyms: U.S.A., I.B.M., etc.
-        r"(?P<word>\w+)",                   # words (including WikiWords)
-        re.U)
-
-    dot_re = re.compile(r"[-_/,.]")
-    mail_re = re.compile(r"[-_/,.]|(@)")
-    
-    # XXX limit stuff above to xapdoc.MAX_KEY_LEN
-    # WORD_RE = re.compile('\\w{1,%i}' % MAX_KEY_LEN, re.U)
-
-    def __init__(self, language=None):
-        if use_stemming and language:
-            self.stemmer = Stemmer(language)
-        else:
-            self.stemmer = None
-
-    def raw_tokenize(self, value):
-        def enc(uc):
-            """ 'encode' unicode results into whatever xapian / xapwrap wants """
-            lower = uc.lower()
-            return lower
-            
-        if isinstance(value, list): # used for page links
-            for v in value:
-                yield enc(v)
-        else:
-            tokenstream = re.finditer(self.token_re, value)
-            for m in tokenstream:
-                if m.group("acronym"):
-                    yield enc(m.group("acronym").replace('.', ''))
-                elif m.group("company"):
-                    yield enc(m.group("company"))
-                elif m.group("email"):
-                    for word in self.mail_re.split(m.group("email")):
-                        if word:
-                            yield enc(word)
-                elif m.group("hostname"):
-                    for word in self.dot_re.split(m.group("hostname")):
-                        yield enc(word)
-                elif m.group("num"):
-                    for word in self.dot_re.split(m.group("num")):
-                        yield enc(word)
-                elif m.group("word"):
-                    word = m.group("word")
-                    yield enc(word)
-                    # if it is a CamelCaseWord, we additionally yield Camel, Case and Word
-                    if self.wikiword_re.match(word):
-                        for sm in re.finditer(self.singleword_re, word):
-                            yield enc(sm.group())
-
-    def tokenize(self, value, flat_stemming=True):
-        """Yield a stream of lower cased raw and stemmed (optional) words from a string.
-           value must be an UNICODE object or a list of unicode objects
-        """
-        for i in self.raw_tokenize(value):
-            if flat_stemming:
-                yield i # XXX: should we really use a prefix for that? Index.prefixMap['raw'] + i
-                if self.stemmer:
-                    yield self.stemmer.stemWord(i)
-            else:
-                yield (i, self.stemmer.stemWord(i))
-
-
-#############################################################################
-### Indexing
-#############################################################################
-
-class UpdateQueue:
-    def __init__(self, file, lock_dir):
-        self.file = file
-        self.writeLock = lock.WriteLock(lock_dir, timeout=10.0)
-        self.readLock = lock.ReadLock(lock_dir, timeout=10.0)
-
-    def exists(self):
-        return os.path.exists(self.file)
-
-    def append(self, pagename):
-        """ Append a page to queue """
-        if not self.writeLock.acquire(60.0):
-            request.log("can't add %r to xapian update queue: can't lock queue" %
-                        pagename)
-            return
-        try:
-            f = codecs.open(self.file, 'a', config.charset)
-            try:
-                f.write(pagename + "\n")
-            finally:
-                f.close()
-        finally:
-            self.writeLock.release()
-
-    def pages(self):
-        """ Return list of pages in the queue """
-        if self.readLock.acquire(1.0):
-            try:
-                return self._decode(self._read())
-            finally:
-                self.readLock.release()
-        return []
-
-    def remove(self, pages):
-        """ Remove pages from the queue
-        
-        When the queue is empty, the queue file is removed, so exists()
-        can tell if there is something waiting in the queue.
-        """
-        if self.writeLock.acquire(30.0):
-            try:
-                queue = self._decode(self._read())
-                for page in pages:
-                    try:
-                        queue.remove(page)
-                    except ValueError:
-                        pass
-                if queue:
-                    self._write(queue)
-                else:
-                    self._removeFile()
-                return True
-            finally:
-                self.writeLock.release()
-        return False
-
-    # Private -------------------------------------------------------
-
-    def _decode(self, data):
-        """ Decode queue data """
-        pages = data.splitlines()
-        return self._filterDuplicates(pages)
-
-    def _filterDuplicates(self, pages):
-        """ Filter duplicates in page list, keeping the order """
-        unique = []
-        seen = {}
-        for name in pages:
-            if not name in seen:
-                unique.append(name)
-                seen[name] = 1
-        return unique
-
-    def _read(self):
-        """ Read and return queue data
-        
-        This does not do anything with the data so we can release the
-        lock as soon as possible, enabling others to update the queue.
-        """
-        try:
-            f = codecs.open(self.file, 'r', config.charset)
-            try:
-                return f.read()
-            finally:
-                f.close()
-        except (OSError, IOError), err:
-            if err.errno != errno.ENOENT:
-                raise
-            return ''
-
-    def _write(self, pages):
-        """ Write pages to queue file
-        
-        Requires queue write locking.
-        """
-        # XXX use tmpfile/move for atomic replace on real operating systems
-        data = '\n'.join(pages) + '\n'
-        f = codecs.open(self.file, 'w', config.charset)
-        try:
-            f.write(data)
-        finally:
-            f.close()
-
-    def _removeFile(self):
-        """ Remove queue file 
-        
-        Requires queue write locking.
-        """
-        try:
-            os.remove(self.file)
-        except OSError, err:
-            if err.errno != errno.ENOENT:
-                raise
-
-
-class Index:
-    indexValueMap = {
-        # mapping the value names we can easily fetch from the index to
-        # integers required by xapian. 0 and 1 are reserved by xapwrap!
-        'pagename': 2,
-        'attachment': 3,
-        'mtime': 4,
-        'wikiname': 5,
-    }
-    prefixMap = {
-        # http://svn.xapian.org/*checkout*/trunk/xapian-applications/omega/docs/termprefixes.txt
-        'author': 'A',
-        'date':   'D', # numeric format: YYYYMMDD or "latest" - e.g. D20050224 or Dlatest
-                       #G   newsGroup (or similar entity - e.g. a web forum name)
-        'hostname': 'H',
-        'keyword': 'K',
-        'lang': 'L',   # ISO Language code
-                       #M   Month (numeric format: YYYYMM)
-                       #N   ISO couNtry code (or domaiN name)
-                       #P   Pathname
-                       #Q   uniQue id
-        'raw':  'R',   # Raw (i.e. unstemmed) term
-        'title': 'S',  # Subject (or title)
-        'mimetype': 'T',
-        'url': 'U',    # full URL of indexed document - if the resulting term would be > 240
-                       # characters, a hashing scheme is used to prevent overflowing
-                       # the Xapian term length limit (see omindex for how to do this).
-                       #W   "weak" (approximately 10 day intervals, taken as YYYYMMD from
-                       #  the D term, and changing the last digit to a '2' if it's a '3')
-                       #X   longer prefix for user-defined use
-        'linkto': 'XLINKTO', # this document links to that document
-        'stem_lang': 'XSTEMLANG', # ISO Language code this document was stemmed in 
-                       #Y   year (four digits)
-    }
-
-    class LockedException(Exception):
-        pass
-    
-    def __init__(self, request):
-        self.request = request
-        cache_dir = request.cfg.cache_dir
-        main_dir = self._main_dir()
-        self.dir = os.path.join(main_dir, 'index')
-        filesys.makeDirs(self.dir)
-        self.sig_file = os.path.join(main_dir, 'complete')
-        lock_dir = os.path.join(main_dir, 'index-lock')
-        self.lock = lock.WriteLock(lock_dir,
-                                   timeout=3600.0, readlocktimeout=60.0)
-        self.read_lock = lock.ReadLock(lock_dir, timeout=3600.0)
-        self.queue = UpdateQueue(os.path.join(main_dir, 'update-queue'),
-                                 os.path.join(main_dir, 'update-queue-lock'))
-
-        # Disabled until we have a sane way to build the index with a
-        # queue in small steps.
-        ## if not self.exists():
-        ##    self.indexPagesInNewThread(request)
-
-    def _main_dir(self):
-        if self.request.cfg.xapian_index_dir:
-            return os.path.join(self.request.cfg.xapian_index_dir,
-                    self.request.cfg.siteid)
-        else:
-            return os.path.join(self.request.cfg.cache_dir, 'xapian')
-
-    def exists(self):
-        """ Check if index exists """        
-        return os.path.exists(self.sig_file)
-                
-    def mtime(self):
-        return os.path.getmtime(self.dir)
-
-    def _search(self, query):
-        """ read lock must be acquired """
-        while True:
-            try:
-                searcher, timestamp = self.request.cfg.xapian_searchers.pop()
-                if timestamp != self.mtime():
-                    searcher.close()
-                else:
-                    break
-            except IndexError:
-                searcher = xapidx.ReadOnlyIndex(self.dir)
-                searcher.configure(self.prefixMap, self.indexValueMap)
-                timestamp = self.mtime()
-                break
-        
-        hits = searcher.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname'])
-        self.request.cfg.xapian_searchers.append((searcher, timestamp))
-        return hits
-    
-    def search(self, query):
-        if not self.read_lock.acquire(1.0):
-            raise self.LockedException
-        try:
-            hits = self._search(query)
-        finally:
-            self.read_lock.release()
-        return hits
-
-    def update_page(self, page):
-        self.queue.append(page.page_name)
-        self._do_queued_updates_InNewThread()
-
-    def indexPages(self, files=None, mode='update'):
-        """ Index all pages (and files, if given)
-        
-        Can be called only from a script. To index pages during a user
-        request, use indexPagesInNewThread.
-        @arg files: iterator or list of files to index additionally
-        """
-        if not self.lock.acquire(1.0):
-            self.request.log("can't index: can't acquire lock")
-            return
-        try:
-            request = self._indexingRequest(self.request)
-            self._index_pages(request, None, files, mode)
-        finally:
-            self.lock.release()
-    
-    def indexPagesInNewThread(self, files=None, mode='update'):
-        """ Index all pages in a new thread
-        
-        Should be called from a user request. From a script, use indexPages.
-        """
-        if not self.lock.acquire(1.0):
-            self.request.log("can't index: can't acquire lock")
-            return
-        try:
-            # Prevent rebuilding the index just after it was finished
-            if self.exists():
-                self.lock.release()
-                return
-            from threading import Thread
-            indexThread = Thread(target=self._index_pages,
-                args=(self._indexingRequest(self.request), self.lock, files, mode))
-            indexThread.setDaemon(True)
-            
-            # Join the index thread after current request finish, prevent
-            # Apache CGI from killing the process.
-            def joinDecorator(finish):
-                def func():
-                    finish()
-                    indexThread.join()
-                return func
-
-            self.request.finish = joinDecorator(self.request.finish)
-            indexThread.start()
-        except:
-            self.lock.release()
-            raise
-
-    def optimize(self):
-        pass
-
-    # Private ----------------------------------------------------------------
-
-    def _do_queued_updates_InNewThread(self):
-        """ do queued index updates in a new thread
-        
-        Should be called from a user request. From a script, use indexPages.
-        """
-        if not self.lock.acquire(1.0):
-            self.request.log("can't index: can't acquire lock")
-            return
-        try:
-            from threading import Thread
-            indexThread = Thread(target=self._do_queued_updates,
-                args=(self._indexingRequest(self.request), self.lock))
-            indexThread.setDaemon(True)
-            
-            # Join the index thread after current request finish, prevent
-            # Apache CGI from killing the process.
-            def joinDecorator(finish):
-                def func():
-                    finish()
-                    indexThread.join()
-                return func
-                
-            self.request.finish = joinDecorator(self.request.finish)
-            indexThread.start()
-        except:
-            self.lock.release()
-            raise
-
-    def _do_queued_updates(self, request, lock=None, amount=5):
-        """ Assumes that the write lock is acquired """
-        try:
-            writer = xapidx.Index(self.dir, True)
-            writer.configure(self.prefixMap, self.indexValueMap)
-            pages = self.queue.pages()[:amount]
-            for name in pages:
-                p = Page(request, name)
-                self._index_page(writer, p, mode='update')
-                self.queue.remove([name])
-        finally:
-            writer.close()
-            if lock:
-                lock.release()
-
-    def contentfilter(self, filename):
-        """ Get a filter for content of filename and return unicode content. """
-        request = self.request
-        mt = wikiutil.MimeType(filename=filename)
-        for modulename in mt.module_name():
-            try:
-                execute = wikiutil.importPlugin(request.cfg, 'filter', modulename)
-                break
-            except wikiutil.PluginMissingError:
-                pass
-            else:
-                request.log("Cannot load filter for mimetype." + modulename)
-        try:
-            data = execute(self, filename)
-            if debug:
-                request.log("Filter %s returned %d characters for file %s" % (modulename, len(data), filename))
-        except (OSError, IOError), err:
-            data = ''
-            request.log("Filter %s threw error '%s' for file %s" % (modulename, str(err), filename))
-        return mt.mime_type(), data
-   
-    def test(self, request):
-        idx = xapidx.ReadOnlyIndex(self.dir)
-        idx.configure(self.prefixMap, self.indexValueMap)
-        print idx.search("is")
-        #for d in docs:
-        #    request.log("%r %r %r" % (d, d.get('attachment'), d.get('pagename')))
-
-    def _index_file(self, request, writer, filename, mode='update'):
-        """ index a file as it were a page named pagename
-            Assumes that the write lock is acquired
-        """
-        fs_rootpage = 'FS' # XXX FS hardcoded
-        try:
-            wikiname = request.cfg.interwikiname or 'Self'
-            itemid = "%s:%s" % (wikiname, os.path.join(fs_rootpage, filename))
-            mtime = os.path.getmtime(filename)
-            mtime = wikiutil.timestamp2version(mtime)
-            if mode == 'update':
-                query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
-                docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
-                if docs:
-                    doc = docs[0] # there should be only one
-                    uid = doc['uid']
-                    docmtime = long(doc['values']['mtime'])
-                    updated = mtime > docmtime
-                    if debug: request.log("uid %r: mtime %r > docmtime %r == updated %r" % (uid, mtime, docmtime, updated))
-                else:
-                    uid = None
-                    updated = True
-            elif mode == 'add':
-                updated = True
-            if debug: request.log("%s %r" % (filename, updated))
-            if updated:
-                xitemid = xapdoc.Keyword('itemid', itemid)
-                mimetype, file_content = self.contentfilter(filename)
-                xwname = xapdoc.SortKey('wikiname', request.cfg.interwikiname or "Self")
-                xpname = xapdoc.SortKey('pagename', fs_rootpage)
-                xattachment = xapdoc.SortKey('attachment', filename) # XXX we should treat files like real pages, not attachments
-                xmtime = xapdoc.SortKey('mtime', mtime)
-                title = " ".join(os.path.join(fs_rootpage, filename).split("/"))
-                xtitle = xapdoc.Keyword('title', title)
-                xmimetype = xapdoc.TextField('mimetype', mimetype, True)
-                xcontent = xapdoc.TextField('content', file_content)
-                doc = xapdoc.Document(textFields=(xcontent, xmimetype, ),
-                                      keywords=(xtitle, xitemid, ),
-                                      sortFields=(xpname, xattachment, xmtime, xwname, ),
-                                     )
-                doc.analyzerFactory = getWikiAnalyzerFactory()
-                if mode == 'update':
-                    if debug: request.log("%s (replace %r)" % (filename, uid))
-                    doc.uid = uid
-                    id = writer.index(doc)
-                elif mode == 'add':
-                    if debug: request.log("%s (add)" % (filename,))
-                    id = writer.index(doc)
-        except (OSError, IOError), err:
-            pass
-
-    def _get_languages(self, page):
-        body = page.get_raw_body()
-        default_lang = page.request.cfg.language_default
-
-        lang = ''
-
-        if use_stemming:
-            for line in body.split('\n'):
-                if line.startswith('#language'):
-                    lang = line.split(' ')[1]
-                    try:
-                        Stemmer(lang)
-                    except KeyError:
-                        # lang is not stemmable
-                        break
-                    else:
-                        # lang is stemmable
-                        return (lang, lang)
-                elif not line.startswith('#'):
-                    break
-        
-        if not lang:
-            # no lang found at all.. fallback to default language
-            lang = default_lang
-
-        # return actual lang and lang to stem in
-        return (lang, default_lang)
-
-    def _index_page(self, writer, page, mode='update'):
-        """ Index a page - assumes that the write lock is acquired
-            @arg writer: the index writer object
-            @arg page: a page object
-            @arg mode: 'add' = just add, no checks
-                       'update' = check if already in index and update if needed (mtime)
-            
-        """
-        request = page.request
-        wikiname = request.cfg.interwikiname or "Self"
-        pagename = page.page_name
-        mtime = page.mtime_usecs()
-        itemid = "%s:%s" % (wikiname, pagename)
-        # XXX: Hack until we get proper metadata
-        language, stem_language = self._get_languages(page)
-        updated = False
-
-        if mode == 'update':
-            # from #xapian: if you generate a special "unique id" term,
-            # you can just call database.replace_document(uid_term, doc)
-            # -> done in xapwrap.index.Index.index()
-            query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
-            docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
-            if docs:
-                doc = docs[0] # there should be only one
-                uid = doc['uid']
-                docmtime = long(doc['values']['mtime'])
-                updated = mtime > docmtime
-                if debug: request.log("uid %r: mtime %r > docmtime %r == updated %r" % (uid, mtime, docmtime, updated))
-            else:
-                uid = None
-                updated = True
-        elif mode == 'add':
-            updated = True
-        if debug: request.log("%s %r" % (pagename, updated))
-        if updated:
-            xwname = xapdoc.SortKey('wikiname', request.cfg.interwikiname or "Self")
-            xpname = xapdoc.SortKey('pagename', pagename)
-            xattachment = xapdoc.SortKey('attachment', '') # this is a real page, not an attachment
-            xmtime = xapdoc.SortKey('mtime', mtime)
-            xtitle = xapdoc.TextField('title', pagename, True) # prefixed
-            xkeywords = [xapdoc.Keyword('itemid', itemid),
-                    xapdoc.Keyword('lang', language),
-                    xapdoc.Keyword('stem_lang', stem_language)]
-            for pagelink in page.getPageLinks(request):
-                xkeywords.append(xapdoc.Keyword('linkto', pagelink))
-            xcontent = xapdoc.TextField('content', page.get_raw_body())
-            doc = xapdoc.Document(textFields=(xcontent, xtitle),
-                                  keywords=xkeywords,
-                                  sortFields=(xpname, xattachment, xmtime, xwname, ),
-                                 )
-            doc.analyzerFactory = getWikiAnalyzerFactory()
-
-            if mode == 'update':
-                if debug: request.log("%s (replace %r)" % (pagename, uid))
-                doc.uid = uid
-                id = writer.index(doc)
-            elif mode == 'add':
-                if debug: request.log("%s (add)" % (pagename,))
-                id = writer.index(doc)
-
-        from MoinMoin.action import AttachFile
-
-        attachments = AttachFile._get_files(request, pagename)
-        for att in attachments:
-            filename = AttachFile.getFilename(request, pagename, att)
-            att_itemid = "%s//%s" % (itemid, att)
-            mtime = wikiutil.timestamp2version(os.path.getmtime(filename))
-            if mode == 'update':
-                query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', att_itemid))
-                docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', ])
-                if debug: request.log("##%r %r" % (filename, docs))
-                if docs:
-                    doc = docs[0] # there should be only one
-                    uid = doc['uid']
-                    docmtime = long(doc['values']['mtime'])
-                    updated = mtime > docmtime
-                    if debug: request.log("uid %r: mtime %r > docmtime %r == updated %r" % (uid, mtime, docmtime, updated))
-                else:
-                    uid = None
-                    updated = True
-            elif mode == 'add':
-                updated = True
-            if debug: request.log("%s %s %r" % (pagename, att, updated))
-            if updated:
-                xatt_itemid = xapdoc.Keyword('itemid', att_itemid)
-                xpname = xapdoc.SortKey('pagename', pagename)
-                xattachment = xapdoc.SortKey('attachment', att) # this is an attachment, store its filename
-                xmtime = xapdoc.SortKey('mtime', mtime)
-                xtitle = xapdoc.Keyword('title', '%s/%s' % (pagename, att))
-                xlanguage = xapdoc.Keyword('lang', language)
-                mimetype, att_content = self.contentfilter(filename)
-                xmimetype = xapdoc.TextField('mimetype', mimetype, True)
-                xcontent = xapdoc.TextField('content', att_content)
-                doc = xapdoc.Document(textFields=(xcontent, xmimetype, ),
-                                      keywords=(xatt_itemid, xtitle, xlanguage, ),
-                                      sortFields=(xpname, xattachment, xmtime, xwname, ),
-                                     )
-                doc.analyzerFactory = getWikiAnalyzerFactory()
-                if mode == 'update':
-                    if debug: request.log("%s (replace %r)" % (pagename, uid))
-                    doc.uid = uid
-                    id = writer.index(doc)
-                elif mode == 'add':
-                    if debug: request.log("%s (add)" % (pagename,))
-                    id = writer.index(doc)
-        #writer.flush()
-        
-
-    def _index_pages(self, request, lock=None, files=None, mode='update'):
-        """ Index all pages (and all given files)
-        
-        This should be called from indexPages or indexPagesInNewThread only!
-        
-        This may take some time, depending on the size of the wiki and speed
-        of the machine.
-
-        When called in a new thread, lock is acquired before the call,
-        and this method must release it when it finishes or fails.
-        """
-        try:
-            self._unsign()
-            start = time.time()
-            writer = xapidx.Index(self.dir, True)
-            writer.configure(self.prefixMap, self.indexValueMap)
-            pages = request.rootpage.getPageList(user='', exists=1)
-            request.log("indexing all (%d) pages..." % len(pages))
-            for pagename in pages:
-                p = Page(request, pagename)
-                self._index_page(writer, p, mode)
-            if files:
-                request.log("indexing all files...")
-                for fname in files:
-                    fname = fname.strip()
-                    self._index_file(request, writer, fname, mode)
-            writer.close()
-            request.log("indexing completed successfully in %0.2f seconds." %
-                        (time.time() - start))
-            self._sign()
-        finally:
-            writer.__del__()
-            if lock:
-                lock.release()
-
-    def _optimize(self, request):
-        """ Optimize the index """
-        pass
-
-    def _indexingRequest(self, request):
-        """ Return a new request that can be used for index building.
-        
-        This request uses a security policy that lets the current user
-        read any page. Without this policy some pages will not render,
-        which will create broken pagelinks index.        
-        """
-        from MoinMoin.request.CLI import Request
-        from MoinMoin.security import Permissions
-        request = Request(request.url)
-        class SecurityPolicy(Permissions):
-            def read(*args, **kw):
-                return True        
-        request.user.may = SecurityPolicy(request.user)
-        return request
-
-    def _unsign(self):
-        """ Remove sig file - assume write lock acquired """
-        try:
-            os.remove(self.sig_file)
-        except OSError, err:
-            if err.errno != errno.ENOENT:
-                raise
-
-    def _sign(self):
-        """ Add sig file - assume write lock acquired """
-        f = file(self.sig_file, 'w')
-        try:
-            f.write('')
-        finally:
-            f.close()
-
-
-def run_query(query, db):
-    enquire = xapian.Enquire(db)
-    parser = xapian.QueryParser()
-    query = parser.parse_query(query, xapian.QueryParser.FLAG_WILDCARD)
-    print query.get_description()
-    enquire.set_query(query)
-    return enquire.get_mset(0, 10)
-
-def run(request):
-    pass
-    #print "Begin"
-    #db = xapian.WritableDatabase(xapian.open('test.db',
-    #                                         xapian.DB_CREATE_OR_OPEN))
-    #
-    # index_data(db) ???
-    #del db
-    #mset = run_query(sys.argv[1], db)
-    #print mset.get_matches_estimated()
-    #iterator = mset.begin()
-    #while iterator != mset.end():
-    #    print iterator.get_document().get_data()
-    #    iterator.next()
-    #for i in xrange(1,170):
-    #    doc = db.get_document(i)
-    #    print doc.get_data()
-
-if __name__ == '__main__':
-    run()
-
-
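The 765 lines above are removed wholesale; per the PageEditor hunk, this code now lives under MoinMoin/search/Xapian.py. A hedged sketch of a read-side call through the new location, assuming UnicodeQuery and Index keep their old names and behavior:

    # Hedged sketch: querying the relocated index module.
    # UnicodeQuery encodes unicode terms to config.charset before handing
    # them to xapian; Index.search raises Index.LockedException when the
    # read lock cannot be acquired within one second.
    from MoinMoin.search.Xapian import Index, UnicodeQuery

    def find_pages(request, term):
        index = Index(request)
        try:
            return index.search(UnicodeQuery(term))
        except Index.LockedException:
            return []  # index busy; callers may retry or fall back
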
--- a/MoinMoin/action/__init__.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/action/__init__.py	Sun Jul 09 22:39:15 2006 +0200
@@ -58,11 +58,11 @@
     def is_excluded(self):
         """ Return True if action is excluded """
         return self.actionname in self.cfg.actions_excluded
-    
+
     def is_allowed(self):
         """ Return True if action is allowed (by ACL) """
         return True
-    
+
     def check_condition(self):
         """ Check if some other condition is not allowing us to do that action,
             return error msg or None if there is no problem.
@@ -70,7 +70,7 @@
             You can use this to e.g. check if a page exists.
         """
         return None
-    
+
     def ticket_ok(self):
         """ Return True if we check for tickets and there is some valid ticket
             in the form data or if we don't check for tickets at all.
@@ -82,7 +82,7 @@
         # requiring two full HTTP transactions
         ticket = self.form.get('ticket', [''])[0]
         return wikiutil.checkTicket(ticket)
-    
+
     # UI ---------------------------------------------------------------------
     def get_form_html(self, buttons_html):
         """ Override this to assemble the inner part of the form,
@@ -117,12 +117,12 @@
         for button in buttons:
             buttons_html.append('<input type="submit" name="%s" value="%s">' % button)
         buttons_html = "".join(buttons_html)
-        
+
         if self.use_ticket:
             ticket_html = '<input type="hidden" name="ticket" value="%s">' % wikiutil.createTicket()
         else:
             ticket_html = ''
-            
+
         d = {
             'error_html': error_html,
             'actionname': self.actionname,
@@ -138,7 +138,7 @@
 %(ticket_html)s
 %(user_html)s
 </form>''' % d
-        
+
         return Dialog(self.request, content=form_html)
 
     def render_msg(self, msg):
@@ -152,7 +152,7 @@
     def render_cancel(self):
         """ Called when user has hit the cancel button """
         self.page.send_page(self.request) # we don't tell user he has hit cancel :)
-        
+
     def render(self):
         """ Render action - this is the main function called by action's
             execute() function.
@@ -161,7 +161,7 @@
         """
         _ = self._
         form = self.form
-        
+
         if form.has_key(self.form_cancel):
             self.render_cancel()
             return
@@ -286,7 +286,7 @@
 
     if not request.user.may.revert(pagename):
         return Page(request, pagename).send_page(request,
-            msg = _('You are not allowed to revert this page!'))
+            msg=_('You are not allowed to revert this page!'))
 
     rev = int(request.form['rev'][0])
     revstr = '%08d' % rev
@@ -309,16 +309,16 @@
 
     if not request.user.may.write(pagename):
         Page(request, pagename).send_page(request,
-            msg = _('You are not allowed to edit this page.'))
+            msg=_('You are not allowed to edit this page.'))
         return
 
-    valideditors = ['text', 'gui',]
+    valideditors = ['text', 'gui', ]
     editor = ''
     if request.user.valid:
         editor = request.user.editor_default
     if editor not in valideditors:
         editor = request.cfg.editor_default
-    
+
     editorparam = request.form.get('editor', [editor])[0]
     if editorparam == "guipossible":
         lasteditor = editor
@@ -333,7 +333,7 @@
     # if it is still nothing valid, we just use the text editor
     if editor not in valideditors:
         editor = 'text'
-            
+
     savetext = request.form.get('savetext', [None])[0]
     rev = int(request.form.get('rev', ['0'])[0])
     comment = request.form.get('comment', [u''])[0]
@@ -359,7 +359,7 @@
     if savetext is None:
         pg.sendEditor()
         return
-  
+
     # did user hit cancel button?
     cancelled = request.form.has_key('button_cancel')
 
@@ -368,7 +368,7 @@
     try:
         if lasteditor == 'gui':
             savetext = convert(request, pagename, savetext)
-                
+
         # IMPORTANT: normalize text from the form. This should be done in
         # one place before we manipulate the text.
         savetext = pg.normalizeText(savetext, stripspaces=rstrip)
@@ -389,7 +389,7 @@
     # things behind your back, and in general not needed. Either we have
     # a full interface for categories (add, delete) or just add them by
     # markup.
-    
+
     if category and category != _('<No addition>', formatted=False): # opera 8.5 needs this
         # strip trailing whitespace
         savetext = savetext.rstrip()
@@ -398,11 +398,11 @@
         # non-categories.
         lines = filter(None, savetext.splitlines())
         if lines:
-            
+
             #TODO: this code is broken, will not work for extended links
             #categories, e.g ["category hebrew"]
             categories = lines[-1].split()
-            
+
             if categories:
                 confirmed = wikiutil.filterCategoryPages(request, categories)
                 if len(confirmed) < len(categories):
@@ -419,15 +419,15 @@
         request.form.has_key('button_spellcheck') or
         request.form.has_key('button_newwords')):
         pg.sendEditor(preview=savetext, comment=comment)
-    
+
     # Preview with mode switch
     elif request.form.has_key('button_switch'):
         pg.sendEditor(preview=savetext, comment=comment, staytop=1)
-    
+
     # Save new text
     else:
         try:
-            still_conflict = "/!\ '''Edit conflict" in savetext
+            still_conflict = r"/!\ '''Edit conflict" in savetext
             pg.setConflict(still_conflict)
             savemsg = pg.saveText(savetext, rev, trivial=trivial, comment=comment)
         except pg.EditConflict, e:
@@ -440,7 +440,7 @@
             # We don't send preview when we do merge conflict
             pg.sendEditor(msg=msg, comment=comment)
             return
-        
+
         except pg.SaveError, msg:
             # msg contain a unicode string
             savemsg = unicode(msg)
@@ -490,12 +490,12 @@
             rev1 = int(request.form.get('rev', [-1])[0])
         except StandardError:
             rev1 = -1
- 
+
     # spacing flag?
     ignorews = int(request.form.get('ignorews', [0])[0])
 
     _ = request.getText
-    
+
     # get a list of old revisions, and back out if none are available
     currentpage = Page(request, pagename)
     revisions = currentpage.getRevList()
@@ -520,24 +520,24 @@
 
     request.http_headers()
     request.theme.send_title(_('Diff for "%s"') % (pagename,), pagename=pagename, allow_doubleclick=1)
-  
+
     if rev1 > 0 and rev2 > 0 and rev1 > rev2 or rev1 == 0 and rev2 > 0:
         rev1, rev2 = rev2, rev1
-          
+
     oldrev1, oldcount1 = None, 0
     oldrev2, oldcount2 = None, 0
-    
+
     # get the filename of the version to compare to
     edit_count = 0
     for rev in revisions:
         edit_count += 1
-        if rev <= rev1: 
+        if rev <= rev1:
             oldrev1, oldcount1 = rev, edit_count
-        if rev2 and rev >= rev2: 
+        if rev2 and rev >= rev2:
             oldrev2, oldcount2 = rev, edit_count
         if oldrev1 and oldrev2 or oldrev1 and not rev2:
             break
-    
+
     if rev1 == -1:
         oldpage = Page(request, pagename, rev=revisions[1])
         oldcount1 -= 1
@@ -551,7 +551,7 @@
             oldpage = Page(request, "$EmptyPage$") # hack
             oldpage.set_raw_body("")    # avoid loading from disk
             oldrev1 = 0 # XXX
-              
+
     if rev2 == 0:
         newpage = currentpage
         # oldcount2 is still on init value 0
@@ -562,7 +562,7 @@
             newpage = Page(request, "$EmptyPage$") # hack
             newpage.set_raw_body("")    # avoid loading from disk
             oldrev2 = 0 # XXX
-    
+
     edit_count = abs(oldcount1 - oldcount2)
 
     # this should use the formatter, but there is none?
@@ -572,7 +572,7 @@
     if edit_count > 1:
         request.write(' ' + _('(spanning %d versions)') % (edit_count,))
     request.write('</p>')
-  
+
     if request.user.show_fancy_diff:
         from MoinMoin.util.diff import diff
         request.write(diff(request, oldpage.get_raw_body(), newpage.get_raw_body()))
@@ -617,7 +617,7 @@
         _ = request.getText
 
         request.write('<h2>%s</h2>\n' % _('General Information'))
-        
+
         # show page size
         request.write(("<p>%s</p>" % _("Page size: %d")) % page.size())
 
@@ -635,7 +635,7 @@
             request.write(attachment_info(pagename, request))
 
         # show subscribers
-        subscribers = page.getSubscribers(request,  include_self=1, return_users=1)
+        subscribers = page.getSubscribers(request, include_self=1, return_users=1)
         if subscribers:
             request.write('<p>', _('The following users subscribed to this page:'))
             for lang in subscribers.keys():
@@ -668,7 +668,7 @@
         history.columns = [
             Column('rev', label='#', align='right'),
             Column('mtime', label=_('Date'), align='right'),
-            Column('size',  label=_('Size'), align='right'),
+            Column('size', label=_('Size'), align='right'),
             Column('diff', label='<input type="submit" value="%s">' % (_("Diff"))),
             Column('editor', label=_('Editor'), hidden=not request.cfg.show_names),
             Column('comment', label=_('Comment')),
@@ -680,14 +680,14 @@
         versions = len(revisions)
 
         may_revert = request.user.may.revert(pagename)
-        
+
         # read in the complete log of this page
         log = editlog.EditLog(request, rootpagename=pagename)
         count = 0
         for line in log.reverse():
             rev = int(line.rev)
             actions = ""
-            if line.action in ['SAVE','SAVENEW','SAVE/REVERT',]:
+            if line.action in ['SAVE', 'SAVENEW', 'SAVE/REVERT', ]:
                 size = page.size(rev=rev)
                 if count == 0: # latest page
                     actions = '%s&nbsp;%s' % (actions, page.link_to(request,
@@ -714,21 +714,21 @@
                             text=_('revert'),
                             querystr='action=revert&rev=%d' % rev, rel='nofollow'))
                 if count == 0:
-                    rchecked=' checked="checked"'
+                    rchecked = ' checked="checked"'
                     lchecked = ''
                 elif count == 1:
-                    lchecked=' checked="checked"'
+                    lchecked = ' checked="checked"'
                     rchecked = ''
                 else:
                     lchecked = rchecked = ''
-                diff = '<input type="radio" name="rev1" value="%d"%s><input type="radio" name="rev2" value="%d"%s>' % (rev,lchecked,rev,rchecked)
+                diff = '<input type="radio" name="rev1" value="%d"%s><input type="radio" name="rev2" value="%d"%s>' % (rev, lchecked, rev, rchecked)
                 comment = line.comment
-                if not comment and line.action.find('/REVERT') != -1:
+                if not comment and '/REVERT' in line.action:
                         comment = _("Revert to revision %(rev)d.") % {'rev': int(line.extra)}
             else: # ATT*
                 rev = '-'
                 diff = '-'
-                
+
                 filename = wikiutil.url_unquote(line.extra)
                 comment = "%s: %s %s" % (line.action, filename, line.comment)
                 size = 0
@@ -748,7 +748,7 @@
                     elif line.action == 'ATTDRW':
                         actions = '%s&nbsp;%s' % (actions, page.link_to(request,
                             text=_('edit'),
-                            querystr='action=AttachFile&drawing=%s' % filename.replace(".draw",""), rel='nofollow'))
+                            querystr='action=AttachFile&drawing=%s' % filename.replace(".draw", ""), rel='nofollow'))
 
                     actions = '%s&nbsp;%s' % (actions, page.link_to(request,
                         text=_('get'),
@@ -806,22 +806,22 @@
     # this will be automatically fixed.
     lang = page.language or request.cfg.language_default
     request.setContentLanguage(lang)
-    
+
     request.theme.send_title(_('Info for "%s"') % (title,), pagename=pagename)
 
-    historylink =  wikiutil.link_tag(request, '%s?action=info' % qpagename,
+    historylink = wikiutil.link_tag(request, '%s?action=info' % qpagename,
         _('Show "%(title)s"') % {'title': _('Revision History')}, request.formatter, rel='nofollow')
-    generallink =  wikiutil.link_tag(request, '%s?action=info&amp;general=1' % qpagename,
+    generallink = wikiutil.link_tag(request, '%s?action=info&amp;general=1' % qpagename,
         _('Show "%(title)s"') % {'title': _('General Page Infos')}, request.formatter, rel='nofollow')
     hitcountlink = wikiutil.link_tag(request, '%s?action=info&amp;hitcounts=1' % qpagename,
         _('Show chart "%(title)s"') % {'title': _('Page hits and edits')}, request.formatter, rel='nofollow')
-    
+
     request.write('<div id="content">\n') # start content div
     request.write("<p>[%s]  [%s]  [%s]</p>" % (historylink, generallink, hitcountlink))
 
     show_hitcounts = int(request.form.get('hitcounts', [0])[0]) != 0
     show_general = int(request.form.get('general', [0])[0]) != 0
-    
+
     if show_hitcounts:
         from MoinMoin.stats import hitcounts
         request.write(hitcounts.linkto(pagename, request, 'page=' + wikiutil.url_quote_plus(pagename)))
@@ -829,7 +829,7 @@
         general(page, pagename, request)
     else:
         history(page, pagename, request)
-        
+
     request.write('</div>\n') # end content div
     request.theme.send_footer(pagename)
     request.theme.send_closing_html()
@@ -843,10 +843,10 @@
     msg = None
 
     if not request.user.valid:
-        msg = _("You must login to add a quicklink.")    
+        msg = _("You must login to add a quicklink.")
     elif request.user.isQuickLinkedTo([pagename]):
         if request.user.removeQuicklink(pagename):
-            msg = _('Your quicklink to this page has been removed.')            
+            msg = _('Your quicklink to this page has been removed.')
     else:
         if request.user.addQuicklink(pagename):
             msg = _('A quicklink to this page has been added for you.')
@@ -912,13 +912,13 @@
                 tm = wikiutil.timestamp2version(time.time())
     else:
         tm = wikiutil.timestamp2version(time.time())
-  
+
     if tm is None:
         request.user.delBookmark()
     else:
         request.user.setBookmark(tm)
     Page(request, pagename).send_page(request)
-  
+
 
 #############################################################################
 ### Special Actions
@@ -930,22 +930,22 @@
     if not request.user.may.read(pagename):
         msg = _("You are not allowed to view this page.")
         return request.page.send_page(request, msg=msg)
-    
+
     if not request.cfg.chart_options:
         msg = _("Charts are not available!")
         return request.page.send_page(request, msg=msg)
-    
+
     chart_type = request.form.get('type', [''])[0].strip()
     if not chart_type:
         msg = _('You need to provide a chart type!')
         return request.page.send_page(request, msg=msg)
-    
+
     try:
         func = pysupport.importName("MoinMoin.stats." + chart_type, 'draw')
     except (ImportError, AttributeError):
         msg = _('Bad chart type "%s"!') % chart_type
         return request.page.send_page(request, msg=msg)
-    
+
     func(pagename, request)
 
 def do_dumpform(pagename, request):
@@ -978,6 +978,6 @@
         handler = wikiutil.importPlugin(request.cfg, "action", action, identifier)
     except wikiutil.PluginMissingError:
         handler = globals().get('do_' + action)
-        
+
     return handler
 
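Most of this file's hunks are trailing-whitespace and spacing cleanup; the one substantive change is the r-prefix on the edit-conflict marker, which makes the literal backslash explicit instead of relying on Python passing the unrecognized escape through unchanged:

    # Before: "/!\ '''Edit conflict" only worked because \<space> is not a
    # recognized escape sequence. The raw string states the intent directly.
    marker = r"/!\ '''Edit conflict"
    still_conflict = marker in savetext  # savetext: the page body being saved
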
--- a/MoinMoin/action/fckdialog.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/action/fckdialog.py	Sun Jul 09 22:39:15 2006 +0200
@@ -165,10 +165,7 @@
     from MoinMoin import search
     name = request.form.get("pagename",[""])[0]
     if name:
-        searchresult = search.searchPages(
-            request,
-            search.QueryParser().parse_query('t:"%s"' % name))
-        
+        searchresult = search.searchPages(request, 't:"%s"' % name)
         pages = [p.page_name for p in searchresult.hits]
     else:
         pages = [name]
@@ -209,9 +206,7 @@
     if name:
         from MoinMoin import search
         # XXX error handling!
-        searchresult = search.searchPages(
-            request,
-            search.QueryParser().parse_query('t:"%s"' % name))
+        searchresult = search.searchPages(request, 't:"%s"' % name)
         
         pages = [p.page_name for p in searchresult.hits]
         pages.sort()
@@ -378,9 +373,7 @@
     if name:
         from MoinMoin import search
         # XXX error handling!
-        searchresult = search.searchPages(
-            request,
-            search.QueryParser().parse_query('t:"%s"' % name))
+        searchresult = search.searchPages(request, 't:"%s"' % name)
         
         pages = [p.page_name for p in searchresult.hits]
         pages.sort()
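
All three fckdialog hunks drop the explicit QueryParser and pass the needle straight to search.searchPages, which implies searchPages now accepts a raw query string and parses it itself. A minimal title-search sketch under that assumption:

    # Hedged sketch of the simplified search call after this merge.
    from MoinMoin import search

    def matching_page_names(request, name):
        # 't:"..."' restricts the query to page titles, as in the hunks above.
        result = search.searchPages(request, 't:"%s"' % name)
        return sorted(p.page_name for p in result.hits)
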
--- a/MoinMoin/action/fullsearch.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/action/fullsearch.py	Sun Jul 09 22:39:15 2006 +0200
@@ -52,14 +52,14 @@
                 'of {{{"%s"}}}') % needle
         # send http headers
         request.http_headers()
-        Page(request, pagename).send_page(request, msg=err) 
+        Page(request, pagename).send_page(request, msg=err)
         return
 
     # search the pages
-    from MoinMoin import search
-    query = search.QueryParser(case=case, regex=regex,
-                               titlesearch=titlesearch).parse_query(needle)
-    results = search.searchPages(request, query)
+    from MoinMoin.search import searchPages, QueryParser
+    query = QueryParser(case=case, regex=regex,
+            titlesearch=titlesearch).parse_query(needle)
+    results = searchPages(request, query)
 
     # directly show a single hit
     # XXX won't work with attachment search
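
fullsearch.py keeps the explicit two-step form, now importing the names directly from the MoinMoin.search package; a sketch mirroring the hunk above:

    # Build a query object with the user's flags, then run it.
    # Flag names (case, regex, titlesearch) are taken from the hunk above.
    from MoinMoin.search import searchPages, QueryParser

    def run_fullsearch(request, needle, case=0, regex=0, titlesearch=0):
        query = QueryParser(case=case, regex=regex,
                            titlesearch=titlesearch).parse_query(needle)
        return searchPages(request, query)
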
--- a/MoinMoin/caching.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/caching.py	Sun Jul 09 22:39:15 2006 +0200
@@ -13,7 +13,7 @@
 locking = 1
 if locking:
     from MoinMoin.util import lock
-    
+
 class CacheEntry:
     def __init__(self, request, arena, key, scope='page_or_wiki', do_locking=True):
         """ init a cache entry
@@ -49,7 +49,7 @@
             self.lock_dir = os.path.join(self.arena_dir, '__lock__')
             self.rlock = lock.ReadLock(self.lock_dir, 60.0)
             self.wlock = lock.WriteLock(self.lock_dir, 60.0)
-        
+
     def _filename(self):
         return os.path.join(self.arena_dir, self.key)
 
@@ -73,7 +73,7 @@
             return 1
 
         needsupdate = ftime > ctime
-        
+
         # if a page depends on the attachment dir, we check this, too:
         if not needsupdate and attachdir:
             try:
@@ -81,7 +81,7 @@
             except os.error:
                 ftime2 = 0
             needsupdate = ftime2 > ctime
-                
+
         return needsupdate
 
     def copyto(self, filename):
--- a/MoinMoin/error.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/error.py	Sun Jul 09 22:39:15 2006 +0200
@@ -43,7 +43,7 @@
             return unicode(self.message, config.charset)
         else:
             return unicode(self.message)
-    
+
     def __str__(self):
         """ Return encoded message """
         if isinstance(self.message, unicode):
@@ -54,7 +54,7 @@
     def __getitem__(self, item):
         """ Make it possible to access attributes like a dict """
         return getattr(self, item)
-    
+
 
 class CompositeError(Error):
     ''' Base class for exceptions containing an exception
@@ -82,7 +82,7 @@
         """ Save system exception info before this exception is raised """
         Error.__init__(self, message)
         self.innerException = sys.exc_info()
-   
+
     def exceptions(self):
         """ Return a list of all inner exceptions """
         all = [self.innerException]
@@ -94,7 +94,6 @@
                 break
         return all
 
-   
 class FatalError(CompositeError):
     """ Base class for fatal error we can't handle
 
@@ -106,3 +105,4 @@
 
 class InternalError(FatalError):
     """ Raise when internal fatal error is found """
+
--- a/MoinMoin/failure.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/failure.py	Sun Jul 09 22:39:15 2006 +0200
@@ -27,7 +27,7 @@
     developers. This moin specific subclass is an example.
     """
     debugInfoID = 'debug-info'
-    
+
     def formatContent(self):
         content = (
             self.script(),
@@ -39,7 +39,7 @@
             self.formatTextTraceback()
             )
         return ''.join(content)
-    
+
     def script(self):
         return '''
 <script type="text/javascript">
@@ -62,26 +62,26 @@
         f = self.formatter
         text = [self.formatExceptionMessage(self.info),
                 f.paragraph("If you want to report a bug, please save "
-                            "this page and  attach it to your bug report."),]
+                            "this page and  attach it to your bug report."), ]
         return ''.join(text)
 
     def formatButtons(self):
         """ Add 'buttons' to the error dialog """
         f = self.formatter
-        buttons = [f.link('javascript:toggleDebugInfo()', 
+        buttons = [f.link('javascript:toggleDebugInfo()',
                           'Show debugging information'),
-                   f.link('http://moinmoin.wikiwikiweb.de/MoinMoinBugs', 
+                   f.link('http://moinmoin.wikiwikiweb.de/MoinMoinBugs',
                           'Report bug'),
-                   f.link('http://moinmoin.wikiwikiweb.de/FrontPage', 
-                          'Visit MoinMoin wiki'),]
+                   f.link('http://moinmoin.wikiwikiweb.de/FrontPage',
+                          'Visit MoinMoin wiki'), ]
         return f.list(buttons, {'class': 'buttons'})
-    
+
     def formatDebugInfo(self):
         """ Put debugging information in a hidden div """
         attributes = {'id': self.debugInfoID}
         info = [self.debugInfoHideScript(),
                 self.formatTraceback(),
-                self.formatSystemDetails(),]
+                self.formatSystemDetails(), ]
         return self.formatter.section(''.join(info), attributes)
 
     def debugInfoHideScript(self):
@@ -94,16 +94,16 @@
 
     def formatTraceback(self):
         return self.formatAllTracebacks(self.formatOneTraceback)
-        
+
     def formatTextTraceback(self):
         template = self.textTracebackTemplate()
-        return template % self.formatAllTracebacks(self.formatOneTextTraceback)        
+        return template % self.formatAllTracebacks(self.formatOneTextTraceback)
 
     def formatAllTracebacks(self, formatFuction):
         """ Format multiple tracebacks using formatFunction """
         tracebacks = []
         for type, value, tb in self.exceptions():
-            if type is None: 
+            if type is None:
                 break
             tracebacks.append(formatFuction((type, value, tb)))
             del tb
@@ -145,10 +145,10 @@
         request.write('<pre>\n')
         printTextException(request, savedError)
         request.write('\nAdditionally cgitb raised this exception:\n')
-        printTextException(request)        
+        printTextException(request)
         request.write('</pre>\n')
 
-        
+
 def printTextException(request, info=None):
     """ Simple text exception that should never fail
     
--- a/MoinMoin/formatter/__init__.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/formatter/__init__.py	Sun Jul 09 22:39:15 2006 +0200
@@ -313,7 +313,7 @@
             try:
                 parser = wikiutil.importPlugin(self.request.cfg, "parser", module_name, "Parser")
                 break
-            except PluginMissingError:
+            except wikiutil.PluginMissingError:
                 pass
         else:
             raise "Parser not found" # XXX what now?
--- a/MoinMoin/formatter/text_python.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/formatter/text_python.py	Sun Jul 09 22:39:15 2006 +0200
@@ -181,7 +181,7 @@
             try:
                 Dependencies = wikiutil.importPlugin(self.request.cfg, "parser", module_name, "Dependencies")
                 break
-            except wikiutil.PluginAttributeError:
+            except (wikiutil.PluginMissingError, wikiutil.PluginAttributeError), err:
                 pass
         else:
             Dependencies = self.defaultDependencies
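
Both formatter hunks fix the plugin-probe loop: formatter/__init__.py was catching an unqualified PluginMissingError (a NameError at runtime), and text_python.py now also tolerates plugins that exist but lack the requested attribute. A hedged sketch of the pattern the two hunks converge on:

    # Try each candidate module; skip ones that are missing or incomplete.
    # The loop's caller handles the case where no candidate loads.
    from MoinMoin import wikiutil

    def probe_parser(request, module_names):
        for module_name in module_names:
            try:
                return wikiutil.importPlugin(request.cfg, "parser",
                                             module_name, "Parser")
            except (wikiutil.PluginMissingError, wikiutil.PluginAttributeError):
                continue
        return None  # no parser found; caller decides the fallback
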
--- a/MoinMoin/i18n/Makefile	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/i18n/Makefile	Sun Jul 09 22:39:15 2006 +0200
@@ -1,25 +1,15 @@
 POFILES = $(wildcard *.po)
 UPDATEPOFILES = $(POFILES:.po=.po-update)
-# CATALOGS = $(POFILES:.po=.mo)
 NOPFILES = $(POFILES:.po=.nop)
 DOMAIN = MoinMoin
 
-.SUFFIXES: .mo .po .po-update .nop
+.SUFFIXES: .po .po-update .nop
 
 -include POTFILES
 
-# both POTFILES.in and POTFILES are now generated by the same tool
-#POTFILES: POTFILES.in
-#	@echo "POTFILES = \\" > POTFILES
-#	@sed -e '/^#/d' -e "/^[ ]*\$$/d" -e 's,.*,	../& \\,' -e '$$s/\(.*\) \\/\1/' < POTFILES.in >> POTFILES
-
 POTFILES.in POTFILES:
 	tools/mk_POTFILES.py
 
-#.po.mo:
-#	@lang=`echo $@ | sed -e 's/\.mo$$//'`; \
-#	msgfmt $$lang.po -o mo/$$lang.$(DOMAIN).mo
-
 .nop.po-update:
 	@lang=`echo $@ | sed -e 's/\.MoinMoin\.po-update$$//'`; \
 	echo "$$lang:"; \
@@ -75,7 +65,6 @@
 update-po:
 	$(MAKE) $(DOMAIN).pot-update
 	$(MAKE) $(UPDATEPOFILES)
-#	$(MAKE) $(CATALOGS)
 
 stats:
 	@files="$(POFILES)"; \
@@ -86,5 +75,5 @@
 	done
 
 clean:
-	rm -f POTFILES
+	rm -f POTFILES POTFILES.in
 
--- a/MoinMoin/i18n/README	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/i18n/README	Sun Jul 09 22:39:15 2006 +0200
@@ -1,27 +1,33 @@
 Translators and Developers,
 
-The data flow for i18n stuff has completely changed, so please read this.
-
 Translators need to deal with the *.po files ONLY - they should do this on
 http://moinmaster.wikiwikiweb.de/MoinI18n/<language-code>, please do not
 send po updates via email, the stuff from the moinmaster wiki gets processed
 automatically.
 
-In moin 1.3 we switched the i18n system to use wiki markup instead html - you
-should change the header of your *.po file after adapting to wiki markup,
-see de.po for a sample.
-
-Encoding: please use utf-8 ONLY for the po file you submit.
+The i18n system uses wiki markup in the msgid/msgstr strings; see also the
+header of your *.po file (and see de.po for a sample).
 
-If you don't like to edit using utf-8, see the section below about non-utf-8
-editing.
-
-For using the i18n build system, one needs to have "gettext" (e.g. from Debian
-unstable) installed. For simply translating text, you do not need it, you can
-use a text editor.
+Encoding is utf-8 ONLY for the po file (the wiki page automatically uses
+utf-8).
 
 Please search your translation for fuzzy and untranslated strings. If you
-have reviewed the fuzzy stuff, remove the "fuzzy" marker.
+have reviewed the fuzzy stuff, remove the "fuzzy" marker. If you do that
+online on the moinmaster wiki, here are some tips:
+
+ * If you look at MoinMaster:MoinI18n/cc (where cc is your language code),
+   you will see some statistics at the top; you should try to have no
+   untranslated strings and also no fuzzy strings.
+ * On the normal page view of that page, untranslated stuff is marked with !!!
+   (3 exclamation marks, just use Firefox's Ctrl-F to find them). Just remember
+   the untranslated phrase (or at least some non-common words in it).
+ * Now edit the page and use Ctrl-F again. Enter the words you remembered.
+   Then use the highlight function right of the search box to get those words
+   highlighted in yellow and scroll through the text in the edit box to
+   find the yellow stuff (the search function without highlight won't help you).
+ * For finding fuzzy stuff, just use the same method with "fuzzy".
+
+For using the i18n build system, one needs to have "gettext" installed.
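+
+A typical build-system cycle is then (a sketch; "de" is just an example
+language code):
+
+    make update-po   # update MoinMoin.pot and merge changes into all *.po
+    make de.po       # or merge just one language's file
+    make stats       # show fuzzy / untranslated counts per file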
 
 In the source code, there is original english text marked with
 _("english string") - xgettext can extract these strings automatically from
@@ -38,14 +44,14 @@
 
 Makefile: a GNU-Makefile
 
-POTFILES.in: a list of files with translatable strings
+POTFILES.in: a list of files with translatable strings (automatically created
+             by "make POTFILES"). POTFILES is about the same thing, just in a
+             format suitable for inclusion into the Makefile.
 
 MoinMoin.pot: Master translation file.
 
 *.po: Translations (utf-8 encoding)
 
-mo/*.mo: Binary output files of msgfmt (this is the stuff moin reads at runtime).
-
 
 New Translation (no .po file exists yet)
 ========================================
@@ -66,7 +72,6 @@
 - run "make <langcode>.po"
 - change the translation
 - update the PO-Revision-Date and Last-Translator entries
-- run "make <langcode>.mo"
 
 Change of translatable strings
 ==============================
--- a/MoinMoin/i18n/__init__.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/i18n/__init__.py	Sun Jul 09 22:39:15 2006 +0200
@@ -46,15 +46,6 @@
 
 translations = {}
 
-def mo_filename(request, language, domain):
-    """ we use MoinMoin/i18n/mo/<language>.<domain>.mo as filename for the binary
-        file generated by GNU gettext.
-    
-        TODO: later, when we have a farm scope plugin dir, we can also load
-              language data from there.
-    """
-    return os.path.join(request.cfg.moinmoin_dir, 'i18n', 'mo', "%s.%s.mo" % (language, domain))
-
 def po_filename(request, language, domain):
     """ we use MoinMoin/i18n/<language>[.<domain>].mo as filename for the PO file.
     
--- a/MoinMoin/macro/FullSearch.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/macro/FullSearch.py	Sun Jul 09 22:39:15 2006 +0200
@@ -54,8 +54,7 @@
     needle = needle.strip()
 
     # Search the pages and return the results
-    query = search.QueryParser().parse_query(needle)
-    results = search.searchPages(request, query)
+    results = search.searchPages(request, needle)
     results.sortByPagename()
 
     return results.pageList(request, macro.formatter)
--- a/MoinMoin/macro/__init__.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/macro/__init__.py	Sun Jul 09 22:39:15 2006 +0200
@@ -328,8 +328,9 @@
             return '<span class="error">%s</span>' % err
             
         # Return a title search for needle, sorted by name.
-        query = search.QueryParser(literal=literal, titlesearch=1, case=case).parse_query(needle)
-        results = search.searchPages(self.request, query)
+        # XXX: what's with literal?
+        results = search.searchPages(self.request, needle,
+                titlesearch=1, case=case)
         results.sortByPagename()
         return results.pageList(self.request, self.formatter)
         
--- a/MoinMoin/multiconfig.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/multiconfig.py	Sun Jul 09 22:39:15 2006 +0200
@@ -2,7 +2,8 @@
 """
     MoinMoin - Multiple configuration handler and Configuration defaults class
 
-    @copyright: 2000-2004 by Jürgen Hermann <jh@web.de>
+    @copyright: 2000-2004 by Jürgen Hermann <jh@web.de>,
+                2005-2006 by MoinMoin:ThomasWaldmann.
     @license: GNU GPL, see COPYING for details.
 """
 
@@ -31,12 +32,14 @@
     except ImportError:
         raise
     except IndentationError, err:
-        msg = 'IndentationError: %s\n' % str(err) + '''
+        msg = '''IndentationError: %(err)s
 
 The configuration files are python modules. Therefore, whitespace is
 important. Make sure that you use only spaces, no tabs are allowed here!
 Usually you will have to use four spaces at the beginning of the line.
-'''
+''' % {
+    'err': err,
+}
         raise error.ConfigurationError(msg)
     except Exception, err:
         msg = '%s: %s' % (err.__class__.__name__, str(err))
@@ -74,7 +77,7 @@
 If you run a single wiki you do not need farmconfig.py. Delete it and
 use wikiconfig.py.
 """
-                raise error.ConfigurationError(msg)    
+                raise error.ConfigurationError(msg)
     return _url_re_cache
 
 
@@ -95,7 +98,7 @@
         cfg = configClass(name)
         cfg.cfg_mtime = max(mtime, _farmconfig_mtime)
     except ImportError, err:
-        msg = 'ImportError: %s\n' % str(err) + '''
+        msg = '''ImportError: %(err)s
 
 Check that the file is in the same directory as the server script. If
 it is not, you must add the path of the directory where the file is
@@ -105,17 +108,29 @@
 Check that the configuration file name is either "wikiconfig.py" or the
 module name specified in the wikis list in farmconfig.py. Note that the
 module name does not include the ".py" suffix.
-'''
+''' % {
+    'err': err,
+}
         raise error.ConfigurationError(msg)
-    except AttributeError:
-        msg = '''
-Could not find required "Config" class in "%(name)s.py". This might
-happen if you are trying to use a pre 1.3 configuration file, or made a
-syntax or spelling error.
+    except AttributeError, err:
+        msg = '''AttributeError: %(err)s
+
+Could not find required "Config" class in "%(name)s.py".
+
+This might happen if you are trying to use a pre-1.3 configuration file, or
+made a syntax or spelling error.
+
+Another reason for this could be a name clash. It is not possible to have
+config names like e.g. stats.py - because that collides with MoinMoin/stats/ -
+have a look into your MoinMoin code directory to see what other names are NOT
+possible.
 
 Please check your configuration file. As an example for correct syntax,
 use the wikiconfig.py file from the distribution.
-''' % {'name': name}
+''' % {
+    'name': name,
+    'err': err,
+}
         raise error.ConfigurationError(msg)
     return cfg
 
@@ -132,8 +147,10 @@
 
 Check your URL regular expressions in the "wikis" list in
 "farmconfig.py". 
-''' % {'url': url}
-    raise error.ConfigurationError(msg)    
+''' % {
+    'url': url,
+}
+    raise error.ConfigurationError(msg)
 
 
 def getConfig(url):
@@ -163,18 +180,18 @@
 
 class DefaultConfig:
     """ default config values """
-    
+
     # All acl_rights_* lines must use unicode!
     acl_rights_default = u"Trusted:read,write,delete,revert Known:read,write,delete,revert All:read,write"
     acl_rights_before = u""
     acl_rights_after = u""
     acl_rights_valid = ['read', 'write', 'delete', 'revert', 'admin']
-    
+
     actions_excluded = [] # ['DeletePage', 'AttachFile', 'RenamePage', 'test', ]
     allow_xslt = 0
     attachments = None # {'dir': path, 'url': url-prefix}
-    auth = [authmodule.moin_login, authmodule.moin_session,]
-    
+    auth = [authmodule.moin_login, authmodule.moin_session, ]
+
     backup_compression = 'gz'
     backup_users = []
     backup_include = []
@@ -186,7 +203,7 @@
         ]
     backup_storage_dir = '/tmp'
     backup_restore_target_dir = '/tmp'
-    
+
     bang_meta = 1
     caching_formats = ['text_html']
     changed_time_fmt = '%H:%M'
@@ -196,27 +213,27 @@
     # if you have gdchart, add something like
     # chart_options = {'width': 720, 'height': 540}
     chart_options = None
-    
+
     config_check_enabled = 0
 
     cookie_domain = None # use '.domain.tld" for a farm with hosts in that domain
     cookie_path = None   # use '/wikifarm" for a farm with pathes below that path
     cookie_lifetime = 12 # 12 hours from now
     cookie_secret = '1234' # secret value for crypting session cookie - you should change this :)
-    
+
     data_dir = './data/'
     data_underlay_dir = './underlay/'
-    
+
     date_fmt = '%Y-%m-%d'
     datetime_fmt = '%Y-%m-%d %H:%M:%S'
-    
+
     default_markup = 'wiki'
     docbook_html_dir = r"/usr/share/xml/docbook/stylesheet/nwalsh/html/" # correct for debian sarge
-    
+
     editor_default = 'text' # which editor is called when nothing is specified
     editor_ui = 'freechoice' # which editor links are shown on user interface
     editor_force = False
-    editor_quickhelp = { # editor markup hints quickhelp 
+    editor_quickhelp = {# editor markup hints quickhelp
         'wiki': _("""\
  Emphasis:: [[Verbatim('')]]''italics''[[Verbatim('')]]; [[Verbatim(''')]]'''bold'''[[Verbatim(''')]]; [[Verbatim(''''')]]'''''bold italics'''''[[Verbatim(''''')]]; [[Verbatim('')]]''mixed ''[[Verbatim(''')]]'''''bold'''[[Verbatim(''')]] and italics''[[Verbatim('')]]; [[Verbatim(----)]] horizontal rule.
  Headings:: [[Verbatim(=)]] Title 1 [[Verbatim(=)]]; [[Verbatim(==)]] Title 2 [[Verbatim(==)]]; [[Verbatim(===)]] Title 3 [[Verbatim(===)]];   [[Verbatim(====)]] Title 4 [[Verbatim(====)]]; [[Verbatim(=====)]] Title 5 [[Verbatim(=====)]].
@@ -248,7 +265,7 @@
     }
     edit_locking = 'warn 10' # None, 'warn <timeout mins>', 'lock <timeout mins>'
     edit_rows = 20
-                
+
     hacks = {} # { 'feature1': value1, ... }
                # Configuration for features still in development.
                # For boolean stuff just use config like this:
@@ -258,7 +275,7 @@
                # A non-existing hack key should always mean False, None, "", [] or {}!
 
     hosts_deny = []
-    
+
     html_head = ''
     html_head_queries = '''<meta name="robots" content="noindex,nofollow">\n'''
     html_head_posts   = '''<meta name="robots" content="noindex,nofollow">\n'''
@@ -277,6 +294,7 @@
 
     xapian_search = False # disabled until xapian is finished
     xapian_index_dir = None
+    xapian_stemming = True
 
     mail_login = None # or "user pwd" if you need to use SMTP AUTH
     mail_sendmail = None # "/usr/sbin/sendmail -t -i" to not use SMTP, but sendmail
@@ -286,7 +304,7 @@
     mail_import_subpage_template = u"$from-$date-$subject" # used for mail import
     mail_import_wiki_address = None # the e-mail address for e-mails that should go into the wiki
     mail_import_secret = ""
-    
+
     navi_bar = [u'RecentChanges', u'FindPage', u'HelpContents', ]
     nonexist_qm = 0
 
@@ -300,7 +318,7 @@
 
     page_header1 = ''
     page_header2 = ''
-    
+
     page_front_page = u'HelpOnLanguages' # this will make people choose a sane config
     page_local_spelling_words = u'LocalSpellingWords'
     page_category_regex = u'^Category[A-Z]'
@@ -314,7 +332,7 @@
     # These icons will show in this order in the iconbar, unless they
     # are not relevant, e.g email icon when the wiki is not configured
     # for email.
-    page_iconbar = ["up", "edit", "view", "diff", "info", "subscribe", "raw", "print",]
+    page_iconbar = ["up", "edit", "view", "diff", "info", "subscribe", "raw", "print", ]
 
     # Standard buttons in the iconbar
     page_icons_table = {
@@ -332,7 +350,7 @@
         'view':        ("%(q_page_name)s", _("View"), "view"),
         'up':          ("%(q_page_parent_page)s", _("Up"), "up"),
         }
-    
+
     refresh = None # (minimum_delay, type), e.g.: (2, 'internal')
     rss_cache = 60 # suggested caching time for RecentChanges RSS, in seconds
     shared_intermap = None # can be string or list of strings (filenames)
@@ -346,8 +364,8 @@
     siteid = 'default'
     stylesheets = [] # list of tuples (media, csshref) to insert after theme css, before user css
     superuser = [] # list of unicode user names that have super powers :)
-    
-    surge_action_limits = { # allow max. <count> <action> requests per <dt> secs
+
+    surge_action_limits = {# allow max. <count> <action> requests per <dt> secs
         # action: (count, dt)
         'all': (30, 30),
         'show': (30, 60),
@@ -361,13 +379,13 @@
         'default': (30, 60),
     }
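+    # A hypothetical wikiconfig.py override (a sketch, not part of this
+    # changeset) that tightens the limit for a single action:
+    #   surge_action_limits = dict(DefaultConfig.surge_action_limits)
+    #   surge_action_limits['show'] = (10, 60) # max. 10 shows per 60 secs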
     surge_lockout_time = 3600 # secs you get locked out when you ignore warnings
-    
+
     theme_default = 'modern'
     theme_force = False
-    
+
     trail_size = 5
     tz_offset = 0.0 # default time zone offset in hours from UTC
-    
+
     user_autocreate = False # do we auto-create user profiles
     user_email_unique = True # do we check whether a user's email is unique?
 
@@ -382,7 +400,7 @@
     url_prefix = '/wiki'
     logo_string = None
     interwikiname = None
-    
+
     url_mappings = {}
 
     user_checkbox_fields = [
@@ -397,13 +415,13 @@
         ('wikiname_add_spaces', lambda _: _('Add spaces to displayed wiki names')),
         ('remember_me', lambda _: _('Remember login information')),
         ('want_trivial', lambda _: _('Subscribe to trivial changes')),
-        
+
         ('disabled', lambda _: _('Disable this account forever')),
         # if an account is disabled, it may be used for looking up
         # id -> username for page info and recent changes, but it
         # is not usable for the user any more:
     ]
-    
+
     user_checkbox_defaults = {'mailto_author':       0,
                               'edit_on_doubleclick': 0,
                               'remember_last_visit': 0,
@@ -416,16 +434,16 @@
                               'remember_me':         1,
                               'want_trivial':        0,
                              }
-    
+
     # don't let the user change those
     # user_checkbox_disable = ['disabled', 'want_trivial']
     user_checkbox_disable = []
-    
+
     # remove those checkboxes:
     #user_checkbox_remove = ['edit_on_doubleclick', 'show_nonexist_qm', 'show_toolbar', 'show_topbottom',
     #                        'show_fancy_diff', 'wikiname_add_spaces', 'remember_me', 'disabled',]
     user_checkbox_remove = []
-    
+
     user_form_fields = [
         ('name', _('Name'), "text", "36", _("(Use Firstname''''''Lastname)")),
         ('aliasname', _('Alias-Name'), "text", "36", ''),
@@ -435,8 +453,8 @@
         ('css_url', _('User CSS URL'), "text", "40", _('(Leave it empty for disabling user CSS)')),
         ('edit_rows', _('Editor size'), "text", "3", ''),
     ]
-    
-    user_form_defaults = { # key: default - do NOT remove keys from here!
+
+    user_form_defaults = {# key: default - do NOT remove keys from here!
         'name': '',
         'aliasname': '',
         'password': '',
@@ -445,17 +463,17 @@
         'css_url': '',
         'edit_rows': "20",
     }
-    
+
     # don't let the user change those, but show them:
     #user_form_disable = ['name', 'aliasname', 'email',]
     user_form_disable = []
-    
+
     # remove those completely:
     #user_form_remove = ['password', 'password2', 'css_url', 'logout', 'create', 'account_sendmail',]
     user_form_remove = []
-    
+
     # attributes we do NOT save to the userpref file
-    user_transient_fields =  ['id', 'valid', 'may', 'auth_username', 'trusted', 'password', 'password2', 'auth_method', 'auth_attribs']
+    user_transient_fields = ['id', 'valid', 'may', 'auth_username', 'trusted', 'password', 'password2', 'auth_method', 'auth_attribs', ]
 
     user_homewiki = 'Self' # interwiki name for where user homepages are located
 
@@ -465,7 +483,7 @@
 
     xmlrpc_putpage_enabled = 0 # if 0, putpage will write to a test page only
     xmlrpc_putpage_trusted_only = 1 # if 1, you will need to be http auth authenticated
-    
+
     SecurityPolicy = None
 
     def __init__(self, siteid):
@@ -493,12 +511,12 @@
                      (e.g. ['Sample User', 'AnotherUser']).
                      Please change it in your wiki configuration and try again."""
             raise error.ConfigurationError(msg)
-        
+
         self._loadPluginModule()
 
         # Preparse user dicts
         self._fillDicts()
-        
+
         # Normalize values
         self.language_default = self.language_default.lower()
 
@@ -517,7 +535,7 @@
                 import gdchart
             except ImportError:
                 self.chart_options = None
-        
+
         # post process
         # we replace any string placeholders with config values
         # e.g u'%(page_front_page)s' % self
@@ -538,9 +556,9 @@
 
         This check is disabled by default; when enabled, it will show an
         error message with unknown names.
-        """       
+        """
         unknown = ['"%s"' % name for name in dir(self)
-                  if not name.startswith('_') and 
+                  if not name.startswith('_') and
                   not DefaultConfig.__dict__.has_key(name) and
                   not isinstance(getattr(self, name), (type(sys), type(DefaultConfig)))]
         if unknown:
@@ -574,22 +592,22 @@
 Also check your "-*- coding -*-" line at the top of your configuration
 file. It should match the actual charset of the configuration file.
 '''
-        
+
         decode_names = (
             'sitename', 'logo_string', 'navi_bar', 'page_front_page',
-            'page_category_regex', 'page_dict_regex', 
+            'page_category_regex', 'page_dict_regex',
             'page_group_regex', 'page_template_regex', 'page_license_page',
             'page_local_spelling_words', 'acl_rights_default',
             'acl_rights_before', 'acl_rights_after', 'mail_from'
             )
-        
+
         for name in decode_names:
             attr = getattr(self, name, None)
             if attr:
                 # Try to decode strings
                 if isinstance(attr, str):
                     try:
-                        setattr(self, name, unicode(attr, charset)) 
+                        setattr(self, name, unicode(attr, charset))
                     except UnicodeError:
                         raise error.ConfigurationError(message %
                                                        {'name': name})
@@ -613,7 +631,7 @@
         mode = os.F_OK | os.R_OK | os.W_OK | os.X_OK
         for attr in ('data_dir', 'data_underlay_dir'):
             path = getattr(self, attr)
-            
+
             # allow an empty underlay path or None
             if attr == 'data_underlay_dir' and not path:
                 continue
@@ -630,7 +648,7 @@
 
 It is recommended to use absolute paths and not relative paths. Check
 also the spelling of the directory name.
-''' % {'attr': attr, 'path': path,}
+''' % {'attr': attr, 'path': path, }
                 raise error.ConfigurationError(msg)
 
     def _loadPluginModule(self):
@@ -672,7 +690,10 @@
 
 Make sure your data directory path is correct, check permissions, and
 that the data/plugin directory has an __init__.py file.
-''' % {'path': self.data_dir, 'err': str(err)}
+''' % {
+    'path': self.data_dir,
+    'err': str(err),
+}
             raise error.ConfigurationError(msg)
 
     def _fillDicts(self):
@@ -689,7 +710,7 @@
     def __getitem__(self, item):
         """ Make it possible to access a config object like a dict """
         return getattr(self, item)
-    
+
 # remove the gettext pseudo function 
 del _
 
--- a/MoinMoin/packages.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/packages.py	Sun Jul 09 22:39:15 2006 +0200
@@ -91,7 +91,7 @@
         self.themename = None
         self.ignoreExceptions = False
         self.goto = 0
-        
+
         #Satisfy pylint
         self.msg = getattr(self, "msg", "")
         self.request = getattr(self, "request", None)
@@ -119,7 +119,7 @@
         @param lines: lines to ignore
         """
         _ = self.request.getText
-        
+
         from MoinMoin.version import release
         version_int = [int(x) for x in version.split(".")]
         release = [int(x) for x in release.split(".")]
@@ -204,7 +204,7 @@
 
         self.msg += package.msg
 
-    def do_addrevision(self, filename, pagename, author=u"Scripting Subsystem", comment=u"", trivial = u"No"):
+    def do_addrevision(self, filename, pagename, author=u"Scripting Subsystem", comment=u"", trivial=u"No"):
         """ Adds a revision to a page.
 
         @param filename: name of the file in this package
@@ -250,7 +250,7 @@
         pagedir = page.getPagePath(use_underlay=1, check_create=1)
 
         revdir = os.path.join(pagedir, 'revisions')
-        cfn = os.path.join(pagedir,'current')
+        cfn = os.path.join(pagedir, 'current')
 
         revstr = '%08d' % 1
         if not os.path.exists(revdir):
@@ -428,7 +428,7 @@
 
     # Setup MoinMoin environment
     from MoinMoin.request import CLI
-    request = CLI.Request(url = 'localhost/')
+    request = CLI.Request(url='localhost/')
     request.form = request.args = request.setup_args()
 
     package = ZipPackage(request, packagefile)
@@ -445,6 +445,7 @@
             print "Installation failed."
         if package.msg:
             print package.msg
-    
+
 if __name__ == '__main__':
     main()
+
--- a/MoinMoin/request/__init__.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/request/__init__.py	Sun Jul 09 22:39:15 2006 +0200
@@ -56,7 +56,7 @@
     """
     var = '%s_%s' % (scheme, header)
     return var.upper().replace('-', '_')
-    
+
 
 # Request Base ----------------------------------------------------------
 
@@ -81,7 +81,7 @@
     # headers as lowercase.
     moin_location = 'x-moin-location'
     proxy_host = 'x-forwarded-host'
-    
+
     def __init__(self, properties={}):
         # Decode values collected by sub classes
         self.path_info = self.decodePagename(self.path_info)
@@ -92,13 +92,13 @@
 
         # Pages meta data that we collect in one request
         self.pages = {}
-              
+
         self.sent_headers = 0
         self.user_headers = []
         self.cacheable = 0 # may this output get cached by http proxies/caches?
         self.page = None
         self._dicts = None
-        
+
         # Fix dircaching problems on Windows 9x
         if IsWin9x():
             import dircache
@@ -118,14 +118,14 @@
             # order is important here!
             self.__dict__.update(properties)
             self._load_multi_cfg()
-            
+
             self.isSpiderAgent = self.check_spider()
-        
+
             # Set decode charsets.  Input from the user is always in
             # config.charset, which is the page charset. Except
             # path_info, which may use utf-8 and is handled by decodePagename.
             self.decode_charsets = [config.charset]
-            
+
             # hierarchical wiki - set rootpage
             from MoinMoin.Page import Page
             #path = self.getPathinfo()
@@ -155,7 +155,7 @@
             i18n.i18n_init(self)
 
             self.user = self.get_user_from_form()
-            
+
             if not self.query_string.startswith('action=xmlrpc'):
                 if not self.forbidden and self.isForbidden():
                     self.makeForbidden403()
@@ -166,7 +166,7 @@
             self.pragma = {}
             self.mode_getpagelinks = 0
 
-            self.lang = i18n.requestLanguage(self) 
+            self.lang = i18n.requestLanguage(self)
             # Language for content. Page content should use the wiki default lang,
             # but generated content like search results should use the user language.
             self.content_lang = self.cfg.language_default
@@ -174,7 +174,7 @@
 
             self.opened_logs = 0
             self.reset()
-        
+
     def surge_protect(self):
         """ check if someone requesting too much from us """
         validuser = self.user.valid
@@ -182,14 +182,14 @@
         if not validuser and current_id.startswith('127.'): # localnet
             return False
         current_action = self.form.get('action', ['show'])[0]
-        
+
         limits = self.cfg.surge_action_limits
         default_limit = self.cfg.surge_action_limits.get('default', (30, 60))
-        
+
         now = int(time.time())
         surgedict = {}
         surge_detected = False
-        
+
         try:
             # if we have common farm users, we could also use scope='farm':
             cache = caching.CacheEntry(self, 'surgeprotect', 'surge-log', scope='wiki')
@@ -207,7 +207,7 @@
                             timestamps.append((t, surge_indicator))
                     except StandardError, err:
                         pass
-                
+
             maxnum, dt = limits.get(current_action, default_limit)
             events = surgedict.setdefault(current_id, copy.copy({}))
             timestamps = events.setdefault(current_action, copy.copy([]))
@@ -218,20 +218,20 @@
             if surge_detected:
                 if len(timestamps) < maxnum * 2:
                     timestamps.append((now + self.cfg.surge_lockout_time, surge_indicator)) # continue like that and get locked out
-        
+
             if current_action != 'AttachFile': # don't add AttachFile accesses to all or picture galleries will trigger SP
                 current_action = 'all' # put a total limit on user's requests
                 maxnum, dt = limits.get(current_action, default_limit)
                 events = surgedict.setdefault(current_id, copy.copy({}))
                 timestamps = events.setdefault(current_action, copy.copy([]))
                 surge_detected = surge_detected or len(timestamps) > maxnum
-            
+
                 surge_indicator = surge_detected and "!" or ""
                 timestamps.append((now, surge_indicator))
                 if surge_detected:
                     if len(timestamps) < maxnum * 2:
                         timestamps.append((now + self.cfg.surge_lockout_time, surge_indicator)) # continue like that and get locked out
-        
+
             data = []
             for id, events in surgedict.items():
                 for action, timestamps in events.items():
@@ -242,8 +242,8 @@
         except StandardError, err:
             pass
 
-        return surge_detected   
-        
+        return surge_detected
+
     def getDicts(self):
         """ Lazy initialize the dicts on the first access """
         if self._dicts is None:
@@ -252,20 +252,20 @@
             dicts.scandicts()
             self._dicts = dicts
         return self._dicts
-        
+
     def delDicts(self):
         """ Delete the dicts, used by some tests """
         del self._dicts
         self._dicts = None
 
     dicts = property(getDicts, None, delDicts)
-  
+
     def _load_multi_cfg(self):
         # protect against calling multiple times
         if not hasattr(self, 'cfg'):
             from MoinMoin import multiconfig
             self.cfg = multiconfig.getConfig(self.url)
-            
+
     def setAcceptedCharsets(self, accept_charset):
         """ Set accepted_charsets by parsing accept-charset header
 
@@ -276,7 +276,7 @@
         TODO: currently no code uses this value.
 
         @param accept_charset: accept-charset header
-        """        
+        """
         charsets = []
         if accept_charset:
             accept_charset = accept_charset.lower()
@@ -293,13 +293,13 @@
                     qval = 1.0 - float(qval.split('=')[1])
                 else:
                     name, qval = item, 0
-                charsets.append((qval, name))                 
+                charsets.append((qval, name))
             charsets.sort()
             # Remove *, it's not clear what we should do with it later
             charsets = [name for qval, name in charsets if name != '*']
 
         self.accepted_charsets = charsets
-          
+
     def _setup_vars_from_std_env(self, env):
         """ Set common request variables from CGI environment
         
@@ -324,14 +324,14 @@
 
         # REQUEST_URI is not part of CGI spec, but an addition of Apache.
         self.request_uri = env.get('REQUEST_URI', '')
-        
+
         # Values that need more work
         self.setHttpReferer(env.get('HTTP_REFERER'))
         self.setIsSSL(env)
         self.setHost(env.get('HTTP_HOST'))
         self.fixURI(env)
         self.setURL(env)
-        
+
         ##self.debugEnvironment(env)
 
     def setHttpReferer(self, referer):
@@ -367,7 +367,7 @@
                 port = ':' + self.server_port
             host = self.server_name + port
         self.http_host = host
-        
+
     def fixURI(self, env):
         """ Fix problems with script_name and path_info
         
@@ -385,12 +385,12 @@
                                       - := Hopefully not.
 
         @param env: dict like object containing cgi meta variables
-        """ 
+        """
         # Fix the script_name when using Apache on Windows.
         server_software = env.get('SERVER_SOFTWARE', '')
         if os.name == 'nt' and server_software.find('Apache/') != -1:
             # Removes elements ending in '.' from the path.
-            self.script_name = '/'.join([x for x in self.script_name.split('/') 
+            self.script_name = '/'.join([x for x in self.script_name.split('/')
                                          if not x.endswith('.')])
 
         # Fix path_info
@@ -398,13 +398,13 @@
             # Try to recreate path_info from request_uri.
             import urlparse
             scriptAndPath = urlparse.urlparse(self.request_uri)[2]
-            path = scriptAndPath.replace(self.script_name, '', 1)            
+            path = scriptAndPath.replace(self.script_name, '', 1)
             self.path_info = wikiutil.url_unquote(path, want_unicode=False)
         elif os.name == 'nt':
             # Recode path_info to utf-8
             path = wikiutil.decodeWindowsPath(self.path_info)
             self.path_info = path.encode("utf-8")
-            
+
             # Fix bug in IIS/4.0 when path_info contains script_name
             if self.path_info.startswith(self.script_name):
                 self.path_info = self.path_info[len(self.script_name):]
@@ -421,7 +421,7 @@
         # Same for the wiki config - they must use the proxy url.
         self.rewriteHost(env)
         self.rewriteURI(env)
-        
+
         if not self.request_uri:
             self.request_uri = self.makeURI()
         self.url = self.http_host + self.request_uri
@@ -464,15 +464,15 @@
         
         @param env: dict like object containing cgi meta variables or http headers.
         """
-        location = (env.get(self.moin_location) or 
+        location = (env.get(self.moin_location) or
                     env.get(cgiMetaVariable(self.moin_location)))
         if location is None:
             return
-        
+
         scriptAndPath = self.script_name + self.path_info
         location = location.rstrip('/')
         self.script_name = location
-        
+
         # This may happen when using mod_python
         if scriptAndPath.startswith(location):
             self.path_info = scriptAndPath[len(location):]
@@ -497,7 +497,7 @@
             path, query = uri.split('?', 1)
         else:
             path, query = uri, ''
-        return wikiutil.url_unquote(path, want_unicode=False), query        
+        return wikiutil.url_unquote(path, want_unicode=False), query
 
     def get_user_from_form(self):
         """ read the maybe present UserPreferences form and call get_user with the values """
@@ -509,7 +509,7 @@
                                           login=login, logout=logout,
                                           user_obj=None)
         return u
-    
+
     def get_user_default_unknown(self, **kw):
         """ call do_auth and if it doesnt return a user object, make some "Unknown User" """
         user_obj = self.get_user_default_None(**kw)
@@ -530,7 +530,7 @@
             if not continue_flag:
                 break
         return user_obj
-        
+
     def reset(self):
         """ Reset request state.
 
@@ -569,7 +569,7 @@
         fallback = 0
         if theme_name == "<default>":
             theme_name = self.cfg.theme_default
-        
+
         try:
             Theme = wikiutil.importPlugin(self.cfg, 'theme', theme_name, 'Theme')
         except wikiutil.PluginMissingError:
@@ -579,7 +579,7 @@
             except wikiutil.PluginMissingError:
                 fallback = 2
                 from MoinMoin.theme.modern import Theme
-        
+
         self.theme = Theme(self)
         return fallback
 
@@ -628,7 +628,7 @@
         pagename = self.decodePagename(pagename)
         pagename = self.normalizePagename(pagename)
         return pagename
-    
+
     def getKnownActions(self):
         """ Create a dict of avaiable actions
 
@@ -649,14 +649,14 @@
             actions.extend(plugins)
 
             # Add extensions
-            actions.extend(action.extension_actions)           
-           
+            actions.extend(action.extension_actions)
+
             # TODO: Use set when we require Python 2.3
-            actions = dict(zip(actions, [''] * len(actions)))            
+            actions = dict(zip(actions, [''] * len(actions)))
             self.cfg._known_actions = actions
 
         # Return a copy, so clients will not change the dict.
-        return self.cfg._known_actions.copy()        
+        return self.cfg._known_actions.copy()
 
     def getAvailableActions(self, page):
         """ Get list of avaiable actions for this request
@@ -683,7 +683,7 @@
             # Filter wiki excluded actions
             for key in self.cfg.actions_excluded:
                 if key in actions:
-                    del actions[key]                
+                    del actions[key]
 
             # Filter actions by page type, acl and user state
             excluded = []
@@ -695,7 +695,7 @@
                 excluded = [u'RenamePage', u'DeletePage', ] # AttachFile must NOT be here!
             for key in excluded:
                 if key in actions:
-                    del actions[key]                
+                    del actions[key]
 
             self._available_actions = actions
 
@@ -711,7 +711,7 @@
         finally:
             self.redirect()
         text = buffer.getvalue()
-        buffer.close()        
+        buffer.close()
         return text
 
     def redirect(self, file=None):
@@ -740,7 +740,7 @@
         # Add time stamp
         msg = '[%s] %s\n' % (time.asctime(), msg)
         sys.stderr.write(msg)
-    
+
     def write(self, *data):
         """ Write to output stream. """
         raise NotImplementedError
@@ -754,12 +754,12 @@
             try:
                 if isinstance(d, unicode):
                     # if we are REALLY sure, we can use "strict"
-                    d = d.encode(config.charset, 'replace') 
+                    d = d.encode(config.charset, 'replace')
                 wd.append(d)
             except UnicodeError:
-                print >>sys.stderr, "Unicode error on: %s" % repr(d)
+                self.log("Unicode error on: %s" % repr(d))
         return ''.join(wd)
-    
+
     def decodePagename(self, name):
         """ Decode path, possibly using non ascii characters
 
@@ -790,7 +790,7 @@
                     page = page.encode(config.charset, 'replace')
                 except UnicodeError:
                     pass
-                
+
             # Decode from config.charset, replacing what can't be decoded.
             page = unicode(page, config.charset, 'replace')
             decoded.append(page)
@@ -818,7 +818,7 @@
         # Split to pages and normalize each one
         pages = name.split(u'/')
         normalized = []
-        for page in pages:            
+        for page in pages:
             # Ignore empty or whitespace only pages
             if not page or page.isspace():
                 continue
@@ -833,13 +833,13 @@
             # words separated with only one space; split() handles all
             # 30 unicode spaces (isspace() == True)
             page = u' '.join(page.split())
-            
-            normalized.append(page)            
-        
+
+            normalized.append(page)
+
         # Assemble components into full pagename
         name = u'/'.join(normalized)
         return name
-        
+
     def read(self, n):
         """ Read n bytes from input stream. """
         raise NotImplementedError
@@ -928,9 +928,9 @@
                 fixedResult.append(item.value)
                 if isinstance(item, cgi.FieldStorage) and item.filename:
                     # Save upload file name in a separate key
-                    args[key + '__filename__'] = item.filename            
+                    args[key + '__filename__'] = item.filename
             args[key] = fixedResult
-            
+
         return self.decodeArgs(args)
 
     def decodeArgs(self, args):
@@ -1008,7 +1008,7 @@
         else:
             theme_name = self.user.theme_name
         self.loadTheme(theme_name)
-        
+
     def run(self):
         # Exit now if __init__ failed or request is forbidden
         if self.failed or self.forbidden:
@@ -1028,7 +1028,7 @@
             from MoinMoin import xmlrpc
             xmlrpc.xmlrpc(self)
             return self.finish()
-        
+
         if self.query_string == 'action=xmlrpc2':
             from MoinMoin import xmlrpc
             xmlrpc.xmlrpc2(self)
@@ -1037,8 +1037,8 @@
         # parse request data
         try:
             self.initTheme()
-            
-            action_name = self.form.get('action', [None])[0]
+
+            action_name = self.form.get('action', ['show'])[0]
 
             # The last component in path_info is the page name, if any
             path = self.getPathinfo()
@@ -1048,7 +1048,6 @@
                 pagename = None
 
             # Handle request. We have these options:
-            
             # 1. If user has a bad user name, delete its bad cookie and
             # send him to UserPreferences to make a new account.
             if not user.isValidName(self, self.user.name):
@@ -1060,12 +1059,12 @@
                 page.send_page(self, msg=msg)
 
             # 2. Or jump to page where user left off
-            elif not pagename and not action_name and self.user.remember_last_visit:
+            elif not pagename and self.user.remember_last_visit:
                 pagetrail = self.user.getTrail()
                 if pagetrail:
                     # Redirect to last page visited
                     if ":" in pagetrail[-1]:
-                        wikitag, wikiurl, wikitail, error = wikiutil.resolve_wiki(self, pagetrail[-1]) 
+                        wikitag, wikiurl, wikitail, error = wikiutil.resolve_wiki(self, pagetrail[-1])
                         url = wikiurl + wikiutil.quoteWikinameURL(wikitail)
                     else:
                         url = Page(self, pagetrail[-1]).url(self)
@@ -1074,11 +1073,9 @@
                     url = wikiutil.getFrontPage(self).url(self)
                 self.http_redirect(url)
                 return self.finish()
-            
+
             # 3. Or handle action
             else:
-                if action_name is None:
-                    action_name = 'show'
                 if not pagename and self.query_string:
                     pagename = self.getPageNameFromQueryString()
                 # pagename could be empty after normalization e.g. '///' -> ''
@@ -1146,9 +1143,9 @@
         self.failed = 1 # save state for self.run()            
         self.http_headers(['Status: 500 MoinMoin Internal Error'])
         self.setResponseCode(500)
-        self.log('%s: %s' % (err.__class__.__name__, str(err)))        
+        self.log('%s: %s' % (err.__class__.__name__, str(err)))
         from MoinMoin import failure
-        failure.handle(self)             
+        failure.handle(self)
 
     def open_logs(self):
         pass
@@ -1206,10 +1203,10 @@
             date = '%02d-%s-%s' % (now.tm_mday, month, str(now.tm_year)[-2:])
         else:
             raise ValueError("Invalid rfc value: %s" % rfc)
-        
+
         return '%s, %s %02d:%02d:%02d GMT' % (day, date, now.tm_hour,
                                               now.tm_min, now.tm_sec)
-    
+
     def disableHttpCaching(self):
         """ Prevent caching of pages that should not be cached
 
@@ -1227,7 +1224,7 @@
         # and http://www.cse.ohio-state.edu/cgi-bin/rfc/rfc2068.html#sec-14.9
         self.setHttpHeader('Cache-Control: no-cache="set-cookie"')
         self.setHttpHeader('Cache-Control: private')
-        self.setHttpHeader('Cache-Control: max-age=0')       
+        self.setHttpHeader('Cache-Control: max-age=0')
 
         # Set Expires for http 1.0 caches (does not support Cache-Control)
         yearago = time.time() - (3600 * 24 * 365)
@@ -1276,18 +1273,18 @@
         for name in names:
             attributes.append('  %s = %r\n' % (name, getattr(self, name, None)))
         attributes = ''.join(attributes)
-        
+
         environment = []
         names = env.keys()
         names.sort()
         for key in names:
             environment.append('  %s = %r\n' % (key, env[key]))
         environment = ''.join(environment)
-        
-        data = '\nRequest Attributes\n%s\nEnviroment\n%s' % (attributes, environment)        
+
+        data = '\nRequest Attributes\n%s\nEnvironment\n%s' % (attributes, environment)
         f = open('/tmp/env.log', 'a')
         try:
             f.write(data)
         finally:
             f.close()
-  
+
--- a/MoinMoin/script/__init__.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/__init__.py	Sun Jul 09 22:39:15 2006 +0200
@@ -18,9 +18,72 @@
 flag_quiet = 0
 script_module = '__main__'
 
-#############################################################################
-### Logging
-#############################################################################
+
+# ScriptRequest -----------------------------------------------------------
+
+from StringIO import StringIO # needed by ScriptRequestStrings below
+
+class ScriptRequest(object):
+    """This is for scripts (MoinMoin/script/*) running from the commandline (CLI)
+       or from the xmlrpc server (triggered by a remote xmlrpc client).
+
+       Every script needs to do its I/O using this ScriptRequest object -
+       IT IS DIFFERENT from the usual "request" you have in moin (easy to see
+       with an xmlrpc script invocation: request.write will write to the
+       xmlrpc "channel", but scriptrequest.write needs to write to some buffer
+       we transmit later as an xmlrpc function return value).
+    """
+    def __init__(self, in_, out, err):
+        # "in" is a python keyword, thus the trailing underscore
+        self.in_ = in_
+        self.out = out
+        self.err = err
+
+    def read(self, n=None):
+        if n is None:
+            data = self.in_.read()
+        else:
+            data = self.in_.read(n)
+        return data
+
+    def write(self, data):
+        self.out.write(data)
+
+    def write_err(self, data):
+        self.err.write(data)
+
+
+class ScriptRequestCLI(ScriptRequest):
+    """ When a script runs directly on the shell, we just use the CLI request
+        object (see MoinMoin.request.CLI) to do I/O (which will use stdin/out/err).
+    """
+    def __init__(self, request):
+        self.request = request
+
+    def read(self, n=None):
+        return self.request.read(n)
+
+    def write(self, data):
+        return self.request.write(data)
+
+    def write_err(self, data):
+        return self.request.write(data) # XXX use correct request method - log, error, whatever.
+
+
+class ScriptRequestStrings(ScriptRequest):
+    """ When a script gets run by our xmlrpc server, we have the input as a
+        string and we also need to catch the output / error output as strings.
+    """
+    def __init__(self, instr):
+        self.in_ = StringIO(instr)
+        self.out = StringIO()
+        self.err = StringIO()
+
+    def fetch_output(self):
+        outstr = self.out.getvalue()
+        errstr = self.err.getvalue()
+        self.out.close()
+        self.err.close()
+        return outstr, errstr
+
+
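+# A hypothetical usage sketch (not part of this changeset): the xmlrpc server
+# side would drive a script through string buffers roughly like this:
+#
+#   script_request = ScriptRequestStrings("input data for the script")
+#   script_request.write("some output\n")
+#   outstr, errstr = script_request.fetch_output()
+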
+# Logging -----------------------------------------------------------------
 
 def fatal(msgtext, **kw):
     """ Print error msg to stderr and exit. """
@@ -38,9 +101,7 @@
         sys.stderr.write(msgtext + "\n")
 
 
-#############################################################################
-### Commandline Support
-#############################################################################
+# Commandline Support --------------------------------------------------------
 
 class Script:
     def __init__(self, script, usage, argv=None, def_values=None):
@@ -70,12 +131,12 @@
         if def_values:
             self.parser.set_defaults(**def_values.__dict__)
         self.parser.add_option(
-            "-q", "--quiet", 
+            "-q", "--quiet",
             action="store_true", dest="quiet",
             help="Be quiet (no informational messages)"
         )
         self.parser.add_option(
-            "--show-timing", 
+            "--show-timing",
             action="store_true", dest="show_timing", default=False,
             help="Show timing values [default: %default]"
         )
@@ -122,7 +183,7 @@
             "--page", dest="page", default='',
             help="wiki page name [default: %default]"
         )
-    
+
     def init_request(self):
         """ create request """
         from MoinMoin.request import CLI
@@ -130,7 +191,7 @@
             self.request = CLI.Request(self.options.wiki_url, self.options.page)
         else:
             self.request = CLI.Request(pagename=self.options.page)
-        
+
     def mainloop(self):
         # Insert config dir or the current directory to the start of the path.
         config_dir = self.options.config_dir
--- a/MoinMoin/script/account/check.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/account/check.py	Sun Jul 09 22:39:15 2006 +0200
@@ -58,7 +58,7 @@
 # if a user subscribes to magicpages, it means that he wants to keep
 # exactly THIS account - this will avoid deleting it.
 magicpages = [
-    "ThisAccountIsCorrect", 
+    "ThisAccountIsCorrect",
     "DieserAccountIstRichtig",
 ]
 
@@ -111,28 +111,28 @@
         for uid in user.getUserList(request):
             u = user.User(request, uid)
             self.users[uid] = u
-    
+
             # collect name duplicates:
             if u.name in self.names:
                 self.names[u.name].append(uid)
             else:
                 self.names[u.name] = [uid]
-    
+
             # collect email duplicates:
             if u.email:
                 if u.email in self.emails:
                     self.emails[u.email].append(uid)
                 else:
                     self.emails[u.email] = [uid]
-    
+
             # collect account with no or invalid email address set:
             if not u.email or not re.match(".*@.*\..*", u.email):
                 self.uids_noemail[uid] = u.name
-    
+
     def hasmagicpage(self, uid):
         u = self.users[uid]
         return u.isSubscribedTo(magicpages)
-    
+
     def disableUser(self, uid):
         u = self.users[uid]
         print " %-20s %-30r %-35r" % (uid, u.name, u.email),
@@ -151,7 +151,7 @@
                 print "- disabled."
             else:
                 print "- would be disabled."
-    
+
     def getsortvalue(self, uid, user):
         t_ls = float(user.last_saved) # when user did last SAVE of his account data
         if self.options.lastsaved:
@@ -162,7 +162,7 @@
             except OSError:
                 t_lu = t_ls # better than having nothing
             return t_lu
-    
+
     def process(self, uidlist):
         sortlist = []
         for uid in uidlist:
@@ -177,17 +177,17 @@
         uid = sortlist[-1][1]
         u = self.users[uid]
         print " %-20s %-30r %-35r - keeping%s!" % (uid, u.name, u.email, self.hasmagicpage(uid) and " (magicpage)" or "")
-    
+
     def make_users_unique(self):
         for name, uids in self.names.items():
             if len(uids) > 1:
                 self.process(uids)
-    
+
     def make_emails_unique(self):
         for email, uids in self.emails.items():
             if len(uids) > 1:
                 self.process(uids)
-    
+
     def make_WikiNames(self):
         import string
         for uid, u in self.users.items():
@@ -223,8 +223,8 @@
             self.parser.error("incorrect number of arguments")
 
         # check for correct option combination
-        flags_given = (self.options.usersunique 
-                    or self.options.emailsunique 
+        flags_given = (self.options.usersunique
+                    or self.options.emailsunique
                     or self.options.wikinames
                     or self.options.removepasswords)
 
@@ -243,7 +243,7 @@
         self.collect_data()
         if self.options.usersunique:
             self.make_users_unique()
-        if self.options.emailsunique: 
+        if self.options.emailsunique:
             self.make_emails_unique()
         if self.options.wikinames:
             self.make_WikiNames()
--- a/MoinMoin/script/account/disable.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/account/disable.py	Sun Jul 09 22:39:15 2006 +0200
@@ -50,4 +50,4 @@
             print "- disabled."
         else:
             print "- is already disabled."
-            
+
--- a/MoinMoin/script/cli/show.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/cli/show.py	Sun Jul 09 22:39:15 2006 +0200
@@ -15,7 +15,7 @@
 
     def __init__(self, argv, def_values):
         MoinScript.__init__(self, argv, def_values)
-    
+
     def mainloop(self):
         self.init_request()
         self.request.run()
--- a/MoinMoin/script/export/dump.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/export/dump.py	Sun Jul 09 22:39:15 2006 +0200
@@ -73,11 +73,11 @@
         return dest_url
     else:
         return ""
-  
+
 
 class PluginScript(script.MoinScript):
     """ Dump script class """
-    
+
     def __init__(self, argv=None, def_values=None):
         script.MoinScript.__init__(self, argv, def_values)
         self.parser.add_option(
@@ -129,14 +129,14 @@
         page_front_page = wikiutil.getSysPage(request, request.cfg.page_front_page).page_name
         page_title_index = wikiutil.getSysPage(request, 'TitleIndex').page_name
         page_word_index = wikiutil.getSysPage(request, 'WordIndex').page_name
-        
+
         navibar_html = ''
         for p in [page_front_page, page_title_index, page_word_index]:
             navibar_html += '&nbsp;[<a href="%s">%s</a>]' % (wikiutil.quoteWikinameURL(p), wikiutil.escape(p))
 
         for pagename in pages:
             # we have the same name in URL and FS
-            file = wikiutil.quoteWikinameURL(pagename) 
+            file = wikiutil.quoteWikinameURL(pagename)
             script.log('Writing "%s"...' % file)
             try:
                 pagehtml = ''
--- a/MoinMoin/script/import/irclog.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/import/irclog.py	Sun Jul 09 22:39:15 2006 +0200
@@ -50,7 +50,7 @@
             "--file-dir", dest="file_dir", default='.',
             help="read files from DIRECTORY"
         )
-    
+
     def mainloop(self):
         self.init_request()
         request = self.request
--- a/MoinMoin/script/index/build.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/index/build.py	Sun Jul 09 22:39:15 2006 +0200
@@ -24,7 +24,7 @@
             "--mode", metavar="MODE", dest="mode",
             help="either add (unconditionally add to index) or update (update an existing index)"
         )
-    
+
     def mainloop(self):
         self.init_request()
         # Do we have additional files to index?
@@ -38,7 +38,7 @@
     """ Xapian index build script class """
 
     def command(self):
-        from MoinMoin.Xapian import Index
+        from MoinMoin.search.Xapian import Index
         Index(self.request).indexPages(self.files, self.options.mode)
         #Index(self.request).test(self.request)
 
--- a/MoinMoin/script/maint/cleancache.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/maint/cleancache.py	Sun Jul 09 22:39:15 2006 +0200
@@ -25,7 +25,7 @@
 class PluginScript(MoinScript):
     def __init__(self, argv, def_values):
         MoinScript.__init__(self, argv, def_values)
-    
+
     def mainloop(self):
         self.init_request()
         base = self.request.cfg.data_dir
--- a/MoinMoin/script/maint/cleanpage.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/maint/cleanpage.py	Sun Jul 09 22:39:15 2006 +0200
@@ -15,7 +15,7 @@
 class PluginScript(MoinScript):
     def __init__(self, argv, def_values):
         MoinScript.__init__(self, argv, def_values)
-    
+
     def qualify(self, p):
         """ look at page directory p and return its state """
         dir = os.listdir(p)
@@ -32,7 +32,7 @@
 
         if not revs and not atts:
             return 'trash'
-        
+
         if 'current-locked' in dir:
             return 'current-locked'
         elif 'current' in dir:
@@ -56,9 +56,9 @@
             pagedir = os.path.join(pagesdir, p)
             status = self.qualify(pagedir)
             if status in ['trash', 'empty', ]:
-                print "mv '%s' trash # %s" % (pagedir,status)
+                print "mv '%s' trash # %s" % (pagedir, status)
             elif status in ['deleted', ]:
-                print "mv '%s' deleted # %s" % (pagedir,status)
+                print "mv '%s' deleted # %s" % (pagedir, status)
             else:
                 print "# %s: '%s'" % (status, pagedir)
 
--- a/MoinMoin/script/maint/globaledit.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/maint/globaledit.py	Sun Jul 09 22:39:15 2006 +0200
@@ -17,9 +17,9 @@
 class PluginScript(MoinScript):
     def __init__(self, argv, def_values):
         MoinScript.__init__(self, argv, def_values)
-    
+
     def do_edit(self, pagename, origtext):
-        if pagename in ['LocalSpellingWords', 'LocalBadContent',] or pagename.endswith('Template'):
+        if pagename in ['LocalSpellingWords', 'LocalBadContent', ] or pagename.endswith('Template'):
             return origtext
         language_line = format_line = masterpage = None
         acl_lines = []
@@ -40,7 +40,7 @@
                 elif l.startswith('#format '):
                     format_line = l
                 elif l.startswith('##master-page:'):
-                    masterpage = l.split(':',1)[1].strip()
+                    masterpage = l.split(':', 1)[1].strip()
                     master_lines.append(l)
                 elif l.startswith('##master-date:'):
                     master_lines.append(l)
@@ -56,10 +56,10 @@
         if not format_line:
             format_line = '#format wiki'
         if not acl_lines and (
-            masterpage is None or masterpage not in ['FrontPage', 'WikiSandBox',] and not masterpage.endswith('Template')):
+            masterpage is None or masterpage not in ['FrontPage', 'WikiSandBox', ] and not masterpage.endswith('Template')):
             acl_lines = ['#acl MoinPagesEditorGroup:read,write,delete,revert All:read']
         if not master_lines:
-            master_lines = ['##master-page:Unknown-Page', '##master-date:Unknown-Date',]
+            master_lines = ['##master-page:Unknown-Page', '##master-date:Unknown-Date', ]
 
         c1old = "## Please edit (or translate) system/help pages on the moinmaster wiki ONLY."
         c2old = "## For more information, please see MoinMaster:MoinPagesEditorGroup."
@@ -68,16 +68,16 @@
         for c in (c1old, c2old, c1, c2):
             if c in comment_lines:
                 comment_lines.remove(c)
-            
+
         comment_lines = [c1, c2, ] + comment_lines
 
         if content_lines and content_lines[-1].strip(): # not an empty line at EOF
             content_lines.append('')
 
         if masterpage and masterpage.endswith('Template'):
-            changedtext = master_lines + [format_line, language_line,] + pragma_lines + content_lines
+            changedtext = master_lines + [format_line, language_line, ] + pragma_lines + content_lines
         else:
-            changedtext = comment_lines + master_lines + acl_lines + [format_line, language_line,] + pragma_lines + content_lines
+            changedtext = comment_lines + master_lines + acl_lines + [format_line, language_line, ] + pragma_lines + content_lines
         changedtext = '\n'.join(changedtext)
         return changedtext
 
@@ -85,9 +85,9 @@
         if debug:
             import codecs
             origtext = codecs.open('origtext', 'r', 'utf-8').read()
-            origtext = origtext.replace('\r\n','\n')
+            origtext = origtext.replace('\r\n', '\n')
             changedtext = self.do_edit("", origtext)
-            changedtext = changedtext.replace('\n','\r\n')
+            changedtext = changedtext.replace('\n', '\r\n')
             f = codecs.open('changedtext', 'w', 'utf-8')
             f.write(changedtext)
             f.close()
--- a/MoinMoin/script/maint/mkpagepacks.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/maint/mkpagepacks.py	Sun Jul 09 22:39:15 2006 +0200
@@ -54,13 +54,13 @@
         masterNonSystemPages = allPages - langPages - specialPages
 
         moinI18nPages = Set([x for x in masterNonSystemPages if x.startswith("MoinI18n")])
-        
+
         nodistPages = moinI18nPages | Set(["InterWikiMap", ])
 
         extraPages = masterNonSystemPages - nodistPages
 
         pageSets[ALL] = langPages
-        
+
         for name in pageSets.keys():
             if name not in (u"English"):
                 pageSets[name] -= pageSets[u"English"]
@@ -81,7 +81,7 @@
 
         cnt = 0
         script = [packLine(['MoinMoinPackage', '1']), ]
-                  
+
         for pagename in pagelist:
             pagename = pagename.strip()
             page = Page(request, pagename)
@@ -140,7 +140,7 @@
         if self.options.config_dir:
             print "NEVER EVER RUN THIS ON A REAL WIKI!!! This must be run on a local testwiki without any --config-dir!"
             return
-            
+
         self.init_request() # this request will work on a test wiki in testwiki/ directory
                             # we assume that there are current moinmaster pages there
         request = self.request
@@ -149,7 +149,7 @@
         if not ('testwiki' in request.cfg.data_dir and 'testwiki' in request.cfg.data_underlay_dir):
             print "NEVER EVER RUN THIS ON A REAL WIKI!!! This must be run on a local testwiki."
             return
-            
+
         self.gd = wikidicts.GroupDict(request)
         self.gd.reset()
 
@@ -161,10 +161,10 @@
 
         self.packageCompoundInstaller(pageSets, generate_filename(ALL))
 
-        [self.packagePages(list(pages), generate_filename(name), "ReplaceUnderlay") 
+        [self.packagePages(list(pages), generate_filename(name), "ReplaceUnderlay")
             for name, pages in pageSets.items() if not name in (u'English', ALL, NODIST)]
 
-        [self.removePages(list(pages)) 
+        [self.removePages(list(pages))
             for name, pages in pageSets.items() if not name in (u'English', ALL)]
 
         print "Finished."
--- a/MoinMoin/script/maint/reducewiki.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/maint/reducewiki.py	Sun Jul 09 22:39:15 2006 +0200
@@ -44,18 +44,18 @@
         """ quick and dirty! """
         pagedir = os.path.join(rootdir, 'pages', wikiutil.quoteWikinameFS(pagename))
         os.makedirs(pagedir)
-        
+
         # write a "current" file with content "00000001"
         revstr = '%08d' % 1
         cf = os.path.join(pagedir, 'current')
         file(cf, 'w').write(revstr+'\n')
-        
+
         # create a single revision 00000001
         revdir = os.path.join(pagedir, 'revisions')
         os.makedirs(revdir)
         tf = os.path.join(revdir, revstr)
         p = Page(request, pagename)
-        text = p.get_raw_body().replace("\n","\r\n")
+        text = p.get_raw_body().replace("\n", "\r\n")
         codecs.open(tf, 'wb', config.charset).write(text)
 
         source_dir = AttachFile.getAttachDir(request, pagename)
@@ -75,5 +75,5 @@
         pagelist = list(request.rootpage.getPageList(user=''))
         for pagename in pagelist:
             self.copypage(request, destdir, pagename)
-        
 
+
--- a/MoinMoin/script/migration/1050300.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/migration/1050300.py	Sun Jul 09 22:39:15 2006 +0200
@@ -16,13 +16,13 @@
         makedir(thisdir)
         fname = opj(thisdir, '__init__.py')
         f = open(fname, 'w')
-        f.write('''\
+        f.write("""\
 # -*- coding: iso-8859-1 -*-
 
 from MoinMoin.util import pysupport
 
 modules = pysupport.getPackageModules(__file__)
-''')
+""")
         f.close()
-    return rev+1
+    return rev + 1
 
--- a/MoinMoin/script/migration/data.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/migration/data.py	Sun Jul 09 22:39:15 2006 +0200
@@ -31,7 +31,7 @@
             "--all", action="store_true", dest="all_wikis",
             help="when given, update all wikis that belong to this farm"
         )
-    
+
     def mainloop(self):
         self.init_request()
         request = self.request
--- a/MoinMoin/script/migration/migutil.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/migration/migutil.py	Sun Jul 09 22:39:15 2006 +0200
@@ -44,7 +44,7 @@
     except OSError:
         fatalError("can't create '%s'" % src)
 
-    
+
 def listdir(path):
     """ Return list of files in path, filtering certain files """
     names = [name for name in os.listdir(path)
@@ -76,8 +76,8 @@
     try:
         data = open(fname_from).read()
         open(fname_to, "w").write(data)
-        st=os.stat(fname_from)
-        os.utime(fname_to, (st.st_atime,st.st_mtime))
+        st = os.stat(fname_from)
+        os.utime(fname_to, (st.st_atime, st.st_mtime))
     except:
         error("can't copy '%s' to '%s'" % (fname_from, fname_to))
 
@@ -109,3 +109,4 @@
             copy_file(src, dst)
         else:
             error("can't find '%s'" % src)
+
--- a/MoinMoin/script/moin.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/moin.py	Sun Jul 09 22:39:15 2006 +0200
@@ -13,7 +13,7 @@
 def run():
     from MoinMoin.script import MoinScript
     MoinScript().run(showtime=0)
-    
+
 if __name__ == "__main__":
     # Insert the path to MoinMoin in the start of the path
     import sys, os
--- a/MoinMoin/script/xmlrpc/__init__.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/xmlrpc/__init__.py	Sun Jul 09 22:39:15 2006 +0200
@@ -10,3 +10,4 @@
 
 # create a list of extension scripts from the subpackage directory
 modules = pysupport.getPackageModules(__file__)
+
--- a/MoinMoin/script/xmlrpc/mailimport.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/script/xmlrpc/mailimport.py	Sun Jul 09 22:39:15 2006 +0200
@@ -20,7 +20,7 @@
 
     def __init__(self, argv, def_values):
         MoinScript.__init__(self, argv, def_values)
-    
+
     def mainloop(self):
         try:
             import mailimportconf
@@ -29,10 +29,11 @@
 
         secret = mailimportconf.mailimport_secret
         url = mailimportconf.mailimport_url
-        
+
         s = xmlrpclib.ServerProxy(url)
 
         result = s.ProcessMail(secret, xmlrpclib.Binary(input.read()))
-        
+
         if result != "OK":
             print >>sys.stderr, result
+
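For context, the mailimport client above pulls exactly two settings from the mailimportconf module it tries to import. A minimal sketch of such a file, with placeholder values (only the attribute names are taken from the code above):

    # mailimportconf.py -- hypothetical example values
    mailimport_secret = "changeme"  # shared secret the server checks
    mailimport_url = "http://localhost:8080/wiki?action=xmlrpc2"  # XML-RPC endpoint
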
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/script/xmlrpc/remote.py	Sun Jul 09 22:39:15 2006 +0200
@@ -0,0 +1,42 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - remote command execution, client part
+
+    This can be used as client to execute moin scripts remotely.
+
+    @copyright: 2006 by MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+import sys
+import xmlrpclib
+
+from MoinMoin.script import MoinScript, fatal
+
+class PluginScript(MoinScript):
+    """ Remote Script Execution Client """
+
+    def __init__(self, argv, def_values):
+        MoinScript.__init__(self, argv, def_values)
+        self.argv = argv
+
+    def mainloop(self):
+        try:
+            import remotescriptconf as conf
+        except ImportError:
+            fatal("Could not find the file remotescriptconf.py. Maybe you want to use the config param?")
+
+        secret = conf.remotescript_secret
+        url = conf.remotescript_url
+        print url, secret, self.argv
+
+        s = xmlrpclib.ServerProxy(url)
+
+        # TODO handle stdin 
+        # xmlrpclib.Binary(sys.stdin.read())
+        result = s.RemoteScript(secret, self.argv)
+        # TODO handle stdout, stderr
+
+        if result != "OK":
+            print >>sys.stderr, result
+
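The new remote.py client likewise needs only two settings from the remotescriptconf module it imports. A minimal sketch with placeholder values (attribute names come from the imports above):

    # remotescriptconf.py -- hypothetical example values
    remotescript_secret = "changeme"  # shared secret the server checks
    remotescript_url = "http://localhost:8080/wiki?action=xmlrpc2"  # XML-RPC endpoint
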
--- a/MoinMoin/search.py	Sun Jul 09 15:31:02 2006 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1472 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-"""
-    MoinMoin - search engine
-    
-    @copyright: 2005 MoinMoin:FlorianFesti,
-                2005 MoinMoin:NirSoffer,
-                2005 MoinMoin:AlexanderSchremmer,
-                2006 MoinMoin:ThomasWaldmann,
-                2006 MoinMoin:FranzPletz
-    @license: GNU GPL, see COPYING for details
-"""
-
-import re, time, sys, StringIO, string, operator
-from sets import Set
-from MoinMoin import wikiutil, config
-from MoinMoin.Page import Page
-
-try:
-    import Xapian
-    from Xapian import Query, UnicodeQuery
-    use_stemming = Xapian.use_stemming
-except ImportError:
-    use_stemming = False
-
-#############################################################################
-### query objects
-#############################################################################
-
-class BaseExpression:
-    """ Base class for all search terms """
-    
-    def __init__(self):
-        self.negated = 0
-
-    def __str__(self):
-        return unicode(self).encode(config.charset, 'replace')
-
-    def negate(self):
-        """ Negate the result of this term """
-        self.negated = 1 
-
-    def pageFilter(self):
-        """ Return a page filtering function
-
-        This function is used to filter the page list before we search
-        it. Return a function that gets a page name and returns bool.
-
-        The default expression does not have any filter function and
-        returns None. Subclasses may define custom filter functions.
-        """
-        return None
-
-    def search(self, page):
-        """ Search a page
-
-        Returns a list of Match objects or None if the term didn't find
-        anything (vice versa if negate() was called). Terms containing
-        other terms must call this method to aggregate the results.
-        This base class returns [Match()] if negated, else None.
-        """
-        if self.negated:
-            # XXX why?
-            return [Match()]
-        else:
-            return None
-    
-    def costs(self):
-        """ Return estimated time to calculate this term
-        
-        Number is relative to other terms and has no real unit.
-        It allows doing the fast searches first.
-        """ 
-        return 0
-
-    def highlight_re(self):
-        """ Return a regular expression of what the term searches for
-
-        Used to display the needle in the page.
-        """
-        return ''
-
-    def _build_re(self, pattern, use_re=False, case=False, stemmed=False):
-        """ Make a regular expression out of a text pattern """
-        flags = case and re.U or (re.I | re.U)
-        if use_re:
-            try:
-                self.search_re = re.compile(pattern, flags)
-            except re.error:
-                pattern = re.escape(pattern)
-                self.pattern = pattern
-                self.search_re = re.compile(pattern, flags)
-            else:
-                self.pattern = pattern
-        else:
-            pattern = re.escape(pattern)
-            self.search_re = re.compile(pattern, flags)
-            self.pattern = pattern
-
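As an aside on _build_re above: a plain-text pattern is escaped so regex metacharacters match literally, while use_re leaves the pattern as a regular expression. A minimal sketch, assuming the TextSearch and TitleSearch classes defined further down in this module:

    t = TextSearch(u'a.b')                 # plain text: the dot is escaped
    print t.search_re.pattern              # -> a\.b
    r = TitleSearch(u'a.b', use_re=True)   # regex: the dot matches any character
    print r.search_re.pattern              # -> a.b
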
-
-class AndExpression(BaseExpression):
-    """ A term connecting several sub terms with a logical AND """
-
-    operator = ' '
-
-    def __init__(self, *terms):
-        self._subterms = list(terms)
-        self._costs = 0
-        for t in self._subterms:
-            self._costs += t.costs()
-        self.negated = 0
-
-    def append(self, expression):
-        """ Append another term """
-        self._subterms.append(expression)
-        self._costs += expression.costs()
-
-    def subterms(self):
-        return self._subterms
-    
-    def costs(self):
-        return self._costs
-
-    def __unicode__(self):
-        result = ''
-        for t in self._subterms:
-            result += self.operator + unicode(t)
-        return u'[' + result[len(self.operator):] + u']'
-
-    def pageFilter(self):
-        """ Return a page filtering function
-
-        This function is used to filter page list before we search it.
-
-        Return a function that gets a page name and returns bool, or None.
-        """
-        # Sort terms by cost, then get all title searches
-        self.sortByCost()
-        terms = [term for term in self._subterms if isinstance(term, TitleSearch)]
-        if terms:
-            # Create and return a filter function
-            def filter(name):
-                """ A function that return True if all terms filter name """
-                for term in terms:
-                    filter = term.pageFilter()
-                    if not filter(name):
-                        return False
-                return True
-            return filter
-        
-        return None
-
-    def sortByCost(self):
-        tmp = [(term.costs(), term) for term in self._subterms]
-        tmp.sort()
-        self._subterms = [item[1] for item in tmp]
-
-    def search(self, page):
-        """ Search for each term, cheap searches first """
-        self.sortByCost()
-        matches = []
-        for term in self._subterms:
-            result = term.search(page)
-            if not result:
-                return None
-            matches.extend(result)
-        return matches
-
-    def highlight_re(self):
-        result = []
-        for s in self._subterms:
-            highlight_re = s.highlight_re()
-            if highlight_re: result.append(highlight_re)
-            
-        return '|'.join(result)
-
-    def xapian_wanted(self):
-        wanted = True
-        for term in self._subterms:
-            wanted = wanted and term.xapian_wanted()
-        return wanted
-
-    def xapian_term(self, request):
-        # sort negated terms
-        terms = []
-        not_terms = []
-        for term in self._subterms:
-            if not term.negated:
-                terms.append(term.xapian_term(request))
-            else:
-                not_terms.append(term.xapian_term(request))
-
-        # prepare query for not negated terms
-        if len(terms) == 1:
-            t1 = Query(terms[0])
-        else:
-            t1 = Query(Query.OP_AND, terms)
-
-        # negated terms?
-        if not not_terms:
-            # no, just return query for not negated terms
-            return t1
-        
-        # yes, link not negated and negated terms' query with an AND_NOT query
-        if len(not_terms) == 1:
-            t2 = Query(not_terms[0])
-        else:
-            t2 = Query(Query.OP_OR, not_terms)
-
-        return Query(Query.OP_AND_NOT, t1, t2)
-
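The AND_NOT assembly above can be tried directly against the xapian bindings; a minimal sketch with made-up terms:

    import xapian
    from xapian import Query

    positive = Query(Query.OP_AND, [Query('wiki'), Query('search')])
    negated = Query('deprecated')
    # documents matching all positive terms, minus those matching the negated one
    q = Query(Query.OP_AND_NOT, positive, negated)
    print q.get_description()
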
-
-class OrExpression(AndExpression):
-    """ A term connecting several sub terms with a logical OR """
-    
-    operator = ' or '
-
-    def search(self, page):
-        """ Search page with terms, cheap terms first
-
-        XXX Do we have any reason to sort here? we are not breaking out
-        of the search in any case.
-        """
-        self.sortByCost()
-        matches = []
-        for term in self._subterms:
-            result = term.search(page)
-            if result:
-                matches.extend(result)
-        return matches
-
-    def xapian_term(self, request):
-        # XXX: negated terms managed by _moinSearch?
-        return Query(Query.OP_OR, [term.xapian_term(request) for term in self._subterms])
-
-
-class TextSearch(BaseExpression):
-    """ A term that does a normal text search
-
-    Both page content and the page title are searched, using an
-    additional TitleSearch term.
-    """
-    
-    def __init__(self, pattern, use_re=False, case=False):
-        """ Init a text search
-
-        @param pattern: pattern to search for, ascii string or unicode
-        @param use_re: treat pattern as a regex instead of plain text, bool
-        @param case: do a case-sensitive search, bool
-        """
-        self._pattern = unicode(pattern)
-        self.negated = 0
-        self.use_re = use_re
-        self.case = case
-        self._build_re(self._pattern, use_re=use_re, case=case)
-        self.titlesearch = TitleSearch(self._pattern, use_re=use_re, case=case)
-        
-    def costs(self):
-        return 10000
-    
-    def __unicode__(self):
-        neg = self.negated and '-' or ''
-        return u'%s"%s"' % (neg, unicode(self._pattern))
-
-    def highlight_re(self):
-        return u"(%s)" % self._pattern
-
-    def search(self, page):
-        matches = []
-
-        # Search in page name
-        results = self.titlesearch.search(page)
-        if results:
-            matches.extend(results)
-
-        # Search in page body
-        body = page.get_raw_body()
-        for match in self.search_re.finditer(body):
-            if use_stemming:
-                # somewhere in regular word
-                if body[match.start()] not in config.chars_upper and \
-                        body[match.start()-1] in config.chars_lower:
-                    continue
-
-                post = 0
-                for c in body[match.end():]:
-                    if c in config.chars_lower:
-                        post += 1
-                    else:
-                        break
-
-                matches.append(TextMatch(start=match.start(),
-                        end=match.end()+post))
-            else:
-                matches.append(TextMatch(re_match=match))
-
-        # Decide what to do with the results.
-        if ((self.negated and matches) or
-            (not self.negated and not matches)):
-            return None
-        elif matches:
-            return matches
-        else:
-            # XXX why not return None or empty list?
-            return [Match()]
-
-    def xapian_wanted(self):
-        return not self.use_re
-
-    def xapian_term(self, request):
-        if self.use_re:
-            return None # xapian can't do regex search
-        else:
-            analyzer = Xapian.WikiAnalyzer(language=request.cfg.language_default)
-            terms = self._pattern.split()
-
-            # all parsed wikiwords, AND'ed
-            queries = []
-            stemmed = []
-            for t in terms:
-                if use_stemming:
-                    # stemmed OR not stemmed
-                    tmp = []
-                    for i in analyzer.tokenize(t, flat_stemming=False):
-                        tmp.append(UnicodeQuery(Query.OP_OR, i))
-                        stemmed.append(i[1])
-                    t = tmp
-                else:
-                    # just not stemmed
-                    t = [UnicodeQuery(i) for i in analyzer.tokenize(t)]
-                queries.append(Query(Query.OP_AND, t))
-
-            if stemmed:
-                self._build_re(' '.join(stemmed), use_re=False,
-                        case=self.case, stemmed=True)
-
-            # titlesearch OR parsed wikiwords
-            return Query(Query.OP_OR,
-                    (self.titlesearch.xapian_term(request),
-                        Query(Query.OP_AND, queries)))
-
-
-class TitleSearch(BaseExpression):
-    """ Term searches in pattern in page title only """
-
-    def __init__(self, pattern, use_re=False, case=False):
-        """ Init a title search
-
-        @param pattern: pattern to search for, ascii string or unicode
-        @param use_re: treat pattern as a regex instead of plain text, bool
-        @param case: do a case-sensitive search, bool
-        """
-        self._pattern = unicode(pattern)
-        self.negated = 0
-        self.use_re = use_re
-        self.case = case
-        self._build_re(self._pattern, use_re=use_re, case=case)
-        
-    def costs(self):
-        return 100
-
-    def __unicode__(self):
-        neg = self.negated and '-' or ''
-        return u'%s!"%s"' % (neg, unicode(self._pattern))
-
-    def highlight_re(self):
-        return u"(%s)" % self._pattern
-
-    def pageFilter(self):
-        """ Page filter function for single title search """
-        def filter(name):
-            match = self.search_re.search(name)
-            if ((self.negated and match) or
-                (not self.negated and not match)):
-                return False
-            return True
-        return filter
-            
-    def search(self, page):
-        # Get matches in page name
-        matches = []
-        for match in self.search_re.finditer(page.page_name):
-            if use_stemming:
-                # somewhere in regular word
-                if page.page_name[match.start()] not in config.chars_upper and \
-                        page.page_name[match.start()-1] in config.chars_lower:
-                    continue
-
-                post = 0
-                for c in page.page_name[match.end():]:
-                    if c in config.chars_lower:
-                        post += 1
-                    else:
-                        break
-
-                matches.append(TitleMatch(start=match.start(),
-                        end=match.end()+post))
-            else:
-                matches.append(TitleMatch(re_match=match))
-        
-        if ((self.negated and matches) or
-            (not self.negated and not matches)):
-            return None
-        elif matches:
-            return matches
-        else:
-            # XXX why not return None or empty list?
-            return [Match()]
-
-    def xapian_wanted(self):
-        return not self.use_re
-
-    def xapian_term(self, request):
-        if self.use_re:
-            return None # xapian doesn't support regex search
-        else:
-            analyzer = Xapian.WikiAnalyzer(language=request.cfg.language_default)
-            terms = self._pattern.split()
-            terms = [list(analyzer.raw_tokenize(t)) for t in terms]
-
-            # all parsed wikiwords, AND'ed
-            queries = []
-            stemmed = []
-            for t in terms:
-                if use_stemming:
-                    # stemmed OR not stemmed
-                    tmp = []
-                    for i in analyzer.tokenize(t, flat_stemming=False):
-                        tmp.append(UnicodeQuery(Query.OP_OR, ['%s%s' %
-                            (Xapian.Index.prefixMap['title'], j) for j in i]))
-                        stemmed.append(i[1])
-                    t = tmp
-                else:
-                    # just not stemmed
-                    t = [UnicodeQuery('%s%s' % (Xapian.Index.prefixMap['title'], i))
-                        for i in analyzer.tokenize(t)]
-
-                queries.append(Query(Query.OP_AND, t))
-
-            if stemmed:
-                self._build_re(' '.join(stemmed), use_re=False,
-                        case=self.case, stemmed=True)
-
-            return Query(Query.OP_AND, queries)
-
-
-class LinkSearch(BaseExpression):
-    """ Search the term in the pagelinks """
-
-    def __init__(self, pattern, use_re=False, case=True):
-        """ Init a link search
-
-        @param pattern: pattern to search for, ascii string or unicode
-        @param use_re: treat pattern as a regex instead of plain text, bool
-        @param case: do a case-sensitive search, bool
-        """
-        # used for search in links
-        self._pattern = pattern
-        # used for search in text
-        self._textpattern = '(' + self._pattern.replace('/', '|') + ')'
-        self.negated = 0
-        self.use_re = use_re
-        self.case = case
-        self.textsearch = TextSearch(self._textpattern, use_re=1, case=case)
-        self._build_re(unicode(pattern), use_re=use_re, case=case)
-
-    def _build_re(self, pattern, use_re=False, case=False):
-        """ Make a regular expression out of a text pattern """
-        flags = case and re.U or (re.I | re.U)
-        try:
-            if not use_re:
-                raise re.error
-            self.search_re = re.compile(pattern, flags)
-            self.static = False
-        except re.error:
-            self.pattern = pattern
-            self.static = True
-        
-    def costs(self):
-        return 5000 # cheaper than a TextSearch
-
-    def __unicode__(self):
-        neg = self.negated and '-' or ''
-        return u'%s!"%s"' % (neg, unicode(self._pattern))
-
-    def highlight_re(self):
-        return u"(%s)" % self._textpattern
-
-    def search(self, page):
-        # Get matches in page name
-        matches = []
-
-        Found = True
-        
-        for link in page.getPageLinks(page.request):
-            if ((self.static and self.pattern == link) or
-                (not self.static and self.search_re.match(link))):
-                break
-        else:
-            Found = False
-
-        if Found:
-            # Search in page text
-            results = self.textsearch.search(page)
-            if results:
-                matches.extend(results)
-            else: # This happens e.g. for pages that use navigation macros
-                matches.append(TextMatch(0, 0))
-
-        # Decide what to do with the results.
-        if ((self.negated and matches) or
-            (not self.negated and not matches)):
-            return None
-        elif matches:
-            return matches
-        else:
-            # XXX why not return None or empty list?
-            return [Match()]
-
-    def xapian_wanted(self):
-        return not self.use_re
-
-    def xapian_term(self, request):
-        if self.use_re:
-            return None # xapian doesn't support regex search
-        else:
-            # self.pattern is only set for static (non-regex) patterns
-            return UnicodeQuery('%s:%s' %
-                    (Xapian.Index.prefixMap['linkto'], self.pattern))
-
-
-class LanguageSearch(BaseExpression):
-    """ Search the pages written in a language """
-
-    def __init__(self, pattern, use_re=False, case=True):
-        """ Init a language search
-
-        @param pattern: pattern to search for, ascii string or unicode
-        @param use_re: treat pattern as a regex instead of plain text, bool
-        @param case: do a case-sensitive search, bool
-        """
-        # iso language code, always lowercase
-        self._pattern = pattern.lower()
-        self.negated = 0
-        self.use_re = use_re
-        self.case = case
-        self.xapian_called = False
-        self._build_re(self._pattern, use_re=use_re, case=case)
-
-    def costs(self):
-        return 5000 # cheaper than a TextSearch
-
-    def __unicode__(self):
-        neg = self.negated and '-' or ''
-        return u'%s!"%s"' % (neg, unicode(self._pattern))
-
-    def highlight_re(self):
-        return ""
-
-    def search(self, page):
-        # We just use (and trust ;)) xapian for this; deactivated for _moinSearch
-        if not self.xapian_called:
-            return None
-        else:
-            # XXX why not return None or empty list?
-            return [Match()]
-
-    def xapian_wanted(self):
-        return not self.use_re
-
-    def xapian_term(self, request):
-        pattern = self.pattern
-        if self.use_re:
-            return None # xapian doesn't support regex search
-        else:
-            self.xapian_called = True
-            return UnicodeQuery('%s%s' %
-                    (Xapian.Index.prefixMap['lang'], pattern))
-
-
-############################################################################
-### Results
-############################################################################
-
-class Match(object):
-    """ Base class for all Matches (found pieces of pages).
-    
-    This class represents an empty True value as returned from negated searches.
-    """
-    # Default match weight
-    _weight = 1.0
-    
-    def __init__(self, start=0, end=0, re_match=None):
-        self.re_match = re_match
-        if not re_match:
-            self._start = start
-            self._end = end
-        else:
-            self._start = self._end = 0
-
-    def __len__(self):
-        return self.end - self.start
-
-    def __eq__(self, other):
-        equal = (self.__class__ == other.__class__ and
-                 self.start == other.start and
-                 self.end == other.end)
-        return equal
-        
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def view(self):
-        return ''
-
-    def weight(self):
-        return self._weight
-
-    def _get_start(self):
-        if self.re_match:
-            return self.re_match.start()
-        return self._start
-
-    def _get_end(self):
-        if self.re_match:
-            return self.re_match.end()
-        return self._end
-
-    # object properties
-    start = property(_get_start)
-    end   = property(_get_end)
-
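A short illustration of the property delegation above: when a Match wraps a regular expression match object, start and end come from that object instead of the stored values.

    import re
    m = re.search(u'needle', u'finding a needle in a haystack')
    match = Match(re_match=m)
    assert (match.start, match.end) == (m.start(), m.end())
    assert len(match) == len(u'needle')    # __len__ is end - start
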
-
-class TextMatch(Match):
-    """ Represents a match in the page content """
-    pass
-
-
-class TitleMatch(Match):
-    """ Represents a match in the page title
-    
-    Has more weight than a match in the page content.
-    """
-    # Matches in titles are much more important in wikis. This setting
-    # seems to make all pages that have matches in the title appear
-    # before pages whose titles do not match.
-    _weight = 100.0
-
-
-class AttachmentMatch(Match):
-    """ Represents a match in a attachment content
-
-    Not used yet.
-    """
-    pass
-
-
-class FoundPage:
-    """ Represents a page in a search result """
-
-    def __init__(self, page_name, matches=None, page=None):
-        self.page_name = page_name
-        self.attachment = '' # this is not an attachment
-        self.page = page
-        if matches is None:
-            matches = []
-        self._matches = matches
-
-    def weight(self, unique=1):
-        """ returns how important this page is for the terms searched for
-
-        Summarizes the weight of all page matches.
-
-        @param unique: ignore identical matches
-        @rtype: float
-        @return: page weight
-        """
-        weight = 0
-        for match in self.get_matches(unique=unique):
-            weight += match.weight()
-            # More sophisticated things to be added, like increase
-            # weight of near matches.
-        return weight
-
-    def add_matches(self, matches):
-        """ Add found matches """
-        self._matches.extend(matches)
-
-    def get_matches(self, unique=1, sort='start', type=Match):
-        """ Return all matches of type sorted by sort
-
-        @param unique: return only unique matches (bool)
-        @param sort: match attribute to sort by (string)
-        @param type: type of match to return (Match or subclass)
-        @rtype: list
-        @return: list of matches
-        """
-        if unique:
-            matches = self._unique_matches(type=type)
-            if sort == 'start':
-                # matches already sorted by match.start, finished.
-                return matches
-        else:
-            matches = self._matches
-
-        # Filter by type and sort by sort using fast schwartzian
-        # transform.
-        if sort == 'start':
-            tmp = [(match.start, match) for match in matches
-                   if isinstance(match, type)]
-        else:
-            tmp = [(match.weight(), match) for match in matches
-                   if isinstance(match, type)]
-        tmp.sort()
-        if sort == 'weight':
-            tmp.reverse()
-        matches = [item[1] for item in tmp]
-        
-        return matches
-
-    def _unique_matches(self, type=Match):
-        """ Get a list of unique matches of type
-
-        The result is sorted by match.start, because it's easy to remove
-        duplicates like this.
-
-        @param type: type of match to return
-        @rtype: list
-        @return: list of matches of type, sorted by match.start
-        """
-        # Filter by type and sort by match.start using fast schwartzian
-        # transform.
-        tmp = [(match.start, match) for match in self._matches
-               if isinstance(match, type)]
-        tmp.sort()
-
-        if not len(tmp):
-            return []
-
-        # Get first match into matches list
-        matches = [tmp[0][1]]
-
-        # Add the remaining ones of matches ignoring identical matches
-        for item in tmp[1:]:
-            if item[1] == matches[-1]:
-                continue
-            matches.append(item[1])
-
-        return matches
-    
-
-class FoundAttachment(FoundPage):
-    """ Represent an attachment in search results """
-    
-    def __init__(self, page_name, attachment, matches=None, page=None):
-        self.page_name = page_name
-        self.attachment = attachment
-        self.page = page
-        if matches is None:
-            matches = []
-        self._matches = matches
-
-    def weight(self, unique=1):
-        return 1
-
-    def get_matches(self, unique=1, sort='start', type=Match):
-        return []
-
-    def _unique_matches(self, type=Match):
-        return []
-
-
-class FoundRemote(FoundPage):
-    """ Represent an attachment in search results """
-    
-    def __init__(self, wikiname, page_name, attachment, matches=None, page=None):
-        self.wikiname = wikiname
-        self.page_name = page_name
-        self.attachment = attachment
-        self.page = page
-        if matches is None:
-            matches = []
-        self._matches = matches
-
-    def weight(self, unique=1):
-        return 1
-
-    def get_matches(self, unique=1, sort='start', type=Match):
-        return []
-
-    def _unique_matches(self, type=Match):
-        return []
-
-
-##############################################################################
-### Parse Query
-##############################################################################
-
-
-class QueryParser:
-    """
-    Converts a string into a tree of Query objects
-    using recursive top-down parsing
-    """
-
-    def __init__(self, **kw):
-        """
-        @keyword titlesearch: treat all terms as title searches
-        @keyword case: do case sensitive search
-        @keyword regex: treat all terms as regular expressions
-        """
-        self.titlesearch = kw.get('titlesearch', 0)
-        self.case = kw.get('case', 0)
-        self.regex = kw.get('regex', 0)
-
-    def parse_query(self, query):
-        """ transform an string into a tree of Query objects """
-        if isinstance(query, str):
-            query = query.decode(config.charset)
-        self._query = query
-        result = self._or_expression()
-        if result is None:
-            result = BaseExpression()
-        return result
-
-    def _or_expression(self):
-        result = self._and_expression()
-        if self._query:
-            result = OrExpression(result)
-        while self._query:
-            q = self._and_expression()
-            if q:
-                result.append(q)
-        return result
-            
-    def _and_expression(self):
-        result = None
-        while not result and self._query:
-            result = self._single_term()
-        term = self._single_term()
-        if term:
-            result = AndExpression(result, term)
-        else:
-            return result
-        term = self._single_term()
-        while term:
-            result.append(term)
-            term = self._single_term()
-        return result
-                                
-    def _single_term(self):
-        regex = (r'(?P<NEG>-?)\s*(' +              # leading '-'
-                 r'(?P<OPS>\(|\)|(or\b(?!$)))|' +  # or, (, )
-                 r'(?P<MOD>(\w+:)*)' +
-                 r'(?P<TERM>("[^"]+")|' +
-                 r"('[^']+')|(\S+)))")             # search word itself
-        self._query = self._query.strip()
-        match = re.match(regex, self._query, re.U)
-        if not match:
-            return None
-        self._query = self._query[match.end():]
-        ops = match.group("OPS")
-        if ops == '(':
-            result = self._or_expression()
-            if match.group("NEG"): result.negate()
-            return result
-        elif ops == ')':
-            return None
-        elif ops == 'or':
-            return None
-        modifiers = match.group('MOD').split(":")[:-1]
-        text = match.group('TERM')
-        if self.isQuoted(text):
-            text = text[1:-1]
-
-        title_search = self.titlesearch
-        regex = self.regex
-        case = self.case
-        linkto = False
-        lang = False
-
-        for m in modifiers:
-            if "title".startswith(m):
-                title_search = True
-            elif "regex".startswith(m):
-                regex = True
-            elif "case".startswith(m):
-                case = True
-            elif "linkto".startswith(m):
-                linkto = True
-            elif "language".startswith(m):
-                lang = True
-
-        if lang:
-            obj = LanguageSearch(text, use_re=regex, case=False)
-        elif linkto:
-            obj = LinkSearch(text, use_re=regex, case=case)
-        elif title_search:
-            obj = TitleSearch(text, use_re=regex, case=case)
-        else:
-            obj = TextSearch(text, use_re=regex, case=case)
-
-        if match.group("NEG"):
-            obj.negate()
-        return obj
-
-    def isQuoted(self, text):
-        # Empty string '' is not considered quoted
-        if len(text) < 3:
-            return False
-        return (text.startswith('"') and text.endswith('"') or
-                text.startswith("'") and text.endswith("'"))
-
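Note that modifiers are matched by prefix, so "ti:" behaves like "title:" and several can be chained. A minimal sketch of driving the parser above:

    parser = QueryParser()
    query = parser.parse_query(u'title:WikiName -deprecated')
    # yields an AndExpression holding a TitleSearch and a negated TextSearch
    print unicode(query)
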
-
-############################################################################
-### Search results formatting
-############################################################################
-
-class SearchResults:
-    """ Manage search results, supply different views
-
-    Search results can hold valid search results and format them for
-    many requests, until the wiki content changes.
-
-    For example, one might ask for full page list sorted from A to Z,
-    and then ask for the same list sorted from Z to A. Or sort results
-    by name and then by rank.
-    """
-    # Public functions --------------------------------------------------
-    
-    def __init__(self, query, hits, pages, elapsed):
-        self.query = query # the query
-        self.hits = hits # hits list
-        self.sort = None # hits are unsorted initially
-        self.pages = pages # number of pages in the wiki
-        self.elapsed = elapsed # search time
-
-    def sortByWeight(self):
-        """ Sorts found pages by the weight of the matches """
-        tmp = [(hit.weight(), hit.page_name, hit) for hit in self.hits]
-        tmp.sort()
-        tmp.reverse()
-        self.hits = [item[2] for item in tmp]
-        self.sort = 'weight'
-        
-    def sortByPagename(self):
-        """ Sorts a list of found pages alphabetical by page name """
-        tmp = [(hit.page_name, hit) for hit in self.hits]
-        tmp.sort()
-        self.hits = [item[1] for item in tmp]
-        self.sort = 'page_name'
-        
-    def stats(self, request, formatter):
-        """ Return search statistics, formatted with formatter
-
-        @param request: current request
-        @param formatter: formatter to use
-        @rtype: unicode
-        @return: formatted statistics
-        """
-        _ = request.getText
-        output = [
-            formatter.paragraph(1),
-            formatter.text(_("%(hits)d results out of about %(pages)d pages.") %
-                   {'hits': len(self.hits), 'pages': self.pages}),
-            u' (%s)' % formatter.text(_("%.2f seconds") % self.elapsed),
-            formatter.paragraph(0),
-            ]
-        return ''.join(output)
-
-    def pageList(self, request, formatter, info=0, numbered=1):
-        """ Format a list of found pages
-
-        @param request: current request
-        @param formatter: formatter to use
-        @param info: show match info in title
-        @param numbered: use numbered list for display
-        @rtype: unicode
-        @return: formatted page list
-        """
-        self._reset(request, formatter)
-        f = formatter
-        write = self.buffer.write
-        if numbered:
-            list = f.number_list
-        else:
-            list = f.bullet_list
-
-        # Add pages formatted as list
-        if self.hits:
-            write(list(1))
-
-            for page in self.hits:
-                if page.attachment:
-                    querydict = {
-                        'action': 'AttachFile',
-                        'do': 'get',
-                        'target': page.attachment,
-                    }
-                else:
-                    querydict = None
-                querystr = self.querystring(querydict)
-            
-                matchInfo = ''
-                if info:
-                    matchInfo = self.formatInfo(f, page)
-                item = [
-                    f.listitem(1),
-                    f.pagelink(1, page.page_name, querystr=querystr),
-                    self.formatTitle(page),
-                    f.pagelink(0, page.page_name),
-                    matchInfo,
-                    f.listitem(0),
-                    ]
-                write(''.join(item))
-            write(list(0))
-
-        return self.getvalue()
-
-    def pageListWithContext(self, request, formatter, info=1, context=180,
-                            maxlines=1):
-        """ Format a list of found pages with context
-
-        The default parameter values will create Google-like search
-        results, as this is the best-known search interface. A good
-        interface is a familiar interface, so unless we have a much better
-        solution (we don't), being like Google is the way.
-
-        @param request: current request
-        @param formatter: formatter to use
-        @param info: show match info near the page link
-        @param context: how many characters to show around each match.
-        @param maxlines: how many context lines to show.
-        @rtype: unicode
-        @return: formatted page list with context
-        """
-        self._reset(request, formatter)
-        f = formatter
-        write = self.buffer.write
-        
-        # Add pages formatted as definition list
-        if self.hits:
-            write(f.definition_list(1))       
-
-            for page in self.hits:
-                matchInfo = ''
-                if info:
-                    matchInfo = self.formatInfo(f, page)
-                if page.attachment:
-                    fmt_context = ""
-                    querydict = {
-                        'action': 'AttachFile',
-                        'do': 'get',
-                        'target': page.attachment,
-                    }
-                elif page.page_name.startswith('FS/'): # XXX FS hardcoded
-                    fmt_context = ""
-                    querydict = None
-                else:
-                    fmt_context = self.formatContext(page, context, maxlines)
-                    querydict = None
-                querystr = self.querystring(querydict)
-                item = [
-                    f.definition_term(1),
-                    f.pagelink(1, page.page_name, querystr=querystr),
-                    self.formatTitle(page),
-                    f.pagelink(0, page.page_name),
-                    matchInfo,
-                    f.definition_term(0),
-                    f.definition_desc(1),
-                    fmt_context,
-                    f.definition_desc(0),
-                    ]
-                write(''.join(item))
-            write(f.definition_list(0))
-        
-        return self.getvalue()
-
-    # Private -----------------------------------------------------------
-
-    # These methods are not meant to be used by clients and may change
-    # without notice.
-    
-    def formatContext(self, page, context, maxlines):
-        """ Format search context for each matched page
-
-        Try to show first maxlines interesting matches context.
-        """
-        f = self.formatter
-        if not page.page:
-            page.page = Page(self.request, page.page_name)
-        body = page.page.get_raw_body()
-        last = len(body) - 1
-        lineCount = 0
-        output = []
-        
-        # Get unique text matches sorted by match.start, try to ignore
-        # matches in page header, and show the first maxlines matches.
-        # TODO: when we implement weight algorithm for text matches, we
-        # should get the list of text matches sorted by weight and show
-        # the first maxlines matches.
-        matches = page.get_matches(unique=1, sort='start', type=TextMatch)
-        i, start = self.firstInterestingMatch(page, matches)            
-
-        # Format context
-        while i < len(matches) and lineCount < maxlines:
-            match = matches[i]
-            
-            # Get context range for this match
-            start, end = self.contextRange(context, match, start, last)
-
-            # Format context lines for matches. Each complete match in
-            # the context will be highlighted, and if the full match is
-            # in the context, we increase the index, and will not show
-            # same match again on a separate line.
-
-            output.append(f.text(u'...'))
-            
-            # Get the index of the first match completely within the
-            # context.
-            for j in xrange(0, len(matches)):
-                if matches[j].start >= start:
-                    break
-
-            # Add all matches in context and the text between them 
-            while True:
-                match = matches[j]
-                # Ignore matches behind the current position
-                if start < match.end:
-                    # Append the text before match
-                    if start < match.start:
-                        output.append(f.text(body[start:match.start]))
-                    # And the match
-                    output.append(self.formatMatch(body, match, start))
-                    start = match.end
-                # Get next match, but only if its completely within the context
-                if j < len(matches) - 1 and matches[j + 1].end <= end:
-                    j += 1
-                else:
-                    break
-
-            # Add text after last match and finish the line
-            if match.end < end:
-                output.append(f.text(body[match.end:end]))
-            output.append(f.text(u'...'))
-            output.append(f.linebreak(preformatted=0))
-
-            # Increase line and point to the next match
-            lineCount += 1
-            i = j + 1
-
-        output = ''.join(output)
-
-        if not output:
-            # Return the first context characters from the page text
-            output = f.text(page.page.getPageText(length=context))
-            output = output.strip()
-            if not output:
-                # This is a page with no text, only header, for example,
-                # a redirect page.
-                output = f.text(page.page.getPageHeader(length=context))
-        
-        return output
-        
-    def firstInterestingMatch(self, page, matches):
-        """ Return the first interesting match
-
-        This function is needed only because we don't yet have a weight
-        algorithm for page text matches.
-        
-        Try to find the first match in the page text. If we can't find
-        one, we return the first match and start=0.
-
-        @rtype: tuple
-        @return: index of first match, start of text
-        """
-        header = page.page.getPageHeader()
-        start = len(header)
-        # Find first match after start
-        for i in xrange(len(matches)):
-            if matches[i].start >= start:
-                return i, start
-        return 0, 0
-
-    def contextRange(self, context, match, start, last):
-        """ Compute context range
-
-        Add context around each match. If there is no room for context
-        before or after the match, show more context on the other side.
-
-        @param context: context length
-        @param match: current match
-        @param start: context should not start before that index, unless
-                      end is past the last character.
-        @param last: last character index
-        @rtype: tuple
-        @return: start, end of context
-        """
-        # Start by giving equal context on both sides of match
-        contextlen = max(context - len(match), 0)
-        cstart = match.start - contextlen / 2
-        cend = match.end + contextlen / 2
-
-        # If the context starts before start, give more context at the end
-        if cstart < start:
-            cend += start - cstart
-            cstart = start
-            
-        # But if the end is after last, give context back to the start
-        if cend > last:
-            cstart -= cend - last
-            cend = last
-
-        # Keep context start positive for very short texts
-        cstart = max(cstart, 0)
-
-        return cstart, cend
-
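A worked check of the clamping above, with made-up numbers: for context=30 and a match spanning [10, 14), contextlen is 26, so cstart=-3 and cend=27; since cstart < start=0 the window shifts right to (0, 30).

    class _M:  # hypothetical stand-in with the attributes contextRange reads
        start, end = 10, 14
        def __len__(self):
            return self.end - self.start

    sr = SearchResults(None, [], 0, 0.0)
    print sr.contextRange(30, _M(), 0, 100)   # -> (0, 30)
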
-    def formatTitle(self, page):
-        """ Format page title
-
-        Invoke format match on all unique matches in page title.
-
-        @param page: found page
-        @rtype: unicode
-        @return: formatted title
-        """
-        # Get unique title matches sorted by match.start
-        matches = page.get_matches(unique=1, sort='start', type=TitleMatch)
-        
-        # Format
-        pagename = page.page_name
-        f = self.formatter
-        output = []
-        start = 0
-        for match in matches:
-            # Ignore matches behind the current position
-            if start < match.end:
-                # Append the text before the match
-                if start < match.start:
-                    output.append(f.text(pagename[start:match.start]))
-                # And the match
-                output.append(self.formatMatch(pagename, match, start))
-                start = match.end
-        # Add text after match
-        if start < len(pagename):
-            output.append(f.text(pagename[start:]))
-        
-        if page.attachment: # show the attachment that matched
-            output.extend([
-                    " ",
-                    f.strong(1),
-                    f.text("(%s)" % page.attachment),
-                    f.strong(0)])
-
-        return ''.join(output)
-
-    def formatMatch(self, body, match, location):
-        """ Format single match in text
-
-        Format the part of the match after the current location in the
-        text. Matches behind location are ignored and an empty string is
-        returned.
-
-        @param body: text containing match
-        @param match: search match in text
-        @param location: current location in text
-        @rtype: unicode
-        @return: formatted match or empty string
-        """        
-        start = max(location, match.start)
-        if start < match.end:
-            f = self.formatter
-            output = [
-                f.strong(1),
-                f.text(body[start:match.end]),
-                f.strong(0),
-                ]
-            return ''.join(output)
-        return ''
-
-    def querystring(self, querydict=None):
-        """ Return query string, used in the page link """
-        if querydict is None:
-            querydict = {'highlight': self.query.highlight_re()}
-        querystr = wikiutil.makeQueryString(querydict)
-        #querystr = wikiutil.escape(querystr)
-        return querystr
-
-    def formatInfo(self, formatter, page):
-        """ Return formatted match info """
-        template = u' . . . %s %s'
-        template = u"%s%s%s" % (formatter.span(1, css_class="info"),
-                                template,
-                                formatter.span(0))
-        # Count number of unique matches in text of all types
-        count = len(page.get_matches(unique=1))
-        info = template % (count, self.matchLabel[count != 1])
-        return info
-
-    def getvalue(self):
-        """ Return output in div with CSS class """
-        write = self.request.write
-        value = [
-            self.formatter.div(1, css_class='searchresults'),
-            self.buffer.getvalue(),
-            self.formatter.div(0),
-            ]
-        return '\n'.join(value)
-
-    def _reset(self, request, formatter):
-        """ Update internal state before new output
-
-        Do not call this, it should be called only by the instance code.
-
-        Each request might need different translations or other user
-        preferences.
-        """
-        self.buffer = StringIO.StringIO()
-        self.formatter = formatter
-        self.request = request
-        # Use 1 match, 2 matches...
-        _ = request.getText    
-        self.matchLabel = (_('match'), _('matches'))
-
-
-##############################################################################
-### Searching
-##############################################################################
-
-class Search:
-    """ A search run """
-    
-    def __init__(self, request, query):
-        self.request = request
-        self.query = query
-        self.filtered = False
-        self.fs_rootpage = "FS" # XXX FS hardcoded
-
-    def run(self):
-        """ Perform search and return results object """
-        start = time.time()
-        if self.request.cfg.xapian_search:
-            hits = self._xapianSearch()
-        else:
-            hits = self._moinSearch()
-            
-        # important - filter deleted pages or pages the user may not read!
-        if not self.filtered:
-            hits = self._filter(hits)
-        
-        result_hits = []
-        for wikiname, page, attachment, match in hits:
-            if wikiname in (self.request.cfg.interwikiname, 'Self'): # a local match
-                if attachment:
-                    result_hits.append(FoundAttachment(page.page_name, attachment))
-                else:
-                    result_hits.append(FoundPage(page.page_name, match))
-            else:
-                result_hits.append(FoundRemote(wikiname, page, attachment, match))
-        elapsed = time.time() - start
-        count = self.request.rootpage.getPageCount()
-        return SearchResults(self.query, result_hits, count, elapsed)
-
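Putting run() in context, a hedged sketch of how a caller might drive a full search (the request object and its formatter are assumed to come from the wiki runtime):

    query = QueryParser().parse_query(u'WikiName')
    results = Search(request, query).run()
    results.sortByWeight()
    request.write(results.stats(request, request.formatter))
    request.write(results.pageListWithContext(request, request.formatter))
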
-    # ----------------------------------------------------------------
-    # Private!
-
-    def _xapianSearch(self):
-        """ Search using Xapian
-        
-        Get a list of pages using fast xapian search and
-        return moin search in those pages.
-        """
-        pages = None
-        try:
-            index = Xapian.Index(self.request)
-        except NameError:
-            index = None
-        if index and index.exists() and self.query.xapian_wanted():
-            self.request.clock.start('_xapianSearch')
-            try:
-                from MoinMoin.support import xapwrap
-                query = self.query.xapian_term(self.request)
-                self.request.log("xapianSearch: query = %r" %
-                        query.get_description())
-                query = xapwrap.index.QObjQuery(query)
-                hits = index.search(query)
-                self.request.log("xapianSearch: finds: %r" % hits)
-                def dict_decode(d):
-                    """ decode dict values to unicode """
-                    for k, v in d.items():
-                        d[k] = d[k].decode(config.charset)
-                    return d
-                pages = [dict_decode(hit['values']) for hit in hits]
-                self.request.log("xapianSearch: finds pages: %r" % pages)
-            except index.LockedException:
-                pass
-            self.request.clock.stop('_xapianSearch')
-        return self._moinSearch(pages)
-
-    def _moinSearch(self, pages=None):
-        """ Search pages using moin's built-in full text search 
-        
-        Return list of tuples (page, match). The list may contain
-        deleted pages or pages the user may not read.
-        """
-        self.request.clock.start('_moinSearch')
-        from MoinMoin.Page import Page
-        if pages is None:
-            # if we are not called from _xapianSearch, we make a full pagelist,
-            # but don't search attachments (thus attachment name = '')
-            pages = [{'pagename': p, 'attachment': '', 'wikiname': 'Self', } for p in self._getPageList()]
-        hits = []
-        fs_rootpage = self.fs_rootpage
-        for valuedict in pages:
-            wikiname = valuedict['wikiname']
-            pagename = valuedict['pagename']
-            attachment = valuedict['attachment']
-            if wikiname in (self.request.cfg.interwikiname, 'Self'): # THIS wiki
-                page = Page(self.request, pagename)
-                if attachment:
-                    if pagename == fs_rootpage: # not really an attachment
-                        page = Page(self.request, "%s/%s" % (fs_rootpage, attachment))
-                        hits.append((wikiname, page, None, None))
-                    else:
-                        hits.append((wikiname, page, attachment, None))
-                else:
-                    match = self.query.search(page)
-                    if match:
-                        hits.append((wikiname, page, attachment, match))
-            else: # other wiki
-                hits.append((wikiname, pagename, attachment, None))
-        self.request.clock.stop('_moinSearch')
-        return hits
-
-    def _getPageList(self):
-        """ Get list of pages to search in 
-        
-        If the query has a page filter, use it to filter pages before
-        searching. If not, get a unfiltered page list. The filtering
-        will happen later on the hits, which is faster with current
-        slow storage.
-        """
-        filter = self.query.pageFilter()
-        if filter:
-            # There is no need to filter the results again.
-            self.filtered = True
-            return self.request.rootpage.getPageList(filter=filter)
-        else:
-            return self.request.rootpage.getPageList(user='', exists=0)
-        
-    def _filter(self, hits):
-        """ Filter out deleted or acl protected pages """
-        userMayRead = self.request.user.may.read
-        fs_rootpage = self.fs_rootpage + "/"
-        thiswiki = (self.request.cfg.interwikiname, 'Self')
-        filtered = [(wikiname, page, attachment, match) for wikiname, page, attachment, match in hits
-                    if not wikiname in thiswiki or
-                       page.exists() and userMayRead(page.page_name) or
-                       page.page_name.startswith(fs_rootpage)]    
-        return filtered
-        
-        
-def searchPages(request, query, **kw):
-    """ Search the text of all pages for query.
-    
-    @param request: current request
-    @param query: the expression we want to search for
-    @rtype: SearchResults instance
-    @return: search results
-    """
-    return Search(request, query).run()
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/Xapian.py	Sun Jul 09 22:39:15 2006 +0200
@@ -0,0 +1,482 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - xapian indexing search engine
+
+    @copyright: 2006 MoinMoin:ThomasWaldmann,
+                2006 MoinMoin:FranzPletz
+    @license: GNU GPL, see COPYING for details.
+"""
+debug = True
+
+import sys, os, re, codecs, time
+from pprint import pprint
+
+import xapian
+from xapian import Query
+from MoinMoin.support.xapwrap import document as xapdoc
+from MoinMoin.support.xapwrap import index as xapidx
+from MoinMoin.parser.text_moin_wiki import Parser as WikiParser
+
+from MoinMoin.Page import Page
+from MoinMoin import config, wikiutil
+from MoinMoin.search.builtin import BaseIndex
+
+try:
+    # PyStemmer, snowball python bindings from http://snowball.tartarus.org/
+    from Stemmer import Stemmer
+except ImportError:
+    Stemmer = None
+
+class UnicodeQuery(xapian.Query):
+    def __init__(self, *args, **kwargs):
+        self.encoding = kwargs.pop('encoding', config.charset) # pop, so xapian.Query does not see it
+
+        nargs = []
+        for term in args:
+            if isinstance(term, unicode):
+                term = term.encode(self.encoding)
+            elif isinstance(term, list) or isinstance(term, tuple):
+                term = [t.encode(self.encoding) for t in term]
+            nargs.append(term)
+
+        xapian.Query.__init__(self, *nargs, **kwargs)
+
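A minimal usage sketch of UnicodeQuery (the terms are made up for illustration): it only encodes unicode arguments to config.charset before delegating to xapian.Query, so callers can stay in unicode throughout.

q_single = UnicodeQuery(u'wiki')                           # a single unicode term
q_or = UnicodeQuery(Query.OP_OR, [u'wiki', u'caf\xe9'])    # a list of unicode terms
# both reach xapian.Query as byte strings encoded in config.charset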
+
+##############################################################################
+### Tokenizer
+##############################################################################
+
+def getWikiAnalyzerFactory(request=None, language='en'):
+    return (lambda: WikiAnalyzer(request, language))
+
+class WikiAnalyzer:
+    singleword = r"[%(u)s][%(l)s]+" % {
+                     'u': config.chars_upper,
+                     'l': config.chars_lower,
+                 }
+
+    singleword_re = re.compile(singleword, re.U)
+    wikiword_re = re.compile(WikiParser.word_rule, re.U)
+
+    token_re = re.compile(
+        r"(?P<company>\w+[&@]\w+)|" + # company names like AT&T and Excite@Home.
+        r"(?P<email>\w+([.-]\w+)*@\w+([.-]\w+)*)|" +    # email addresses
+        r"(?P<hostname>\w+(\.\w+)+)|" +                 # hostnames
+        r"(?P<num>(\w+[-/.,])*\w*\d\w*([-/.,]\w+)*)|" + # version numbers
+        r"(?P<acronym>(\w\.)+)|" +          # acronyms: U.S.A., I.B.M., etc.
+        r"(?P<word>\w+)",                   # words (including WikiWords)
+        re.U)
+
+    dot_re = re.compile(r"[-_/,.]")
+    mail_re = re.compile(r"[-_/,.]|(@)")
+    
+    # XXX limit stuff above to xapdoc.MAX_KEY_LEN
+    # WORD_RE = re.compile('\\w{1,%i}' % MAX_KEY_LEN, re.U)
+
+    def __init__(self, request=None, language=None):
+        if request and request.cfg.xapian_stemming and language and Stemmer:
+            self.stemmer = Stemmer(language)
+        else:
+            self.stemmer = None
+
+    def raw_tokenize(self, value):
+        def enc(uc):
+            """ 'encode' unicode results into whatever xapian / xapwrap wants """
+            return uc.lower()
+            
+        if isinstance(value, list): # used for page links
+            for v in value:
+                yield (enc(v), 0)
+        else:
+            tokenstream = re.finditer(self.token_re, value)
+            for m in tokenstream:
+                if m.group("acronym"):
+                    yield (enc(m.group("acronym").replace('.', '')),
+                            m.start())
+                elif m.group("company"):
+                    yield (enc(m.group("company")), m.start())
+                elif m.group("email"):
+                    displ = 0
+                    for word in self.mail_re.split(m.group("email")):
+                        if word:
+                            yield (enc(word), m.start() + displ)
+                            displ += len(word) + 1
+                elif m.group("hostname"):
+                    displ = 0
+                    for word in self.dot_re.split(m.group("hostname")):
+                        yield (enc(word), m.start() + displ)
+                        displ += len(word) + 1
+                elif m.group("num"):
+                    displ = 0
+                    for word in self.dot_re.split(m.group("num")):
+                        yield (enc(word), m.start() + displ)
+                        displ += len(word) + 1
+                elif m.group("word"):
+                    word = m.group("word")
+                    yield (enc(word), m.start())
+                    # if it is a CamelCaseWord, we additionally yield Camel, Case and Word
+                    if self.wikiword_re.match(word):
+                        for sm in re.finditer(self.singleword_re, word):
+                            yield (enc(sm.group()), m.start() + sm.start())
+
+    def tokenize(self, value, flat_stemming=True):
+        """Yield a stream of lower cased raw and stemmed (optional) words from a string.
+           value must be an UNICODE object or a list of unicode objects
+        """
+        for word, pos in self.raw_tokenize(value):
+            if flat_stemming:
+                # XXX: should we really use a prefix for that?
+                # Index.prefixMap['raw'] + i
+                yield (word, pos)
+                if self.stemmer:
+                    yield (self.stemmer.stemWord(word), pos)
+            else:
+                yield (word, self.stemmer.stemWord(word), pos)
+
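To illustrate the tokenizer, a sketch of what it yields for a CamelCase word when no stemmer is configured (positions assume this exact input; the output is approximate):

analyzer = WikiAnalyzer()          # no request given, so no stemmer
for word, pos in analyzer.tokenize(u'WikiName'):
    print word, pos
# roughly: 'wikiname' at 0, then the CamelCase parts 'wiki' at 0 and 'name' at 4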
+
+#############################################################################
+### Indexing
+#############################################################################
+
+class Index(BaseIndex):
+    indexValueMap = {
+        # mapping the value names we can easily fetch from the index to
+        # integers required by xapian. 0 and 1 are reserved by xapwrap!
+        'pagename': 2,
+        'attachment': 3,
+        'mtime': 4,
+        'wikiname': 5,
+    }
+    prefixMap = {
+        # http://svn.xapian.org/*checkout*/trunk/xapian-applications/omega/docs/termprefixes.txt
+        'author': 'A',
+        'date':   'D', # numeric format: YYYYMMDD or "latest" - e.g. D20050224 or Dlatest
+                       #G   newsGroup (or similar entity - e.g. a web forum name)
+        'hostname': 'H',
+        'keyword': 'K',
+        'lang': 'L',   # ISO Language code
+                       #M   Month (numeric format: YYYYMM)
+                       #N   ISO couNtry code (or domaiN name)
+                       #P   Pathname
+                       #Q   uniQue id
+        'raw':  'R',   # Raw (i.e. unstemmed) term
+        'title': 'S',  # Subject (or title)
+        'mimetype': 'T',
+        'url': 'U',    # full URL of indexed document - if the resulting term would be > 240
+                       # characters, a hashing scheme is used to prevent overflowing
+                       # the Xapian term length limit (see omindex for how to do this).
+                       #W   "weak" (approximately 10 day intervals, taken as YYYYMMD from
+                       #  the D term, and changing the last digit to a '2' if it's a '3')
+                       #X   longer prefix for user-defined use
+        'linkto': 'XLINKTO', # this document links to that document
+        'stem_lang': 'XSTEMLANG', # ISO Language code this document was stemmed in 
+                       #Y   year (four digits)
+    }
+
+    def __init__(self, request):
+        BaseIndex.__init__(self, request)
+
+        # Check if we should and can stem words
+        if request.cfg.xapian_stemming and not Stemmer:
+            request.cfg.xapian_stemming = False
+
+    def _main_dir(self):
+        if self.request.cfg.xapian_index_dir:
+            return os.path.join(self.request.cfg.xapian_index_dir,
+                    self.request.cfg.siteid)
+        else:
+            return os.path.join(self.request.cfg.cache_dir, 'xapian')
+
+    def _search(self, query):
+        """ read lock must be acquired """
+        while True:
+            try:
+                searcher, timestamp = self.request.cfg.xapian_searchers.pop()
+                if timestamp != self.mtime():
+                    searcher.close()
+                else:
+                    break
+            except IndexError:
+                searcher = xapidx.ReadOnlyIndex(self.dir)
+                searcher.configure(self.prefixMap, self.indexValueMap)
+                timestamp = self.mtime()
+                break
+        
+        hits = searcher.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname'])
+        self.request.cfg.xapian_searchers.append((searcher, timestamp))
+        return hits
+    
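The searcher pool used by _search above, reduced to a standalone sketch; make_searcher and current_mtime are stand-ins for the xapwrap and mtime() specifics. Stale searchers are closed, fresh ones are reused, and an empty pool opens a new one.

pool = []   # in MoinMoin this list lives on request.cfg.xapian_searchers

def get_searcher(make_searcher, current_mtime):
    while True:
        try:
            searcher, timestamp = pool.pop()
            if timestamp != current_mtime():
                searcher.close()    # index changed since this searcher was opened
                continue            # try the next pooled searcher
            return searcher, timestamp
        except IndexError:          # pool empty, open a fresh searcher
            return make_searcher(), current_mtime()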
+    def _do_queued_updates(self, request, amount=5):
+        """ Assumes that the write lock is acquired """
+        writer = xapidx.Index(self.dir, True)
+        writer.configure(self.prefixMap, self.indexValueMap)
+        pages = self.queue.pages()[:amount]
+        for name in pages:
+            p = Page(request, name)
+            self._index_page(writer, p, mode='update')
+            self.queue.remove([name])
+        writer.close()
+
+    def allterms(self):
+        db = xapidx.ExceptionTranslater.openIndex(True, self.dir)
+        i = db.allterms_begin()
+        while i != db.allterms_end():
+            yield i.get_term()
+            i.next()
+
+    def termpositions(self, uid, term):
+        db = xapidx.ExceptionTranslater.openIndex(True, self.dir)
+        pos = db.positionlist_begin(uid, term)
+        while pos != db.positionlist_end(uid, term):
+            yield pos.get_termpos()
+            pos.next()
+
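allterms() and termpositions() wrap xapian's begin/end iterator pairs as plain Python generators; a hypothetical caller (the uid and the prefix filtering are made up) might use them like this:

index = Index(request)      # assumes a request object is at hand
link_terms = [t for t in index.allterms() if t.startswith('XLINKTO')]
positions = list(index.termpositions(42, 'wiki'))   # 42 is a made-up document uid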
+    def _index_file(self, request, writer, filename, mode='update'):
+        """ index a file as it were a page named pagename
+            Assumes that the write lock is acquired
+        """
+        fs_rootpage = 'FS' # XXX FS hardcoded
+        try:
+            wikiname = request.cfg.interwikiname or 'Self'
+            itemid = "%s:%s" % (wikiname, os.path.join(fs_rootpage, filename))
+            mtime = os.path.getmtime(filename)
+            mtime = wikiutil.timestamp2version(mtime)
+            if mode == 'update':
+                query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
+                docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
+                if docs:
+                    doc = docs[0] # there should be only one
+                    uid = doc['uid']
+                    docmtime = long(doc['values']['mtime'])
+                    updated = mtime > docmtime
+                    if debug: request.log("uid %r: mtime %r > docmtime %r == updated %r" % (uid, mtime, docmtime, updated))
+                else:
+                    uid = None
+                    updated = True
+            elif mode == 'add':
+                updated = True
+            if debug: request.log("%s %r" % (filename, updated))
+            if updated:
+                xitemid = xapdoc.Keyword('itemid', itemid)
+                mimetype, file_content = self.contentfilter(filename)
+                xwname = xapdoc.SortKey('wikiname', request.cfg.interwikiname or "Self")
+                xpname = xapdoc.SortKey('pagename', fs_rootpage)
+                xattachment = xapdoc.SortKey('attachment', filename) # XXX we should treat files like real pages, not attachments
+                xmtime = xapdoc.SortKey('mtime', mtime)
+                title = " ".join(os.path.join(fs_rootpage, filename).split("/"))
+                xtitle = xapdoc.Keyword('title', title)
+                xmimetype = xapdoc.TextField('mimetype', mimetype, True)
+                xcontent = xapdoc.TextField('content', file_content)
+                doc = xapdoc.Document(textFields=(xcontent, xmimetype, ),
+                                      keywords=(xtitle, xitemid, ),
+                                      sortFields=(xpname, xattachment, xmtime, xwname, ),
+                                     )
+                doc.analyzerFactory = getWikiAnalyzerFactory()
+                if mode == 'update':
+                    if debug: request.log("%s (replace %r)" % (filename, uid))
+                    doc.uid = uid
+                    id = writer.index(doc)
+                elif mode == 'add':
+                    if debug: request.log("%s (add)" % (filename,))
+                    id = writer.index(doc)
+        except (OSError, IOError), err:
+            # the file may be missing or unreadable; skip it silently
+            pass
+
+    def _get_languages(self, page):
+        """ Return (language, stem_language) for a page, based on its
+            #language processing instruction and the wiki's default language """
+        body = page.get_raw_body()
+        default_lang = page.request.cfg.language_default
+
+        lang = ''
+
+        if page.request.cfg.xapian_stemming:
+            for line in body.split('\n'):
+                if line.startswith('#language'):
+                    lang = line.split(' ')[1]
+                    try:
+                        Stemmer(lang)
+                    except KeyError:
+                        # lang is not stemmable
+                        break
+                    else:
+                        # lang is stemmable
+                        return (lang, lang)
+                elif not line.startswith('#'):
+                    break
+        
+        if not lang:
+            # no lang found at all.. fallback to default language
+            lang = default_lang
+
+        # return actual lang and lang to stem in
+        return (lang, default_lang)
+
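For example (an illustrative page body, assuming PyStemmer knows 'fr'):

body = u"#language fr\nBonjour tout le monde"
# stemmable language:   _get_languages returns ('fr', 'fr')
# unstemmable language: returns (lang, cfg.language_default), i.e. the page
# keeps its language but gets stemmed in the wiki's default language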
+    def _index_page(self, writer, page, mode='update'):
+        """ Index a page - assumes that the write lock is acquired
+            @param writer: the index writer object
+            @param page: a page object
+            @param mode: 'add' = just add, no checks
+                         'update' = check if already in index and update if needed (mtime)
+            
+        """
+        request = page.request
+        wikiname = request.cfg.interwikiname or "Self"
+        pagename = page.page_name
+        mtime = page.mtime_usecs()
+        itemid = "%s:%s" % (wikiname, pagename)
+        # XXX: Hack until we get proper metadata
+        language, stem_language = self._get_languages(page)
+        updated = False
+
+        if mode == 'update':
+            # from #xapian: if you generate a special "unique id" term,
+            # you can just call database.replace_document(uid_term, doc)
+            # -> done in xapwrap.index.Index.index()
+            query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
+            docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
+            if docs:
+                doc = docs[0] # there should be only one
+                uid = doc['uid']
+                docmtime = long(doc['values']['mtime'])
+                updated = mtime > docmtime
+                if debug: request.log("uid %r: mtime %r > docmtime %r == updated %r" % (uid, mtime, docmtime, updated))
+            else:
+                uid = None
+                updated = True
+        elif mode == 'add':
+            updated = True
+        if debug: request.log("%s %r" % (pagename, updated))
+        if updated:
+            xwname = xapdoc.SortKey('wikiname', request.cfg.interwikiname or "Self")
+            xpname = xapdoc.SortKey('pagename', pagename)
+            xattachment = xapdoc.SortKey('attachment', '') # this is a real page, not an attachment
+            xmtime = xapdoc.SortKey('mtime', mtime)
+            xtitle = xapdoc.TextField('title', pagename, True) # prefixed
+            xkeywords = [xapdoc.Keyword('itemid', itemid),
+                    xapdoc.Keyword('lang', language),
+                    xapdoc.Keyword('stem_lang', stem_language)]
+            for pagelink in page.getPageLinks(request):
+                xkeywords.append(xapdoc.Keyword('linkto', pagelink))
+            xcontent = xapdoc.TextField('content', page.get_raw_body())
+            doc = xapdoc.Document(textFields=(xcontent, xtitle),
+                                  keywords=xkeywords,
+                                  sortFields=(xpname, xattachment, xmtime, xwname, ),
+                                 )
+            doc.analyzerFactory = getWikiAnalyzerFactory(request,
+                    stem_language)
+
+            if mode == 'update':
+                if debug: request.log("%s (replace %r)" % (pagename, uid))
+                doc.uid = uid
+                id = writer.index(doc)
+            elif mode == 'add':
+                if debug: request.log("%s (add)" % (pagename,))
+                id = writer.index(doc)
+
+        from MoinMoin.action import AttachFile
+
+        attachments = AttachFile._get_files(request, pagename)
+        for att in attachments:
+            filename = AttachFile.getFilename(request, pagename, att)
+            att_itemid = "%s//%s" % (itemid, att)
+            mtime = wikiutil.timestamp2version(os.path.getmtime(filename))
+            if mode == 'update':
+                query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', att_itemid))
+                docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', ])
+                if debug: request.log("##%r %r" % (filename, docs))
+                if docs:
+                    doc = docs[0] # there should be only one
+                    uid = doc['uid']
+                    docmtime = long(doc['values']['mtime'])
+                    updated = mtime > docmtime
+                    if debug: request.log("uid %r: mtime %r > docmtime %r == updated %r" % (uid, mtime, docmtime, updated))
+                else:
+                    uid = None
+                    updated = True
+            elif mode == 'add':
+                updated = True
+            if debug: request.log("%s %s %r" % (pagename, att, updated))
+            if updated:
+                xatt_itemid = xapdoc.Keyword('itemid', att_itemid)
+                xwname = xapdoc.SortKey('wikiname', wikiname) # also needed here - the page branch above may have been skipped
+                xpname = xapdoc.SortKey('pagename', pagename)
+                xattachment = xapdoc.SortKey('attachment', att) # this is an attachment, store its filename
+                xmtime = xapdoc.SortKey('mtime', mtime)
+                xtitle = xapdoc.Keyword('title', '%s/%s' % (pagename, att))
+                xlanguage = xapdoc.Keyword('lang', language)
+                xstem_language = xapdoc.Keyword('stem_lang', stem_language)
+                mimetype, att_content = self.contentfilter(filename)
+                xmimetype = xapdoc.TextField('mimetype', mimetype, True)
+                xcontent = xapdoc.TextField('content', att_content)
+                doc = xapdoc.Document(textFields=(xcontent, xmimetype, ),
+                                      keywords=(xatt_itemid, xtitle, xlanguage, xstem_language, ),
+                                      sortFields=(xpname, xattachment, xmtime, xwname, ),
+                                     )
+                doc.analyzerFactory = getWikiAnalyzerFactory(request,
+                        stem_language)
+                if mode == 'update':
+                    if debug: request.log("%s (replace %r)" % (pagename, uid))
+                    doc.uid = uid
+                    id = writer.index(doc)
+                elif mode == 'add':
+                    if debug: request.log("%s (add)" % (pagename,))
+                    id = writer.index(doc)
+        #writer.flush()
+
+    def _index_pages(self, request, files=None, mode='update'):
+        """ Index all pages (and all given files)
+        
+        This should be called from indexPages or indexPagesInNewThread only!
+        
+        This may take some time, depending on the size of the wiki and speed
+        of the machine.
+
+        When called in a new thread, lock is acquired before the call,
+        and this method must release it when it finishes or fails.
+        """
+        writer = xapidx.Index(self.dir, True)
+        writer.configure(self.prefixMap, self.indexValueMap)
+        try:
+            pages = request.rootpage.getPageList(user='', exists=1)
+            request.log("indexing all (%d) pages..." % len(pages))
+            for pagename in pages:
+                p = Page(request, pagename)
+                self._index_page(writer, p, mode)
+            if files:
+                request.log("indexing all files...")
+                for fname in files:
+                    fname = fname.strip()
+                    self._index_file(request, writer, fname, mode)
+        finally:
+            writer.close()
+
+def run_query(query, db):
+    enquire = xapian.Enquire(db)
+    parser = xapian.QueryParser()
+    query = parser.parse_query(query, xapian.QueryParser.FLAG_WILDCARD)
+    print query.get_description()
+    enquire.set_query(query)
+    return enquire.get_mset(0, 10)
+
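run_query is a small debugging helper; a hypothetical session against an existing index (the path is made up, and the iteration style mirrors the old xapian bindings used in the scratch code below):

db = xapian.open('/path/to/wiki/cache/xapian/index')
mset = run_query('wiki', db)
print mset.get_matches_estimated()
it = mset.begin()
while it != mset.end():
    print it.get_document().get_data()
    it.next()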
+def run(request):
+    pass
+    #print "Begin"
+    #db = xapian.WritableDatabase(xapian.open('test.db',
+    #                                         xapian.DB_CREATE_OR_OPEN))
+    #
+    # index_data(db) ???
+    #del db
+    #mset = run_query(sys.argv[1], db)
+    #print mset.get_matches_estimated()
+    #iterator = mset.begin()
+    #while iterator != mset.end():
+    #    print iterator.get_document().get_data()
+    #    iterator.next()
+    #for i in xrange(1,170):
+    #    doc = db.get_document(i)
+    #    print doc.get_data()
+
+if __name__ == '__main__':
+    run()
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/__init__.py	Sun Jul 09 22:39:15 2006 +0200
@@ -0,0 +1,27 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - search engine
+    
+    @copyright: 2005 MoinMoin:FlorianFesti,
+                2005 MoinMoin:NirSoffer,
+                2005 MoinMoin:AlexanderSchremmer,
+                2006 MoinMoin:ThomasWaldmann,
+                2006 MoinMoin:FranzPletz
+    @license: GNU GPL, see COPYING for details
+"""
+
+from MoinMoin.search.queryparser import QueryParser
+from MoinMoin.search.builtin import Search
+
+def searchPages(request, query, **kw):
+    """ Search the text of all pages for query.
+    
+    @param request: current request
+    @param query: the expression (string or query objects) we want to search for
+    @rtype: SearchResults instance
+    @return: search results
+    """
+    if isinstance(query, basestring):
+        query = QueryParser(**kw).parse_query(query)
+    return Search(request, query).run()
+
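A sketch of calling the new package entry point from action or macro code; the query strings are made up and the titlesearch keyword is an assumption about QueryParser's accepted arguments:

from MoinMoin.search import searchPages
results = searchPages(request, u'HelpOnEditing')              # full text search
title_hits = searchPages(request, u'Help', titlesearch=1)     # assumed keyword
# both return a SearchResults instance, as documented above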
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/builtin.py	Sun Jul 09 22:39:15 2006 +0200
@@ -0,0 +1,504 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - search engine
+    
+    @copyright: 2005 MoinMoin:FlorianFesti,
+                2005 MoinMoin:NirSoffer,
+                2005 MoinMoin:AlexanderSchremmer,
+                2006 MoinMoin:ThomasWaldmann,
+                2006 MoinMoin:FranzPletz
+    @license: GNU GPL, see COPYING for details
+"""
+
+import time, sys, os, errno, codecs
+from MoinMoin import wikiutil, config
+from MoinMoin.Page import Page
+from MoinMoin.util import filesys, lock
+from MoinMoin.search.results import getSearchResults
+from MoinMoin.search.queryparser import TextMatch, TitleMatch
+
+##############################################################################
+# Search Engine Abstraction
+##############################################################################
+
+class UpdateQueue:
+    def __init__(self, file, lock_dir):
+        self.file = file
+        self.writeLock = lock.WriteLock(lock_dir, timeout=10.0)
+        self.readLock = lock.ReadLock(lock_dir, timeout=10.0)
+
+    def exists(self):
+        return os.path.exists(self.file)
+
+    def append(self, pagename):
+        """ Append a page to queue """
+        if not self.writeLock.acquire(60.0):
+            # no request object is available here, so request.log cannot be used
+            sys.stderr.write("can't add %r to xapian update queue: can't lock queue\n"
+                             % pagename)
+            return
+        try:
+            f = codecs.open(self.file, 'a', config.charset)
+            try:
+                f.write(pagename + "\n")
+            finally:
+                f.close()
+        finally:
+            self.writeLock.release()
+
+    def pages(self):
+        """ Return list of pages in the queue """
+        if self.readLock.acquire(1.0):
+            try:
+                return self._decode(self._read())
+            finally:
+                self.readLock.release()
+        return []
+
+    def remove(self, pages):
+        """ Remove pages from the queue
+        
+        When the queue is empty, the queue file is removed, so exists()
+        can tell if there is something waiting in the queue.
+        """
+        if self.writeLock.acquire(30.0):
+            try:
+                queue = self._decode(self._read())
+                for page in pages:
+                    try:
+                        queue.remove(page)
+                    except ValueError:
+                        pass
+                if queue:
+                    self._write(queue)
+                else:
+                    self._removeFile()
+                return True
+            finally:
+                self.writeLock.release()
+        return False
+
+    # Private -------------------------------------------------------
+
+    def _decode(self, data):
+        """ Decode queue data """
+        pages = data.splitlines()
+        return self._filterDuplicates(pages)
+
+    def _filterDuplicates(self, pages):
+        """ Filter duplicates in page list, keeping the order """
+        unique = []
+        seen = {}
+        for name in pages:
+            if not name in seen:
+                unique.append(name)
+                seen[name] = 1
+        return unique
+
+    def _read(self):
+        """ Read and return queue data
+        
+        This does not do anything with the data so we can release the
+        lock as soon as possible, enabling others to update the queue.
+        """
+        try:
+            f = codecs.open(self.file, 'r', config.charset)
+            try:
+                return f.read()
+            finally:
+                f.close()
+        except (OSError, IOError), err:
+            if err.errno != errno.ENOENT:
+                raise
+            return ''
+
+    def _write(self, pages):
+        """ Write pages to queue file
+        
+        Requires queue write locking.
+        """
+        # XXX use tmpfile/move for atomic replace on real operating systems
+        data = '\n'.join(pages) + '\n'
+        f = codecs.open(self.file, 'w', config.charset)
+        try:
+            f.write(data)
+        finally:
+            f.close()
+
+    def _removeFile(self):
+        """ Remove queue file 
+        
+        Requires queue write locking.
+        """
+        try:
+            os.remove(self.file)
+        except OSError, err:
+            if err.errno != errno.ENOENT:
+                raise
+
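A standalone sketch of the queue protocol (the paths are made up; MoinMoin.util.lock creates its lock directories on demand):

q = UpdateQueue('/tmp/xapian-queue', '/tmp/xapian-queue-lock')
q.append(u'FrontPage')
q.append(u'HelpContents')
q.append(u'FrontPage')             # duplicate, filtered out on read
print q.pages()                    # [u'FrontPage', u'HelpContents']
q.remove([u'FrontPage', u'HelpContents'])
print q.exists()                   # False, the empty queue removed its file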
+class BaseIndex:
+    class LockedException(Exception):
+        pass
+
+    def __init__(self, request):
+        self.request = request
+        cache_dir = request.cfg.cache_dir
+        main_dir = self._main_dir()
+        self.dir = os.path.join(main_dir, 'index')
+        filesys.makeDirs(self.dir)
+        self.sig_file = os.path.join(main_dir, 'complete')
+        lock_dir = os.path.join(main_dir, 'index-lock')
+        self.lock = lock.WriteLock(lock_dir,
+                                   timeout=3600.0, readlocktimeout=60.0)
+        self.read_lock = lock.ReadLock(lock_dir, timeout=3600.0)
+        self.queue = UpdateQueue(os.path.join(main_dir, 'update-queue'),
+                                 os.path.join(main_dir, 'update-queue-lock'))
+
+        # Disabled until we have a sane way to build the index with a
+        # queue in small steps.
+        ## if not self.exists():
+        ##    self.indexPagesInNewThread(request)
+
+    def _main_dir(self):
+        raise NotImplementedError
+
+    def exists(self):
+        """ Check if index exists """        
+        return os.path.exists(self.sig_file)
+                
+    def mtime(self):
+        return os.path.getmtime(self.dir)
+    
+    def _search(self, query):
+        raise NotImplementedError
+
+    def search(self, query):
+        if not self.read_lock.acquire(1.0):
+            raise self.LockedException
+        try:
+            hits = self._search(query)
+        finally:
+            self.read_lock.release()
+        return hits
+
+    def update_page(self, page):
+        self.queue.append(page.page_name)
+        self._do_queued_updates_InNewThread()
+
+    def indexPages(self, files=None, mode='update'):
+        """ Index all pages (and files, if given)
+        
+        Can be called only from a script. To index pages during a user
+        request, use indexPagesInNewThread.
+        @param files: iterator or list of files to index additionally
+        """
+        if not self.lock.acquire(1.0):
+            self.request.log("can't index: can't acquire lock")
+            return
+        try:
+            self._unsign()
+            start = time.time()
+            request = self._indexingRequest(self.request)
+            self._index_pages(request, files, mode)
+            request.log("indexing completed successfully in %0.2f seconds." %
+                        (time.time() - start))
+            self._sign()
+        finally:
+            self.lock.release()
+
+    def indexPagesInNewThread(self, files=None, mode='update'):
+        """ Index all pages in a new thread
+        
+        Should be called from a user request. From a script, use indexPages.
+        """
+        # Prevent rebuilding the index just after it was finished
+        if self.exists():
+            return
+
+        from threading import Thread
+        indexThread = Thread(target=self._index_pages,
+                args=(self._indexingRequest(self.request), files, mode))
+        indexThread.setDaemon(True)
+        
+        # Join the index thread after current request finish, prevent
+        # Apache CGI from killing the process.
+        def joinDecorator(finish):
+            def func():
+                finish()
+                indexThread.join()
+            return func
+
+        self.request.finish = joinDecorator(self.request.finish)
+        indexThread.start()
+
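The joinDecorator idiom above, stripped of the MoinMoin specifics: it chains a join onto whatever teardown hook already exists, so the indexing thread outlives the CGI request but not the process.

def join_after(finish, thread):
    def func():
        finish()        # run the original request teardown first
        thread.join()   # then wait for the indexer to complete
    return func
# usage: request.finish = join_after(request.finish, indexThread)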
+    def _index_pages(self, request, files=None, mode='update'):
+        """ Index all pages (and all given files)
+        
+        This should be called from indexPages or indexPagesInNewThread only!
+        
+        This may take some time, depending on the size of the wiki and speed
+        of the machine.
+
+        When called in a new thread, lock is acquired before the call,
+        and this method must release it when it finishes or fails.
+        """
+        raise NotImplementedError
+
+    def _do_queued_updates_InNewThread(self):
+        """ do queued index updates in a new thread
+        
+        Should be called from a user request. From a script, use indexPages.
+        """
+        if not self.lock.acquire(1.0):
+            self.request.log("can't index: can't acquire lock")
+            return
+        try:
+            def lockedDecorator(self, f):
+                def func(*args, **kwargs):
+                    try:
+                        return f(*args, **kwargs)
+                    finally:
+                        self.lock.release()
+                return func
+
+            from threading import Thread
+            indexThread = Thread(
+                    target=lockedDecorator(self._do_queued_updates),
+                    args=(self._indexingRequest(self.request),))
+            indexThread.setDaemon(True)
+            
+            # Join the index thread after current request finish, prevent
+            # Apache CGI from killing the process.
+            def joinDecorator(finish):
+                def func():
+                    finish()
+                    indexThread.join()
+                return func
+                
+            self.request.finish = joinDecorator(self.request.finish)
+            indexThread.start()
+        except:
+            self.lock.release()
+            raise
+
+    def _do_queued_updates(self, request, amount=5):
+        raise NotImplementedError
+
+    def optimize(self):
+        raise NotImplementedError
+
+    def contentfilter(self, filename):
+        """ Get a filter for content of filename and return unicode content. """
+        request = self.request
+        mt = wikiutil.MimeType(filename=filename)
+        for modulename in mt.module_name():
+            try:
+                execute = wikiutil.importPlugin(request.cfg, 'filter', modulename)
+                break
+            except wikiutil.PluginMissingError:
+                pass
+        else:
+            # no filter plugin at all for this mimetype
+            request.log("Cannot load filter for mimetype %s" % mt.mime_type())
+            return mt.mime_type(), ''
+        try:
+            data = execute(self, filename)
+            # XXX: proper debugging?
+            #if debug:
+            #    request.log("Filter %s returned %d characters for file %s" % (modulename, len(data), filename))
+        except (OSError, IOError), err:
+            data = ''
+            request.log("Filter %s threw error '%s' for file %s" % (modulename, str(err), filename))
+        return mt.mime_type(), data
+
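A hypothetical contentfilter call (the path is made up; the result depends on which filter plugins are installed):

mimetype, text = index.contentfilter('/path/to/attachment.pdf')
# e.g. ('application/pdf', u'...extracted text...') if a pdf filter exists,
# or ('application/pdf', u'') if the filter raised an IOError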
+    def test(self, request):
+        raise NotImplementedError
+
+    def _indexingRequest(self, request):
+        """ Return a new request that can be used for index building.
+        
+        This request uses a security policy that lets the current user
+        read any page. Without this policy some pages would not render,
+        which would result in a broken pagelinks index.
+        """
+        from MoinMoin.request.CLI import Request
+        from MoinMoin.security import Permissions
+        request = Request(request.url)
+        class SecurityPolicy(Permissions):
+            def read(*args, **kw):
+                return True        
+        request.user.may = SecurityPolicy(request.user)
+        return request
+
+    def _unsign(self):
+        """ Remove sig file - assume write lock acquired """
+        try:
+            os.remove(self.sig_file)
+        except OSError, err:
+            if err.errno != errno.ENOENT:
+                raise
+
+    def _sign(self):
+        """ Add sig file - assume write lock acquired """
+        f = file(self.sig_file, 'w')
+        try:
+            f.write('')
+        finally:
+            f.close()
+
+##############################################################################
+### Searching
+##############################################################################
+
+class Search:
+    """ A search run """
+    
+    def __init__(self, request, query):
+        self.request = request
+        self.query = query
+        self.filtered = False
+        self.fs_rootpage = "FS" # XXX FS hardcoded
+
+    def run(self):
+        """ Perform search and return results object """
+        start = time.time()
+        if self.request.cfg.xapian_search:
+            hits = self._xapianSearch()
+        else:
+            hits = self._moinSearch()
+            
+        # important - filter deleted pages or pages the user may not read!
+        if not self.filtered:
+            hits = self._filter(hits)
+
+        return getSearchResults(self.request, self.query, hits, start)
+        
+
+    # ----------------------------------------------------------------
+    # Private!
+
+    def _xapianSearch(self):
+        """ Search using Xapian
+        
+        Get a list of candidate pages using the fast xapian search, then
+        run the moin search on just those pages.
+        """
+        pages = None
+        hits = []   # stays empty if the xapian index is locked
+        try:
+            from MoinMoin.search.Xapian import Index
+            index = Index(self.request)
+        except ImportError:
+            index = None
+        
+        if index and index.exists(): #and self.query.xapian_wanted():
+            self.request.clock.start('_xapianSearch')
+            try:
+                from MoinMoin.support import xapwrap
+                query = self.query.xapian_term(self.request, index.allterms)
+                self.request.log("xapianSearch: query = %r" %
+                        query.get_description())
+                query = xapwrap.index.QObjQuery(query)
+                enq, hits = index.search(query)
+                self.request.log("xapianSearch: finds: %r" % hits)
+                def dict_decode(d):
+                    """ decode dict values to unicode """
+                    for k, v in d.items():
+                        d[k] = d[k].decode(config.charset)
+                    return d
+                pages = [{'uid': hit['uid'], 'values': dict_decode(hit['values'])}
+                        for hit in hits]
+                self.request.log("xapianSearch: finds pages: %r" % pages)
+                self._xapianEnquire = enq
+                self._xapianIndex = index
+            except BaseIndex.LockedException:
+                pass
+            #except AttributeError:
+            #    pages = []
+            self.request.clock.stop('_xapianSearch')
+            return self._getHits(hits, self._xapianMatch)
+        else:
+            return self._moinSearch(pages)
+
+    def _xapianMatch(self, page, uid):
+        """ Build TextMatch objects from the terms matching document uid """
+        matches = []
+        term = self._xapianEnquire.get_matching_terms_begin(uid)
+        while term != self._xapianEnquire.get_matching_terms_end(uid):
+            for pos in self._xapianIndex.termpositions(uid, term.get_term()):
+                matches.append(TextMatch(start=pos,
+                    end=pos + len(term.get_term())))
+            term.next()
+        return matches
+
+    def _moinSearch(self, pages=None):
+        """ Search pages using moin's built-in full text search 
+        
+        Return list of tuples (page, match). The list may contain
+        deleted pages or pages the user may not read.
+        """
+        self.request.clock.start('_moinSearch')
+        from MoinMoin.Page import Page
+        if pages is None:
+            # if we are not called from _xapianSearch, we make a full pagelist,
+            # but don't search attachments (thus attachment name = '')
+            pages = [{'pagename': p, 'attachment': '', 'wikiname': 'Self', } for p in self._getPageList()]
+        hits = self._getHits(pages, self._moinMatch)
+        self.request.clock.stop('_moinSearch')
+        return hits
+    
+    def _moinMatch(self, page, uid):
+        return self.query.search(page)
+
+    def _getHits(self, pages, matchSearchFunction):
+        hits = []
+        fs_rootpage = self.fs_rootpage
+        for hit in pages:
+            if 'values' in hit:
+                valuedict = hit['values']
+                uid = hit['uid']
+            else:
+                valuedict = hit
+                uid = None # moin search hits carry no xapian document uid
+
+            wikiname = valuedict['wikiname']
+            pagename = valuedict['pagename']
+            attachment = valuedict['attachment']
+            if wikiname in (self.request.cfg.interwikiname, 'Self'): # THIS wiki
+                page = Page(self.request, pagename)
+                if attachment:
+                    if pagename == fs_rootpage: # not really an attachment
+                        page = Page(self.request, "%s/%s" % (fs_rootpage, attachment))
+                        hits.append((wikiname, page, None, None))
+                    else:
+                        hits.append((wikiname, page, attachment, None))
+                else:
+                    match = matchSearchFunction(page, uid)
+                    if match:
+                        hits.append((wikiname, page, attachment, match))
+            else: # other wiki
+                hits.append((wikiname, pagename, attachment, None))
+        return hits
+
+    def _getPageList(self):
+        """ Get list of pages to search in 
+        
+        If the query has a page filter, use it to filter pages before
+        searching. If not, get an unfiltered page list. The filtering
+        will happen later on the hits, which is faster with current
+        slow storage.
+        """
+        filter = self.query.pageFilter()
+        if filter:
+            # There is no need to filter the results again.
+            self.filtered = True
+            return self.request.rootpage.getPageList(filter=filter)
+        else:
+            return self.request.rootpage.getPageList(user='', exists=0)
+        
+    def _filter(self, hits):
+        """ Filter out deleted or acl protected pages """
+        userMayRead = self.request.user.may.read
+        fs_rootpage = self.fs_rootpage + "/"
+        thiswiki = (self.request.cfg.interwikiname, 'Self')
+        filtered = [(wikiname, page, attachment, match) for wikiname, page, attachment, match in hits
+                    if not wikiname in thiswiki or
+                       page.exists() and userMayRead(page.page_name) or
+                       page.page_name.startswith(fs_rootpage)]
+        return filtered
+
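An end-to-end sketch tying this module together (assumes a request object; the search term is made up):

from MoinMoin.search.queryparser import QueryParser
query = QueryParser().parse_query(u'WikiName')
results = Search(request, query).run()   # a SearchResults object via getSearchResults()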
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/queryparser.py	Sun Jul 09 22:39:15 2006 +0200
@@ -0,0 +1,730 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - search engine query parser
+    
+    @copyright: 2005 MoinMoin:FlorianFesti,
+                2005 MoinMoin:NirSoffer,
+                2005 MoinMoin:AlexanderSchremmer,
+                2006 MoinMoin:ThomasWaldmann,
+                2006 MoinMoin:FranzPletz
+    @license: GNU GPL, see COPYING for details
+"""
+
+import re, string
+from MoinMoin import config
+from MoinMoin.search.results import Match, TitleMatch, TextMatch
+
+try:
+    from MoinMoin.search import Xapian
+    from MoinMoin.search.Xapian import Query, UnicodeQuery
+except ImportError:
+    pass
+
+#############################################################################
+### query objects
+#############################################################################
+
+class BaseExpression:
+    """ Base class for all search terms """
+    
+    def __init__(self):
+        self.negated = 0
+
+    def __str__(self):
+        return unicode(self).encode(config.charset, 'replace')
+
+    def negate(self):
+        """ Negate the result of this term """
+        self.negated = 1 
+
+    def pageFilter(self):
+        """ Return a page filtering function
+
+        This function is used to filter the page list before we search
+        it. Return a function that gets a page name and returns bool.
+
+        The default expression does not have any filter function and
+        returns None. Subclasses may define custom filter functions.
+        """
+        return None
+
+    def search(self, page):
+        """ Search a page
+
+        Returns a list of Match objects or None if the term didn't find
+        anything (vice versa if negate() was called). Terms containing
+        other terms must call this method to aggregate the results.
+        This base class returns [Match()] (a trivial match) if negated,
+        None otherwise.
+        """
+        if self.negated:
+            # XXX why?
+            return [Match()]
+        else:
+            return None
+    
+    def costs(self):
+        """ Return estimated time to calculate this term
+        
+        Number is relative to other terms and has no real unit.
+        It allows doing the fast searches first.
+        """ 
+        return 0
+
+    def highlight_re(self):
+        """ Return a regular expression of what the term searches for
+
+        Used to display the needle in the page.
+        """
+        return ''
+
+    def _build_re(self, pattern, use_re=False, case=False, stemmed=False):
+        """ Make a regular expression out of a text pattern """
+        flags = case and re.U or (re.I | re.U)
+        if use_re:
+            try:
+                self.search_re = re.compile(pattern, flags)
+            except re.error:
+                pattern = re.escape(pattern)
+                self.pattern = pattern
+                self.search_re = re.compile(pattern, flags)
+            else:
+                self.pattern = pattern
+        else:
+            pattern = re.escape(pattern)
+            self.search_re = re.compile(pattern, flags)
+            self.pattern = pattern
+
+
+class AndExpression(BaseExpression):
+    """ A term connecting several sub terms with a logical AND """
+
+    operator = ' '
+
+    def __init__(self, *terms):
+        self._subterms = list(terms)
+        self._costs = 0
+        for t in self._subterms:
+            self._costs += t.costs()
+        self.negated = 0
+
+    def append(self, expression):
+        """ Append another term """
+        self._subterms.append(expression)
+        self._costs += expression.costs()
+
+    def subterms(self):
+        return self._subterms
+    
+    def costs(self):
+        return self._costs
+
+    def __unicode__(self):
+        result = ''
+        for t in self._subterms:
+            result += self.operator + unicode(t)
+        return u'[' + result[len(self.operator):] + u']'
+
+    def pageFilter(self):
+        """ Return a page filtering function
+
+        This function is used to filter page list before we search it.
+
+        Return a function that gets a page name and returns bool, or None.
+        """
+        # Sort terms by cost, then get all title searches
+        self.sortByCost()
+        terms = [term for term in self._subterms if isinstance(term, TitleSearch)]
+        if terms:
+            # Create and return a filter function
+            def filter(name):
+                """ A function that return True if all terms filter name """
+                for term in terms:
+                    filter = term.pageFilter()
+                    if not filter(name):
+                        return False
+                return True
+            return filter
+        
+        return None
+
+    def sortByCost(self):
+        tmp = [(term.costs(), term) for term in self._subterms]
+        tmp.sort()
+        self._subterms = [item[1] for item in tmp]
+
+    def search(self, page):
+        """ Search for each term, cheap searches first """
+        self.sortByCost()
+        matches = []
+        for term in self._subterms:
+            result = term.search(page)
+            if not result:
+                return None
+            matches.extend(result)
+        return matches
+
+    def highlight_re(self):
+        result = []
+        for s in self._subterms:
+            highlight_re = s.highlight_re()
+            if highlight_re: result.append(highlight_re)
+            
+        return '|'.join(result)
+
+    def xapian_wanted(self):
+        wanted = True
+        for term in self._subterms:
+            wanted = wanted and term.xapian_wanted()
+        return wanted
+
+    def xapian_term(self, request, allterms):
+        # sort negated terms
+        terms = []
+        not_terms = []
+        for term in self._subterms:
+            if not term.negated:
+                terms.append(term.xapian_term(request, allterms))
+            else:
+                not_terms.append(term.xapian_term(request, allterms))
+
+        # prepare query for not negated terms
+        if len(terms) == 1:
+            t1 = Query(terms[0])
+        else:
+            t1 = Query(Query.OP_AND, terms)
+
+        # negated terms?
+        if not not_terms:
+            # no, just return query for not negated terms
+            return t1
+        
+        # yes, link the non-negated and negated parts with an AND_NOT query
+        if len(not_terms) == 1:
+            t2 = Query(not_terms[0])
+        else:
+            t2 = Query(Query.OP_OR, not_terms)
+
+        return Query(Query.OP_AND_NOT, t1, t2)
+
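For instance, a query like "alpha -beta" ends up in the AND_NOT branch above; the resulting query has roughly this shape (the terms are illustrative):

t1 = Query('alpha')                    # the non-negated side
t2 = Query('beta')                     # the negated side
q = Query(Query.OP_AND_NOT, t1, t2)    # matches alpha but not beta
print q.get_description()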
+
+class OrExpression(AndExpression):
+    """ A term connecting several sub terms with a logical OR """
+    
+    operator = ' or '
+
+    def search(self, page):
+        """ Search page with terms, cheap terms first
+
+        XXX Do we have any reason to sort here? we are not breaking out
+        of the search in any case.
+        """
+        self.sortByCost()
+        matches = []
+        for term in self._subterms:
+            result = term.search(page)
+            if result:
+                matches.extend(result)
+        return matches
+
+    def xapian_term(self, request, allterms):
+        # XXX: negated terms managed by _moinSearch?
+        return Query(Query.OP_OR, [term.xapian_term(request, allterms) for term in self._subterms])
+
+
+class TextSearch(BaseExpression):
+    """ A term that does a normal text search
+
+    Both page content and the page title are searched, using an
+    additional TitleSearch term.
+    """
+    
+    def __init__(self, pattern, use_re=False, case=False):
+        """ Init a text search
+
+        @param pattern: pattern to search for, ascii string or unicode
+        @param use_re: treat pattern as a regular expression instead of plain text, bool
+        @param case: do case sensitive search, bool 
+        """
+        self._pattern = unicode(pattern)
+        self.negated = 0
+        self.use_re = use_re
+        self.case = case
+        self._build_re(self._pattern, use_re=use_re, case=case)
+        self.titlesearch = TitleSearch(self._pattern, use_re=use_re, case=case)
+        
+    def costs(self):
+        return 10000
+    
+    def __unicode__(self):
+        neg = self.negated and '-' or ''
+        return u'%s"%s"' % (neg, unicode(self._pattern))
+
+    def highlight_re(self):
+        return u"(%s)" % self._pattern
+
+    def search(self, page):
+        matches = []
+
+        # Search in page name
+        results = self.titlesearch.search(page)
+        if results:
+            matches.extend(results)
+
+        # Search in page body
+        body = page.get_raw_body()
+        for match in self.search_re.finditer(body):
+            if page.request.cfg.xapian_stemming:
+                # skip matches that start inside a regular word
+                # (guard the -1 index for a match at position 0)
+                if match.start() > 0 and \
+                        body[match.start()] not in config.chars_upper and \
+                        body[match.start()-1] in config.chars_lower:
+                    continue
+
+                post = 0
+                for c in body[match.end():]:
+                    if c in config.chars_lower:
+                        post += 1
+                    else:
+                        break
+
+                matches.append(TextMatch(start=match.start(),
+                        end=match.end()+post))
+            else:
+                matches.append(TextMatch(re_match=match))
+
+        # Decide what to do with the results.
+        if ((self.negated and matches) or
+            (not self.negated and not matches)):
+            return None
+        elif matches:
+            return matches
+        else:
+            return []
+
+    def xapian_wanted(self):
+        return not self.use_re
+
+    def xapian_term(self, request, allterms):
+        if self.use_re:
+            # basic regex matching per term
+            terms = [term for term in allterms() if
+                    self.search_re.match(term)]
+            if not terms:
+                return None
+            queries = [Query(Query.OP_OR, terms)]
+        else:
+            analyzer = Xapian.WikiAnalyzer(request=request,
+                    language=request.cfg.language_default)
+            terms = self._pattern.split()
+
+            # all parsed wikiwords, AND'ed
+            queries = []
+            stemmed = []
+            for t in terms:
+                if request.cfg.xapian_stemming:
+                    # stemmed OR not stemmed
+                    tmp = []
+                    for w, s, pos in analyzer.tokenize(t, flat_stemming=False):
+                        tmp.append(UnicodeQuery(Query.OP_OR, (w, s)))
+                        stemmed.append(w)
+                    t = tmp
+                else:
+                    # just not stemmed
+                    t = [UnicodeQuery(w) for w, pos in analyzer.tokenize(t)]
+                queries.append(Query(Query.OP_AND, t))
+
+            if stemmed:
+                self._build_re(' '.join(stemmed), use_re=False,
+                        case=self.case, stemmed=True)
+
+        # titlesearch OR parsed wikiwords
+        return Query(Query.OP_OR,
+                (self.titlesearch.xapian_term(request, allterms),
+                    Query(Query.OP_AND, queries)))
+
+class TitleSearch(BaseExpression):
+    """ Term searches in pattern in page title only """
+
+    def __init__(self, pattern, use_re=False, case=False):
+        """ Init a title search
+
+        @param pattern: pattern to search for, ascii string or unicode
+        @param use_re: treat pattern as re of plain text, bool
+        @param case: do case sensitive search, bool 
+        """
+        self._pattern = unicode(pattern)
+        self.negated = 0
+        self.use_re = use_re
+        self.case = case
+        self._build_re(self._pattern, use_re=use_re, case=case)
+        
+    def costs(self):
+        return 100
+
+    def __unicode__(self):
+        neg = self.negated and '-' or ''
+        return u'%s!"%s"' % (neg, unicode(self._pattern))
+
+    def highlight_re(self):
+        return u"(%s)" % self._pattern
+
+    def pageFilter(self):
+        """ Page filter function for single title search """
+        def filter(name):
+            match = self.search_re.search(name)
+            if ((self.negated and match) or
+                (not self.negated and not match)):
+                return False
+            return True
+        return filter
+            
+    def search(self, page):
+        # Get matches in page name
+        matches = []
+        for match in self.search_re.finditer(page.page_name):
+            if page.request.cfg.xapian_stemming:
+                # skip matches that start inside a regular word: with
+                # stemming enabled, the regex may hit a stem in the
+                # middle of a longer word
+                if match.start() > 0 and \
+                        page.page_name[match.start()] not in config.chars_upper and \
+                        page.page_name[match.start()-1] in config.chars_lower:
+                    continue
+
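+                # extend the match over any trailing lowercase characters,
+                # so the full word in the title is reported as the match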
+                post = 0
+                for c in page.page_name[match.end():]:
+                    if c in config.chars_lower:
+                        post += 1
+                    else:
+                        break
+
+                matches.append(TitleMatch(start=match.start(),
+                        end=match.end()+post))
+            else:
+                matches.append(TitleMatch(re_match=match))
+        
+        if ((self.negated and matches) or
+            (not self.negated and not matches)):
+            return None
+        elif matches:
+            return matches
+        else:
+            return []
+
+    def xapian_wanted(self):
+        return not self.use_re
+
+    def xapian_term(self, request, allterms):
+        if self.use_re:
+            # basic regex matching per term
+            terms = [term for term in allterms() if
+                    self.search_re.match(term)]
+            if not terms:
+                return None
+            queries = [Query(Query.OP_OR, terms)]
+        else:
+            analyzer = Xapian.WikiAnalyzer(request=request,
+                    language=request.cfg.language_default)
+            terms = self._pattern.split()
+            terms = [[w for w, pos in analyzer.raw_tokenize(t)] for t in terms]
+
+            # all parsed wikiwords, AND'ed
+            queries = []
+            stemmed = []
+            for t in terms:
+                if request.cfg.xapian_stemming:
+                    # stemmed OR not stemmed
+                    tmp = []
+                    for w, s, pos in analyzer.tokenize(t, flat_stemming=False):
+                        tmp.append(UnicodeQuery(Query.OP_OR,
+                            ['%s%s' % (Xapian.Index.prefixMap['title'], j)
+                                for j in (w, s)]))
+                        stemmed.append(w)
+                    t = tmp
+                else:
+                    # just not stemmed
+                    t = [UnicodeQuery('%s%s' % (Xapian.Index.prefixMap['title'], w))
+                        for w, pos in analyzer.tokenize(t)]
+
+                queries.append(Query(Query.OP_AND, t))
+
+            if stemmed:
+                self._build_re(' '.join(stemmed), use_re=False,
+                        case=self.case, stemmed=True)
+
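+        # all per-word queries must match (AND)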
+        return Query(Query.OP_AND, queries)
+
+
+class LinkSearch(BaseExpression):
+    """ Search the term in the pagelinks """
+
+    def __init__(self, pattern, use_re=False, case=True):
+        """ Init a link search
+
+        @param pattern: pattern to search for, ascii string or unicode
+        @param use_re: treat pattern as a regular expression instead of plain text, bool
+        @param case: do case sensitive search, bool
+        """
+        # used for search in links
+        self._pattern = pattern
+        # used for search in text
+        self._textpattern = '(' + self._pattern.replace('/', '|') + ')'
+        self.negated = 0
+        self.use_re = use_re
+        self.case = case
+        self.textsearch = TextSearch(self._textpattern, use_re=1, case=case)
+        self._build_re(unicode(pattern), use_re=use_re, case=case)
+
+    def _build_re(self, pattern, use_re=False, case=False):
+        """ Make a regular expression out of a text pattern """
+        flags = case and re.U or (re.I | re.U)
+        if use_re:
+            self.search_re = re.compile(pattern, flags)
+            self.static = False
+        else:
+            self.pattern = pattern
+            self.static = True
+        
+    def costs(self):
+        return 5000 # cheaper than a TextSearch
+
+    def __unicode__(self):
+        neg = self.negated and '-' or ''
+        return u'%s!"%s"' % (neg, unicode(self._pattern))
+
+    def highlight_re(self):
+        return u"(%s)" % self._textpattern
+
+    def search(self, page):
+        matches = []
+
+        # Check whether the page links to the pattern at all
+        found = True
+        for link in page.getPageLinks(page.request):
+            if ((self.static and self.pattern == link) or
+                (not self.static and self.search_re.match(link))):
+                break
+        else:
+            found = False
+
+        if found:
+            # Search in page text
+            results = self.textsearch.search(page)
+            if results:
+                matches.extend(results)
+            else: # This happens e.g. for pages that use navigation macros
+                matches.append(TextMatch(0, 0))
+
+        # Decide what to do with the results.
+        if ((self.negated and matches) or
+            (not self.negated and not matches)):
+            return None
+        elif matches:
+            return matches
+        else:
+            return []
+
+    def xapian_wanted(self):
+        return not self.use_re
+
+    def xapian_term(self, request, allterms):
+        prefix = Xapian.Index.prefixMap['linkto']
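+        # link terms are indexed as '<prefix>:<pagename>'; the n+1
+        # offset below skips the colon separator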
+        if self.use_re:
+            # basic regex matching per term
+            terms = []
+            found = None
+            n = len(prefix)
+            for term in allterms():
+                if prefix == term[:n]:
+                    found = True
+                    if self.search_re.match(term[n+1:]):
+                        terms.append(term)
+                elif found:
+                    continue
+
+            if not terms:
+                return None
+            return Query(Query.OP_OR, terms)
+        else:
+            return UnicodeQuery('%s:%s' % (prefix, self.pattern))
+
+
+class LanguageSearch(BaseExpression):
+    """ Search the pages written in a language """
+
+    def __init__(self, pattern, use_re=False, case=True):
+        """ Init a language search
+
+        @param pattern: pattern to search for, ascii string or unicode
+        @param use_re: treat pattern as a regular expression instead of plain text, bool
+        @param case: do case sensitive search, bool
+        """
+        # iso language code, always lowercase
+        self._pattern = pattern.lower()
+        self.negated = 0
+        self.use_re = use_re
+        self.case = case
+        self.xapian_called = False
+        self._build_re(self._pattern, use_re=use_re, case=case)
+
+    def costs(self):
+        return 5000 # cheaper than a TextSearch
+
+    def __unicode__(self):
+        neg = self.negated and '-' or ''
+        return u'%s!"%s"' % (neg, unicode(self._pattern))
+
+    def highlight_re(self):
+        return ""
+
+    def search(self, page):
+        # We just rely on xapian for this; deactivated for _moinSearch,
+        # which cannot determine the page language by itself.
+        if not self.xapian_called:
+            return []
+        else:
+            return [Match()]
+
+    def xapian_wanted(self):
+        return not self.use_re
+
+    def xapian_term(self, request, allterms):
+        self.xapian_called = True
+        prefix = Xapian.Index.prefixMap['lang']
+        if self.use_re:
+            # basic regex matching per term
+            terms = []
+            found = None
+            n = len(prefix)
+            for term in allterms():
+                if prefix == term[:n]:
+                    found = True
+                    if self.search_re.match(term[n:]):
+                        terms.append(term)
+                elif found:
+                    continue
+
+            if not terms:
+                return None
+            return Query(Query.OP_OR, terms)
+        else:
+            pattern = self.pattern
+            return UnicodeQuery('%s%s' % (prefix, pattern))
+
+
+##############################################################################
+### Parse Query
+##############################################################################
+
+class QueryParser:
+    """
+    Converts a string into a tree of Query objects
+    using recursive top-down parsing
+    """
+
+    def __init__(self, **kw):
+        """
+        @keyword titlesearch: treat all terms as title searches
+        @keyword case: do case sensitive search
+        @keyword regex: treat all terms as regular expressions
+        """
+        self.titlesearch = kw.get('titlesearch', 0)
+        self.case = kw.get('case', 0)
+        self.regex = kw.get('regex', 0)
+
+    def parse_query(self, query):
+        """ transform an string into a tree of Query objects """
+        if isinstance(query, str):
+            query = query.decode(config.charset)
+        self._query = query
+        result = self._or_expression()
+        if result is None:
+            result = BaseExpression()
+        return result
+
+    def _or_expression(self):
+        result = self._and_expression()
+        if self._query:
+            result = OrExpression(result)
+        while self._query:
+            q = self._and_expression()
+            if q:
+                result.append(q)
+        return result
+            
+    def _and_expression(self):
+        result = None
+        while not result and self._query:
+            result = self._single_term()
+        term = self._single_term()
+        if term:
+            result = AndExpression(result, term)
+        else:
+            return result
+        term = self._single_term()
+        while term:
+            result.append(term)
+            term = self._single_term()
+        return result
+                                
+    def _single_term(self):
+        regex = (r'(?P<NEG>-?)\s*(' +              # leading '-'
+                 r'(?P<OPS>\(|\)|(or\b(?!$)))|' +  # or, (, )
+                 r'(?P<MOD>(\w+:)*)' +
+                 r'(?P<TERM>("[^"]+")|' +
+                 r"('[^']+')|(\S+)))")             # search word itself
+        self._query = self._query.strip()
+        match = re.match(regex, self._query, re.U)
+        if not match:
+            return None
+        self._query = self._query[match.end():]
+        ops = match.group("OPS")
+        if ops == '(':
+            result = self._or_expression()
+            if match.group("NEG"): result.negate()
+            return result
+        elif ops == ')':
+            return None
+        elif ops == 'or':
+            return None
+        modifiers = match.group('MOD').split(":")[:-1]
+        text = match.group('TERM')
+        if self.isQuoted(text):
+            text = text[1:-1]
+
+        title_search = self.titlesearch
+        regex = self.regex
+        case = self.case
+        linkto = False
+        lang = False
+
+        for m in modifiers:
+            if "title".startswith(m):
+                title_search = True
+            elif "regex".startswith(m):
+                regex = True
+            elif "case".startswith(m):
+                case = True
+            elif "linkto".startswith(m):
+                linkto = True
+            elif "language".startswith(m):
+                lang = True
+
+        if lang:
+            obj = LanguageSearch(text, use_re=regex, case=False)
+        elif linkto:
+            obj = LinkSearch(text, use_re=regex, case=case)
+        elif title_search:
+            obj = TitleSearch(text, use_re=regex, case=case)
+        else:
+            obj = TextSearch(text, use_re=regex, case=case)
+
+        if match.group("NEG"):
+            obj.negate()
+        return obj
+
+    def isQuoted(self, text):
+        # Empty string '' is not considered quoted
+        if len(text) < 3:
+            return False
+        return (text.startswith('"') and text.endswith('"') or
+                text.startswith("'") and text.endswith("'"))
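+
+# Minimal usage sketch (illustrative):
+#   parser = QueryParser(titlesearch=0, case=0, regex=0)
+#   query = parser.parse_query(u'title:Foo -"bad phrase"')
+#   matches = query.search(page) # None if the page does not match,
+#                                # else a list of Match objects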
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/search/results.py	Sun Jul 09 22:39:15 2006 +0200
@@ -0,0 +1,656 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - search engine
+    
+    @copyright: 2005 MoinMoin:FlorianFesti,
+                2005 MoinMoin:NirSoffer,
+                2005 MoinMoin:AlexanderSchremmer,
+                2006 MoinMoin:ThomasWaldmann,
+                2006 MoinMoin:FranzPletz
+    @license: GNU GPL, see COPYING for details
+"""
+
+import StringIO, time
+from MoinMoin import config, wikiutil
+from MoinMoin.Page import Page
+
+############################################################################
+### Results
+############################################################################
+
+class Match(object):
+    """ Base class for all Matches (found pieces of pages).
+    
+    This class represents an empty true value, as returned from negated searches.
+    """
+    # Default match weight
+    _weight = 1.0
+    
+    def __init__(self, start=0, end=0, re_match=None):
+        self.re_match = re_match
+        if not re_match:
+            self._start = start
+            self._end = end
+        else:
+            self._start = self._end = 0
+
+    def __len__(self):
+        return self.end - self.start
+
+    def __eq__(self, other):
+        equal = (self.__class__ == other.__class__ and
+                 self.start == other.start and
+                 self.end == other.end)
+        return equal
+        
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def view(self):
+        return ''
+
+    def weight(self):
+        return self._weight
+
+    def _get_start(self):
+        if self.re_match:
+            return self.re_match.start()
+        return self._start
+
+    def _get_end(self):
+        if self.re_match:
+            return self.re_match.end()
+        return self._end
+
+    # object properties
+    start = property(_get_start)
+    end   = property(_get_end)
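+
+    # For a match m over some text, text[m.start:m.end] is the matched
+    # substring and len(m) its length; the bare Match() returned for
+    # negated terms spans nothing (start == end == 0).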
+
+
+class TextMatch(Match):
+    """ Represents a match in the page content """
+    pass
+
+
+class TitleMatch(Match):
+    """ Represents a match in the page title
+    
+    Has more weight than a match in the page content.
+    """
+    # Matches in titles are much more important in wikis. This setting
+    # seems to make all pages that have title matches appear before
+    # pages whose title does not match.
+    _weight = 100.0
+
+
+class AttachmentMatch(Match):
+    """ Represents a match in a attachment content
+
+    Not used yet.
+    """
+    pass
+
+
+class FoundPage:
+    """ Represents a page in a search result """
+
+    def __init__(self, page_name, matches=None, page=None):
+        self.page_name = page_name
+        self.attachment = '' # this is not an attachment
+        self.page = page
+        if matches is None:
+            matches = []
+        self._matches = matches
+
+    def weight(self, unique=1):
+        """ returns how important this page is for the terms searched for
+
+        Summarize the weight of all page matches
+
+        @param unique: ignore identical matches
+        @rtype: int
+        @return: page weight
+        """
+        weight = 0
+        for match in self.get_matches(unique=unique):
+            weight += match.weight()
+            # More sophisticated things to be added, like increasing
+            # the weight of near matches.
+        return weight
+
+    def add_matches(self, matches):
+        """ Add found matches """
+        self._matches.extend(matches)
+
+    def get_matches(self, unique=1, sort='start', type=Match):
+        """ Return all matches of type sorted by sort
+
+        @param unique: return only unique matches (bool)
+        @param sort: match attribute to sort by (string)
+        @param type: type of match to return (Match or a subclass)
+        @rtype: list
+        @return: list of matches
+        """
+        if unique:
+            matches = self._unique_matches(type=type)
+            if sort == 'start':
+                # matches already sorted by match.start, finished.
+                return matches
+        else:
+            matches = self._matches
+
+        # Filter by type and sort by the given key using a fast
+        # Schwartzian transform.
+        if sort == 'start':
+            tmp = [(match.start, match) for match in matches
+                   if isinstance(match, type)]
+        else:
+            tmp = [(match.weight(), match) for match in matches
+                   if isinstance(match, type)]
+        tmp.sort()
+        if sort == 'weight':
+            tmp.reverse()
+        matches = [item[1] for item in tmp]
+        
+        return matches
+
+    def _unique_matches(self, type=Match):
+        """ Get a list of unique matches of type
+
+        The result is sorted by match.start, because it's easy to remove
+        duplicates that way.
+
+        @param type: type of match to return
+        @rtype: list
+        @return: list of matches of type, sorted by match.start
+        """
+        # Filter by type and sort by match.start using a fast Schwartzian
+        # transform.
+        tmp = [(match.start, match) for match in self._matches
+               if isinstance(match, type)]
+        tmp.sort()
+
+        if not tmp:
+            return []
+
+        # Get first match into matches list
+        matches = [tmp[0][1]]
+
+        # Add the remaining matches, ignoring duplicates
+        for item in tmp[1:]:
+            if item[1] == matches[-1]:
+                continue
+            matches.append(item[1])
+
+        return matches
+    
+
+class FoundAttachment(FoundPage):
+    """ Represent an attachment in search results """
+    
+    def __init__(self, page_name, attachment, matches=None, page=None):
+        self.page_name = page_name
+        self.attachment = attachment
+        self.page = page
+        if matches is None:
+            matches = []
+        self._matches = matches
+
+    def weight(self, unique=1):
+        return 1
+
+    def get_matches(self, unique=1, sort='start', type=Match):
+        return []
+
+    def _unique_matches(self, type=Match):
+        return []
+
+
+class FoundRemote(FoundPage):
+    """ Represent an attachment in search results """
+    
+    def __init__(self, wikiname, page_name, attachment, matches=None, page=None):
+        self.wikiname = wikiname
+        self.page_name = page_name
+        self.attachment = attachment
+        self.page = page
+        if matches is None:
+            matches = []
+        self._matches = matches
+
+    def weight(self, unique=1):
+        return 1
+
+    def get_matches(self, unique=1, sort='start', type=Match):
+        return []
+
+    def _unique_matches(self, type=Match):
+        return []
+
+
+############################################################################
+### Search results formatting
+############################################################################
+
+class SearchResults:
+    """ Manage search results, supply different views
+
+    Search results can hold valid search results and format them for
+    many requests, until the wiki content changes.
+
+    For example, one might ask for a full page list sorted from A to Z,
+    and then ask for the same list sorted from Z to A. Or sort results
+    by name and then by rank.
+    """
+    # Public functions --------------------------------------------------
+    
+    def __init__(self, query, hits, pages, elapsed):
+        self.query = query # the query
+        self.hits = hits # hits list
+        self.sort = None # hits are unsorted initially
+        self.pages = pages # number of pages in the wiki
+        self.elapsed = elapsed # search time
+
+    def sortByWeight(self):
+        """ Sorts found pages by the weight of the matches """
+        tmp = [(hit.weight(), hit.page_name, hit) for hit in self.hits]
+        tmp.sort()
+        tmp.reverse()
+        self.hits = [item[2] for item in tmp]
+        self.sort = 'weight'
+        
+    def sortByPagename(self):
+        """ Sorts a list of found pages alphabetical by page name """
+        tmp = [(hit.page_name, hit) for hit in self.hits]
+        tmp.sort()
+        self.hits = [item[1] for item in tmp]
+        self.sort = 'page_name'
+        
+    def stats(self, request, formatter):
+        """ Return search statistics, formatted with formatter
+
+        @param request: current request
+        @param formatter: formatter to use
+        @rtype: unicode
+        @return: formatted statistics
+        """
+        _ = request.getText
+        output = [
+            formatter.paragraph(1),
+            formatter.text(_("%(hits)d results out of about %(pages)d pages.") %
+                   {'hits': len(self.hits), 'pages': self.pages}),
+            u' (%s)' % formatter.text(_("%.2f seconds") % self.elapsed),
+            formatter.paragraph(0),
+            ]
+        return ''.join(output)
+
+    def pageList(self, request, formatter, info=0, numbered=1):
+        """ Format a list of found pages
+
+        @param request: current request
+        @param formatter: formatter to use
+        @param info: show match info in title
+        @param numbered: use numbered list for display
+        @rtype: unicode
+        @return: formatted page list
+        """
+        self._reset(request, formatter)
+        f = formatter
+        write = self.buffer.write
+        if numbered:
+            list_fmt = f.number_list
+        else:
+            list_fmt = f.bullet_list
+
+        # Add pages formatted as list
+        if self.hits:
+            write(list_fmt(1))
+
+            for page in self.hits:
+                if page.attachment:
+                    querydict = {
+                        'action': 'AttachFile',
+                        'do': 'get',
+                        'target': page.attachment,
+                    }
+                else:
+                    querydict = None
+                querystr = self.querystring(querydict)
+            
+                matchInfo = ''
+                if info:
+                    matchInfo = self.formatInfo(f, page)
+                item = [
+                    f.listitem(1),
+                    f.pagelink(1, page.page_name, querystr=querystr),
+                    self.formatTitle(page),
+                    f.pagelink(0, page.page_name),
+                    matchInfo,
+                    f.listitem(0),
+                    ]
+                write(''.join(item))
+            write(list_fmt(0))
+
+        return self.getvalue()
+
+    def pageListWithContext(self, request, formatter, info=1, context=180,
+                            maxlines=1):
+        """ Format a list of found pages with context
+
+        The default parameter values will create Google-like search
+        results, as this is the best-known search interface. A good
+        interface is a familiar interface, so unless we have a much
+        better solution (we don't), being like Google is the way to go.
+
+        @param request: current request
+        @param formatter: formatter to use
+        @param info: show match info near the page link
+        @param context: how many characters to show around each match
+        @param maxlines: how many context lines to show
+        @rtype: unicode
+        @return: formatted page list with context
+        """
+        self._reset(request, formatter)
+        f = formatter
+        write = self.buffer.write
+        
+        # Add pages formatted as definition list
+        if self.hits:
+            write(f.definition_list(1))
+
+            for page in self.hits:
+                matchInfo = ''
+                if info:
+                    matchInfo = self.formatInfo(f, page)
+                if page.attachment:
+                    fmt_context = ""
+                    querydict = {
+                        'action': 'AttachFile',
+                        'do': 'get',
+                        'target': page.attachment,
+                    }
+                elif page.page_name.startswith('FS/'): # XXX FS hardcoded
+                    fmt_context = ""
+                    querydict = None
+                else:
+                    fmt_context = self.formatContext(page, context, maxlines)
+                    querydict = None
+                querystr = self.querystring(querydict)
+                item = [
+                    f.definition_term(1),
+                    f.pagelink(1, page.page_name, querystr=querystr),
+                    self.formatTitle(page),
+                    f.pagelink(0, page.page_name),
+                    matchInfo,
+                    f.definition_term(0),
+                    f.definition_desc(1),
+                    fmt_context,
+                    f.definition_desc(0),
+                    ]
+                write(''.join(item))
+            write(f.definition_list(0))
+        
+        return self.getvalue()
+
+    # Private -----------------------------------------------------------
+
+    # These methods are not meant to be used by clients and may change
+    # without notice.
+    
+    def formatContext(self, page, context, maxlines):
+        """ Format search context for each matched page
+
+        Try to show the context of the first maxlines interesting matches.
+        """
+        f = self.formatter
+        if not page.page:
+            page.page = Page(self.request, page.page_name)
+        body = page.page.get_raw_body()
+        last = len(body) - 1
+        lineCount = 0
+        output = []
+        
+        # Get unique text matches sorted by match.start, try to ignore
+        # matches in page header, and show the first maxlines matches.
+        # TODO: when we implement weight algorithm for text matches, we
+        # should get the list of text matches sorted by weight and show
+        # the first maxlines matches.
+        matches = page.get_matches(unique=1, sort='start', type=TextMatch)
+        i, start = self.firstInterestingMatch(page, matches)
+
+        # Format context
+        while i < len(matches) and lineCount < maxlines:
+            match = matches[i]
+            
+            # Get context range for this match
+            start, end = self.contextRange(context, match, start, last)
+
+            # Format context lines for matches. Each complete match in
+            # the context will be highlighted, and if the full match is
+            # in the context, we increase the index, and will not show
+            # same match again on a separate line.
+
+            output.append(f.text(u'...'))
+            
+            # Get the index of the first match completely within the
+            # context.
+            for j in xrange(0, len(matches)):
+                if matches[j].start >= start:
+                    break
+
+            # Add all matches in context and the text between them 
+            while True:
+                match = matches[j]
+                # Ignore matches behind the current position
+                if start < match.end:
+                    # Append the text before match
+                    if start < match.start:
+                        output.append(f.text(body[start:match.start]))
+                    # And the match
+                    output.append(self.formatMatch(body, match, start))
+                    start = match.end
+                # Get next match, but only if it's completely within the context
+                if j < len(matches) - 1 and matches[j + 1].end <= end:
+                    j += 1
+                else:
+                    break
+
+            # Add text after last match and finish the line
+            if match.end < end:
+                output.append(f.text(body[match.end:end]))
+            output.append(f.text(u'...'))
+            output.append(f.linebreak(preformatted=0))
+
+            # Increase line and point to the next match
+            lineCount += 1
+            i = j + 1
+
+        output = ''.join(output)
+
+        if not output:
+            # Return the first context characters from the page text
+            output = f.text(page.page.getPageText(length=context))
+            output = output.strip()
+            if not output:
+                # This is a page with no text, only header, for example,
+                # a redirect page.
+                output = f.text(page.page.getPageHeader(length=context))
+        
+        return output
+        
+    def firstInterestingMatch(self, page, matches):
+        """ Return the first interesting match
+
+        This function is needed only because we don't yet have a weight
+        algorithm for page text matches.
+
+        Try to find the first match past the page header. If we can't
+        find one, we return the first match and start=0.
+
+        @rtype: tuple
+        @return: index of first match, start of text
+        """
+        header = page.page.getPageHeader()
+        start = len(header)
+        # Find first match after start
+        for i in xrange(len(matches)):
+            if matches[i].start >= start:
+                return i, start
+        return 0, 0
+
+    def contextRange(self, context, match, start, last):
+        """ Compute context range
+
+        Add context around each match. If there is no room for context
+        before or after the match, show more context on the other side.
+
+        @param context: context length
+        @param match: current match
+        @param start: context should not start before that index, unless
+                      end is past the last character.
+        @param last: last character index
+        @rtype: tuple
+        @return: start, end of context
+        """
+        # Start by giving equal context on both sides of match
+        contextlen = max(context - len(match), 0)
+        cstart = match.start - contextlen / 2
+        cend = match.end + contextlen / 2
+
+        # If the context starts before start, give more context at the end
+        if cstart < start:
+            cend += start - cstart
+            cstart = start
+            
+        # But if the end is after last, give context back to the start
+        if cend > last:
+            cstart -= cend - last
+            cend = last
+
+        # Keep context start non-negative for very short texts
+        cstart = max(cstart, 0)
+
+        return cstart, cend
+
+    def formatTitle(self, page):
+        """ Format page title
+
+        Invoke format match on all unique matches in page title.
+
+        @param page: found page
+        @rtype: unicode
+        @return: formatted title
+        """
+        # Get unique title matches sorted by match.start
+        matches = page.get_matches(unique=1, sort='start', type=TitleMatch)
+        
+        # Format
+        pagename = page.page_name
+        f = self.formatter
+        output = []
+        start = 0
+        for match in matches:
+            # Ignore matches behind the current position
+            if start < match.end:
+                # Append the text before the match
+                if start < match.start:
+                    output.append(f.text(pagename[start:match.start]))
+                # And the match
+                output.append(self.formatMatch(pagename, match, start))
+                start = match.end
+        # Add text after match
+        if start < len(pagename):
+            output.append(f.text(pagename[start:]))
+        
+        if page.attachment: # show the attachment that matched
+            output.extend([
+                    " ",
+                    f.strong(1),
+                    f.text("(%s)" % page.attachment),
+                    f.strong(0)])
+
+        return ''.join(output)
+
+    def formatMatch(self, body, match, location):
+        """ Format single match in text
+
+        Format the part of the match after the current location in the
+        text. Matches behind location are ignored and an empty string is
+        returned.
+
+        @param body: text containing match
+        @param match: search match in text
+        @param location: current location in text
+        @rtype: unicode
+        @return: formatted match or empty string
+        """        
+        start = max(location, match.start)
+        if start < match.end:
+            f = self.formatter
+            output = [
+                f.strong(1),
+                f.text(body[start:match.end]),
+                f.strong(0),
+                ]
+            return ''.join(output)
+        return ''
+
+    def querystring(self, querydict=None):
+        """ Return query string, used in the page link """
+        if querydict is None:
+            querydict = {'highlight': self.query.highlight_re()}
+        querystr = wikiutil.makeQueryString(querydict)
+        #querystr = wikiutil.escape(querystr)
+        return querystr
+
+    def formatInfo(self, formatter, page):
+        """ Return formatted match info """
+        template = u' . . . %s %s'
+        template = u"%s%s%s" % (formatter.span(1, css_class="info"),
+                                template,
+                                formatter.span(0))
+        # Count number of unique matches in text of all types
+        count = len(page.get_matches(unique=1))
+        info = template % (count, self.matchLabel[count != 1])
+        return info
+
+    def getvalue(self):
+        """ Return output in div with CSS class """
+        write = self.request.write
+        value = [
+            self.formatter.div(1, css_class='searchresults'),
+            self.buffer.getvalue(),
+            self.formatter.div(0),
+            ]
+        return '\n'.join(value)
+
+    def _reset(self, request, formatter):
+        """ Update internal state before new output
+
+        Do not call this, it should be called only by the instance code.
+
+        Each request might need different translations or other user
+        preferences.
+        """
+        self.buffer = StringIO.StringIO()
+        self.formatter = formatter
+        self.request = request
+        # Use 1 match, 2 matches...
+        _ = request.getText
+        self.matchLabel = (_('match'), _('matches'))
+
+
+def getSearchResults(request, query, hits, start):
+    result_hits = []
+    for wikiname, page, attachment, match in hits:
+        if wikiname in (request.cfg.interwikiname, 'Self'): # a local match
+            if attachment:
+                result_hits.append(FoundAttachment(page.page_name, attachment))
+            else:
+                result_hits.append(FoundPage(page.page_name, match))
+        else:
+            result_hits.append(FoundRemote(wikiname, page, attachment, match))
+    elapsed = time.time() - start
+    count = request.rootpage.getPageCount()
+    return SearchResults(query, result_hits, count, elapsed)
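+
+# Typical use (sketch): the search code gathers (wikiname, page,
+# attachment, match) hit tuples and a start timestamp, then e.g.:
+#   results = getSearchResults(request, query, hits, start_time)
+#   html = results.pageListWithContext(request, request.formatter)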
+
--- a/MoinMoin/support/xapwrap/document.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/support/xapwrap/document.py	Sun Jul 09 22:39:15 2006 +0200
@@ -140,12 +140,16 @@
 
     def toXapianDocument(self, indexValueMap, prefixMap=None):
         d = xapian.Document()
-        position = 1
+        position = 0
         analyzer = self.analyzerFactory()
 
         # add text fields
         for field in self.textFields:
             for token in analyzer.tokenize(field.text):
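+                # the analyzer may now yield (token, position) tuples
+                # carrying explicit positions; bare tokens keep the
+                # running count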
+                if isinstance(token, tuple):
+                    token, position = token
+                else:
+                    position += 1
                 # the xapian swig bindings don't like unicode objects, so we
                 # decode terms to UTF-8 before indexing. this is fine as
                 # long as all data that goes into the db (whether for
@@ -159,12 +163,13 @@
                 # the process, the string length could expand, so we
                 # need to check here as well.
                 d.add_posting(checkKeyLen(token), position)
-                position += 1
             position += INTER_FIELD_POSITION_GAP
 
             if field.prefix:
                 prefix = field.name
                 for token in analyzer.tokenize(field.text):
+                    if isinstance(token, tuple):
+                        token = token[0]
                     # token is unicode, but gets converted to UTF-8
                     # by makePairForWrite:
                     term = makePairForWrite(prefix, token, prefixMap)
--- a/MoinMoin/support/xapwrap/index.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/support/xapwrap/index.py	Sun Jul 09 22:39:15 2006 +0200
@@ -635,7 +635,7 @@
                         valRes[valName] = xapDoc.get_value(valueIndex)
                     thisResult['values'] = valRes
                 results.append(thisResult)
-            return results
+            return enq, results
         except:
             del enq, mset
             raise
--- a/MoinMoin/userform.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/userform.py	Sun Jul 09 22:39:15 2006 +0200
@@ -55,7 +55,7 @@
     def handleData(self):
         _ = self._
         form = self.request.form
-    
+
         if form.has_key('cancel'):
             return
 
@@ -67,7 +67,7 @@
                 email = form['email'][0].lower()
             except KeyError:
                 return _("Please provide a valid email address!")
-    
+
             u = user.get_by_email_address(self.request, email)
             if u:
                 msg = u.mailAccountData()
@@ -85,7 +85,7 @@
                 theuser = self.request.get_user_from_form()
             else:
                 theuser = user.User(self.request, auth_method="request:152")
-                
+
             # Require non-empty name
             try:
                 theuser.name = form['name'][0]
@@ -109,7 +109,7 @@
 
             # try to get the password and pw repeat
             password = form.get('password', [''])[0]
-            password2 = form.get('password2',[''])[0]
+            password2 = form.get('password2', [''])[0]
 
             # Check if password is given and matches with password repeat
             if password != password2:
@@ -145,13 +145,13 @@
             theuser.save()
             if form.has_key('create_and_mail'):
                 theuser.mailAccountData()
-            
+
             result = _("User account created! You can use this account to login now...")
             if _debug:
                 result = result + util.dumpFormData(form)
             return result
-        
-        
+
+
         # Select user profile (su user) - only works with cookie auth active.
         if form.has_key('select_user'):
             if (wikiutil.checkTicket(self.request.form['ticket'][0]) and
@@ -173,7 +173,7 @@
             if self.request.request_method != 'POST':
                 return _("Use UserPreferences to change your settings or create an account.")
             theuser = self.request.get_user_from_form()
-                
+
             if not 'name' in theuser.auth_attribs:
                 # Require non-empty name
                 theuser.name = form.get('name', [theuser.name])[0]
@@ -198,7 +198,7 @@
             if not 'password' in theuser.auth_attribs:
                 # try to get the password and pw repeat
                 password = form.get('password', [''])[0]
-                password2 = form.get('password2',[''])[0]
+                password2 = form.get('password2', [''])[0]
 
                 # Check if password is given and matches with password repeat
                 if password != password2:
@@ -239,14 +239,14 @@
 
             # editor size
             theuser.edit_rows = util.web.getIntegerInput(self.request, 'edit_rows', theuser.edit_rows, 10, 60)
-                
+
             # try to get the editor
             theuser.editor_default = form.get('editor_default', [self.cfg.editor_default])[0]
             theuser.editor_ui = form.get('editor_ui', [self.cfg.editor_ui])[0]
 
             # time zone
             theuser.tz_offset = util.web.getIntegerInput(self.request, 'tz_offset', theuser.tz_offset, -84600, 84600)
-    
+
             # datetime format
             try:
                 dt_d_combined = UserSettings._date_formats.get(form['datetime_fmt'][0], '')
@@ -254,7 +254,7 @@
             except (KeyError, ValueError):
                 theuser.datetime_fmt = '' # default
                 theuser.date_fmt = '' # default
-    
+
             # try to get the (optional) theme
             theme_name = form.get('theme_name', [self.cfg.theme_default])[0]
             if theme_name != theuser.theme_name:
@@ -292,7 +292,7 @@
                 default = self.cfg.user_form_defaults[key]
                 value = form.get(key, [default])[0]
                 setattr(theuser, key, value)
-            
+
             # checkbox options
             if not newuser:
                 for key, label in self.cfg.user_checkbox_fields:
@@ -304,15 +304,15 @@
                             pass
                         else:
                             setattr(theuser, key, value)
-    
+
             # quicklinks for navibar
-            theuser.quicklinks = self.decodePageList('quicklinks')            
-            
+            theuser.quicklinks = self.decodePageList('quicklinks')
+
             # subscription for page change notification
             theuser.subscribed_pages = self.decodePageList('subscribed_pages')
-                    
+
             # save data
-            theuser.save()            
+            theuser.save()
             self.request.user = theuser
 
             result = _("User preferences saved!")
@@ -329,10 +329,10 @@
     """ User login and settings management. """
 
     _date_formats = { # datetime_fmt & date_fmt
-        'iso':  '%Y-%m-%d %H:%M:%S & %Y-%m-%d',
-        'us':   '%m/%d/%Y %I:%M:%S %p & %m/%d/%Y',
+        'iso': '%Y-%m-%d %H:%M:%S & %Y-%m-%d',
+        'us': '%m/%d/%Y %I:%M:%S %p & %m/%d/%Y',
         'euro': '%d.%m.%Y %H:%M:%S & %d.%m.%Y',
-        'rfc':  '%a %b %d %H:%M:%S %Y & %a %b %d %Y',
+        'rfc': '%a %b %d %H:%M:%S %Y & %a %b %d %Y',
     }
 
     def __init__(self, request):
@@ -363,7 +363,7 @@
                     string.zfill("%d" % (abs(offset) % 3600 / 60), 2),
                 ),
             ))
- 
+
         return util.web.makeSelection('tz_offset', options, str(tz))
 
 
@@ -393,9 +393,9 @@
         for lang in langs:
             name = lang[1]['x-language']
             options.append((lang[0], name))
-                
+
         return util.web.makeSelection('language', options, cur_lang)
-  
+
     def _user_select(self):
         options = []
         users = user.getUserList(self.request)
@@ -404,24 +404,24 @@
             options.append((name, name))
         options.sort()
 
-        size = min(5, len(options))  
+        size = min(5, len(options))
         current_user = self.request.user.name
         return util.web.makeSelection('selected_user', options, current_user, size=size)
-            
+
     def _theme_select(self):
         """ Create theme selection. """
         cur_theme = self.request.user.valid and self.request.user.theme_name or self.cfg.theme_default
         options = [("<default>", "<%s>" % self._("Default"))]
         for theme in wikiutil.getPlugins('theme', self.request.cfg):
             options.append((theme, theme))
-                
+
         return util.web.makeSelection('theme_name', options, cur_theme)
-  
+
     def _editor_default_select(self):
         """ Create editor selection. """
         editor_default = self.request.user.valid and self.request.user.editor_default or self.cfg.editor_default
         options = [("<default>", "<%s>" % self._("Default"))]
-        for editor in ['text','gui',]:
+        for editor in ['text', 'gui', ]:
             options.append((editor, editor))
         return util.web.makeSelection('editor_default', options, editor_default)
 
@@ -433,7 +433,7 @@
                    ("freechoice", self._("free choice")),
                   ]
         return util.web.makeSelection('editor_ui', options, editor_ui)
-                
+
     def make_form(self):
         """ Create the FORM, and the TABLE with the input fields
         """
@@ -478,7 +478,7 @@
                      ' ',
                  ])
             self.make_row('', button_cell)
-            
+
         if self.request.user.valid and not create_only:
             buttons = [('save', _('Save')), ('cancel', _('Cancel')), ]
             uf_remove = self.cfg.user_form_remove
@@ -494,11 +494,11 @@
                 if not key in uf_remove:
                     if key in uf_disable:
                         self.make_row(_(label),
-                                  [ html.INPUT(type=type, size=length, name=key, disabled="disabled",
-                                    value=getattr(self.request.user, key)), ' ', _(textafter), ])
+                                  [html.INPUT(type=type, size=length, name=key, disabled="disabled",
+                                   value=getattr(self.request.user, key)), ' ', _(textafter), ])
                     else:
                         self.make_row(_(label),
-                                  [ html.INPUT(type=type, size=length, name=key, value=getattr(self.request.user, key)), ' ', _(textafter), ])
+                                  [html.INPUT(type=type, size=length, name=key, value=getattr(self.request.user, key)), ' ', _(textafter), ])
 
             if not self.cfg.theme_force and not "theme_name" in self.cfg.user_form_remove:
                 self.make_row(_('Preferred theme'), [self._theme_select()])
@@ -524,7 +524,7 @@
 
             if not "language" in self.cfg.user_form_remove:
                 self.make_row(_('Preferred language'), [self._lang_select()])
-            
+
             # boolean user options
             bool_options = []
             checkbox_fields = self.cfg.user_checkbox_fields
@@ -560,7 +560,7 @@
                             _("This list does not work, unless you have"
                               " entered a valid email address!")
                         )]
-                
+
                 self.make_row(
                     html.Raw(_('Subscribed wiki pages (one regex per line)')),
                     [html.TEXTAREA(name="subscribed_pages", rows="6", cols="50").append(
@@ -580,9 +580,9 @@
             for key, label, type, length, textafter in self.cfg.user_form_fields:
                 if key in ('name', 'password', 'password2', 'email'):
                     self.make_row(_(label),
-                              [ html.INPUT(type=type, size=length, name=key,
-                                           value=''),
-                                ' ', _(textafter), ])
+                              [html.INPUT(type=type, size=length, name=key,
+                                          value=''),
+                               ' ', _(textafter), ])
 
         if self.cfg.mail_enabled:
             buttons.append(("account_sendmail", _('Mail me my account data')))
@@ -711,10 +711,10 @@
              request.formatter.text(account.email) +
              request.formatter.url(0)),
             request.page.link_to(request, text=_('Mail me my account data'),
-                                 querystr= {"action":"userform",
-                                            "email": account.email,  
-                                            "account_sendmail": "1",
-                                            "sysadm": "users",},
+                                 querystr={"action":"userform",
+                                           "email": account.email,
+                                           "account_sendmail": "1",
+                                           "sysadm": "users", },
                                  rel='nofollow')
         ))
 
--- a/MoinMoin/version.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/version.py	Sun Jul 09 22:39:15 2006 +0200
@@ -14,7 +14,7 @@
     patchlevel = 'release'
 
 project = "MoinMoin"
-release  = '1.6.0alpha'
+release = '1.6.0alpha'
 revision = patchlevel
 
 def update():
--- a/MoinMoin/wikidicts.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/wikidicts.py	Sun Jul 09 22:39:15 2006 +0200
@@ -2,7 +2,7 @@
 """
     MoinMoin - Dictionary / Group Functions
 
-    @copyright: 2003 by Thomas Waldmann, http://linuxwiki.de/ThomasWaldmann
+    @copyright: 2003-2006 by Thomas Waldmann, MoinMoin:ThomasWaldmann
     @copyright: 2003 by Gustavo Niemeyer, http://moin.conectiva.com.br/GustavoNiemeyer
     @license: GNU GPL, see COPYING for details.
 """
@@ -20,14 +20,14 @@
 
 # Set pickle protocol, see http://docs.python.org/lib/node64.html
 PICKLE_PROTOCOL = pickle.HIGHEST_PROTOCOL
- 
+
 from MoinMoin import config, caching, wikiutil, Page, logfile
 from MoinMoin.logfile.editlog import EditLog
 
 # Version of the internal data structure which is pickled
 # Please increment if you have changed the structure
 DICTS_PICKLE_VERSION = 4
-    
+
 
 class DictBase:
     """ Base class for wiki dicts
@@ -36,7 +36,7 @@
     """
     # Regular expression used to parse text - sub class should override this
     regex = ''
-    
+
     def __init__(self, request, name):
         """ Initialize, starting from <nothing>.
 
@@ -64,16 +64,16 @@
         return self._dict.has_key(key)
 
     def get(self, key, default):
-        return self._dict.get(key,default)
+        return self._dict.get(key, default)
 
     def __getitem__(self, key):
         return self._dict[key]
-    
+
     def __repr__(self):
         return "<DictBase name=%r items=%r>" % (self.name, self._dict.items())
 
 class Dict(DictBase):
-    ''' Mapping of keys to values in a wiki page
+    """ Mapping of keys to values in a wiki page
 
        How a Dict definition page should look like:
 
@@ -84,7 +84,7 @@
         ...
         keyn:: ....
        any text ignored      
-    '''
+    """
     # Key:: Value - ignore all but key:: value pairs, strip whitespace
     regex = r'^ (?P<key>.+?):: (?P<val>.*?) *$'
 
@@ -102,7 +102,7 @@
         return "<Dict name=%r items=%r>" % (self.name, self._dict.items())
 
 class Group(DictBase):
-    ''' Group of users, of pages, of whatever
+    """ Group of users, of pages, of whatever
 
     How a Group definition page should look like:
 
@@ -116,7 +116,7 @@
 
     if there are any free links using ["free link"] notation, the markup
     is stripped from the member 
-    '''
+    """
     # * Member - ignore all but first level list items, strip whitespace
     # Strip free links markup if exists
     regex = r'^ \* +(?:\[\")?(?P<member>.+?)(?:\"\])? *$'
@@ -176,7 +176,7 @@
             # Add member and its children
             members[member] = 1
             if groupdict.hasgroup(member):
-                members.update(self._expandgroup(groupdict, member))            
+                members.update(self._expandgroup(groupdict, member))
         return members
 
     def expandgroups(self, groupdict):
@@ -283,7 +283,7 @@
 
     def addgroup(self, request, groupname):
         """add a new group (will be read from the wiki page)"""
-        grp =  Group(request, groupname)
+        grp = Group(request, groupname)
         self.dictdict[groupname] = grp
         self.groupdict[groupname] = grp
 
@@ -320,7 +320,7 @@
                 cache = caching.CacheEntry(request, arena, key, scope='wiki')
                 data = pickle.loads(cache.content())
                 self.__dict__.update(data)
-                
+
                 # invalidate the cache if the pickle version changed
                 if self.picklever != DICTS_PICKLE_VERSION:
                     self.reset()
@@ -382,7 +382,7 @@
                         self.addgroup(request, pagename)
                     dump = 1
             self.pageupdate_timestamp = now
-            
+
             if not self.base_timestamp:
                 self.base_timestamp = int(time.time())
 
@@ -394,7 +394,7 @@
             "groupdict": self.groupdict,
             "picklever": self.picklever
         }
-        
+
         if dump:
             # copy unexpanded groups to self.dictdict
             for name, grp in self.groupdict.items():
@@ -405,8 +405,7 @@
 
             cache = caching.CacheEntry(request, arena, key, scope='wiki')
             cache.update(pickle.dumps(data, PICKLE_PROTOCOL))
-            
+
         # remember it (persistent environments)
         self.cfg.DICTS_DATA = data
 
-
--- a/MoinMoin/wikiutil.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/wikiutil.py	Sun Jul 09 22:39:15 2006 +0200
@@ -5,7 +5,7 @@
     @copyright: 2000 - 2004 by Jürgen Hermann <jh@web.de>
     @license: GNU GPL, see COPYING for details.
 """
-    
+
 import os, re, urllib, cgi
 import codecs, types
 
@@ -15,7 +15,7 @@
 
 # Exceptions
 class InvalidFileNameError(Exception):
-    """ Called when we find an invalid file name """ 
+    """ Called when we find an invalid file name """
     pass
 
 # constants for page names
@@ -47,7 +47,7 @@
             return unicode(text, cur_charset, 'replace')
         except LookupError:
             return unicode(text, 'iso-8859-1', 'replace')
-    
+
 def decodeUnknownInput(text):
     """ Decode unknown input, like text attachments
 
@@ -66,7 +66,7 @@
     # Shortcut for unicode input
     if isinstance(text, unicode):
         return text
-    
+
     try:
         return unicode(text, 'utf-8')
     except UnicodeError:
@@ -76,7 +76,7 @@
             except UnicodeError:
                 pass
         return unicode(text, 'iso-8859-1', 'replace')
-        
+
 
 def decodeUserInput(s, charsets=[config.charset]):
     """
@@ -244,6 +244,9 @@
     """ Clean comment - replace CR, LF, TAB by whitespace, delete control chars
         TODO: move this to config, create on first call then return cached.
     """
+    # we only have input fields with max 200 chars, but spammers send us more
+    if len(comment) > 201:
+        comment = u''
     remap_chars = {
         ord(u'\t'): u' ',
         ord(u'\r'): u' ',
@@ -296,21 +299,21 @@
     @return: quoted name, safe for any file system
     """
     filename = wikiname.encode(charset)
-    
-    quoted = []    
+
+    quoted = []
     location = 0
     for needle in UNSAFE.finditer(filename):
         # append leading safe stuff
         quoted.append(filename[location:needle.start()])
-        location = needle.end()                    
+        location = needle.end()
         # Quote and append unsafe stuff           
         quoted.append('(')
         for character in needle.group():
             quoted.append('%02x' % ord(character))
         quoted.append(')')
-    
+
     # append rest of string
-    quoted.append(filename[location:])    
+    quoted.append(filename[location:])
     return ''.join(quoted)
 
 
@@ -339,18 +342,18 @@
     if isinstance(filename, type(u'')):
         filename = filename.encode(config.charset)
     ### Temporary fix end ###
-        
-    parts = []    
+
+    parts = []
     start = 0
-    for needle in QUOTED.finditer(filename):  
+    for needle in QUOTED.finditer(filename):
         # append leading unquoted stuff
         parts.append(filename[start:needle.start()])
-        start = needle.end()            
+        start = needle.end()
         # Append quoted stuff
-        group =  needle.group(1)
+        group = needle.group(1)
         # Filter invalid filenames
         if (len(group) % 2 != 0):
-            raise InvalidFileNameError(filename) 
+            raise InvalidFileNameError(filename)
         try:
             for i in range(0, len(group), 2):
                 byte = group[i:i+2]
@@ -359,12 +362,12 @@
         except ValueError:
             # byte not in hex, e.g 'xy'
             raise InvalidFileNameError(filename)
-    
+
     # append rest of string
     if start == 0:
         wikiname = filename
     else:
-        parts.append(filename[start:len(filename)])   
+        parts.append(filename[start:len(filename)])
         wikiname = ''.join(parts)
 
     # This looks wrong, because at this stage "()" can be both errors
@@ -373,7 +376,7 @@
     # Filter invalid filenames. Any left (xx) must be invalid
     #if '(' in wikiname or ')' in wikiname:
     #    raise InvalidFileNameError(filename)
-    
+
     wikiname = decodeUserInput(wikiname, charsets)
     return wikiname
 
@@ -429,7 +432,7 @@
                 value = int(value)
             dict.__setitem__(self, key, value)
         self.loaded = True
-    
+
     def _put_meta(self):
         """ put the meta dict into an arbitrary filename.
             does not keep or modify state, does uncached, direct disk access.
@@ -486,7 +489,7 @@
     except AttributeError:
         _interwiki_list = {}
         lines = []
- 
+
         # order is important here, the local intermap file takes
         # precedence over the shared one, and is thus read AFTER
         # the shared one
@@ -504,7 +507,7 @@
         for line in lines:
             if not line or line[0] == '#': continue
             try:
-                line = "%s %s/InterWiki" % (line, request.getScriptname()) 
+                line = "%s %s/InterWiki" % (line, request.getScriptname())
                 wikitag, urlprefix, trash = line.split(None, 2)
             except ValueError:
                 pass
@@ -520,9 +523,9 @@
 
         # save for later
         request.cfg._interwiki_list = _interwiki_list
-    
+
     return _interwiki_list
-    
+
 def split_wiki(wikiurl):
     """ Split a wiki url, e.g:
     
@@ -696,7 +699,7 @@
     @return localized page_front_page, if there is a translation
     """
     return getSysPage(request, request.cfg.page_front_page)
-    
+
 
 def getHomePage(request, username=None):
     """
@@ -815,7 +818,7 @@
 # mimetype stuff ------------------------------------------------------------
 class MimeType(object):
     """ represents a mimetype like text/plain """
-    
+
     def __init__(self, mimestr=None, filename=None):
         self.major = self.minor = None # sanitized mime type and subtype
         self.params = {} # parameters like "charset" or others
@@ -825,13 +828,13 @@
             self.parse_mimetype(mimestr)
         elif filename:
             self.parse_filename(filename)
-    
+
     def parse_filename(self, filename):
         mtype, encoding = mimetypes.guess_type(filename)
         if mtype is None:
             mtype = 'application/octet-stream'
         self.parse_mimetype(mtype)
-        
+
     def parse_mimetype(self, mimestr):
         """ take a string like used in content-type and parse it into components,
             alternatively it also can process some abbreviated string like "wiki"
@@ -854,7 +857,7 @@
         if self.params.has_key('charset'):
             self.charset = self.params['charset'].lower()
         self.sanitize()
-            
+
     def parse_format(self, format):
         """ maps from what we currently use on-page in a #format xxx processing
             instruction to a sanitized mimetype major, minor tuple.
@@ -1038,7 +1041,7 @@
     """
     # Copy names from builtin plugins - so we dont destroy the value
     all_plugins = builtinPlugins(kind)[:]
-    
+
     # Add extension plugins without duplicates
     for plugin in wikiPlugins(kind, cfg):
         if plugin not in all_plugins:
@@ -1080,7 +1083,7 @@
                     etd = Parser
         cfg._EXT_TO_PARSER = etp
         cfg._EXT_TO_PARSER_DEFAULT = etd
-        
+
     return cfg._EXT_TO_PARSER.get(extension, cfg._EXT_TO_PARSER_DEFAULT)
 
 
@@ -1230,11 +1233,11 @@
         parameter_list = [None] * len(self.param_list)
         parameter_dict = {}
         check_list = [0] * len(self.param_list)
-            
+
         i = 0
         start = 0
         named = False
-        while start<len(input):
+        while start < len(input):
             match = re.match(self.param_re, input[start:])
             if not match: raise ValueError, "Misformatted value"
             start += match.end()
@@ -1256,7 +1259,7 @@
 
             parameter_list.append(value)
             if match.group("name"):
-                if not self.param_dict.has_key( match.group("name")):
+                if not self.param_dict.has_key(match.group("name")):
                     raise ValueError, "Unknown parameter name '%s'" % match.group("name")
                 nr = self.param_dict[match.group("name")]
                 if check_list[nr]:
@@ -1274,7 +1277,6 @@
             # check type
             #if not type in self.param_list[nr]:
 
-                
             i += 1
         return parameter_list, parameter_dict
 
@@ -1282,9 +1284,9 @@
     def _check_type(value, type, format):
         if type == 'n' and 's' in format: # n as s
             return value
-        
+
         if type in format: return value # x -> x
-        
+
         if type == 'i':
             if 'f' in format: return float(value) # i -> f
             elif 'b' in format: return value # i -> b
@@ -1296,7 +1298,7 @@
 
 
         if 's' in format: # * -> s
-            return str(value) 
+            return str(value)
         else:
             pass # XXX error
 
@@ -1422,14 +1424,14 @@
         text = params # default
     if formatter:
         url = "%s/%s" % (request.getScriptname(), params)
-        if on != None:
+        if on is not None:
             return formatter.url(on, url, css_class, **kw)
         return (formatter.url(1, url, css_class, **kw) +
                 formatter.rawHTML(text) +
                 formatter.url(0))
-    if on != None and not on:
+    if on is not None and not on:
         return '</a>'
-    
+
     attrs = ''
     if css_class:
         attrs += ' class="%s"' % css_class
@@ -1454,14 +1456,14 @@
     @rtype: list
     @return: lines like diff tool does output.
     """
-    false = lambda s: None 
+    false = lambda s: None
     if kw.get('ignorews', 0):
         d = difflib.Differ(false)
     else:
         d = difflib.Differ(false, false)
 
-    lines = list(d.compare(oldlines,newlines))
- 
+    lines = list(d.compare(oldlines, newlines))
+
     # return empty list if there were no changes
     changed = 0
     for l in lines:
@@ -1472,7 +1474,7 @@
 
     if not "we want the unchanged lines, too":
         if "no questionmark lines":
-            lines = filter(lambda line : line[0]!='?', lines)
+            lines = filter(lambda line: line[0] != '?', lines)
         return lines
 
 
@@ -1501,7 +1503,7 @@
                 count = 0
             else:
                 count = 0
-                i = i + 1                            
+                i += 1
             if marker == '-': lcount_old = lcount_old + 1
             else: lcount_new = lcount_new + 1
         elif marker == '?':
@@ -1510,7 +1512,7 @@
     # remove unchanged lines at the end
     if count > 3:
         lines[-count+3:] = []
-    
+
     return lines
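For reference, a small usage sketch of the linediff function touched above (assumes MoinMoin is importable; the markers follow difflib.Differ conventions: '-', '+', '?' and ' '):

    from MoinMoin.wikiutil import linediff

    old = ['one\n', 'two\n']
    new = ['one\n', 'three\n']
    for line in linediff(old, new):   # empty list if nothing changed
        print line,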
 
 
@@ -1529,16 +1531,16 @@
     from MoinMoin.Page import Page
     lines1 = Page(request, pagename1, rev=rev1).getlines()
     lines2 = Page(request, pagename2, rev=rev2).getlines()
-    
+
     lines = linediff(lines1, lines2, **kw)
     return lines
- 
+
 
 ########################################################################
 ### Tickets - used by RenamePage and DeletePage
 ########################################################################
 
-def createTicket(tm = None):
+def createTicket(tm=None):
     """Create a ticket using a site-specific secret (the config)"""
     import sha, time, types
     ticket = tm or "%010x" % time.time()
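The hunk above shows only the head of createTicket. As a hedged illustration of the scheme (not the exact MoinMoin code): the ticket embeds a timestamp and a digest over the timestamp plus a site-specific secret, so it can be re-verified later:

    import sha, time

    SECRET = 'site-specific secret'   # assumption; moin derives this from cfg

    def create_ticket(tm=None):
        # mirrors the "%010x" timestamp seen above (int() keeps %x happy)
        stamp = tm or "%010x" % int(time.time())
        return "%s.%s" % (stamp, sha.new(stamp + SECRET).hexdigest())

    def check_ticket(ticket):
        # recompute from the embedded timestamp and compare
        stamp = ticket.split('.', 1)[0]
        return ticket == create_ticket(stamp)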
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/xmlrpc/RemoteScript.py	Sun Jul 09 22:39:15 2006 +0200
@@ -0,0 +1,30 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - Remote Script Execution Server part
+
+    @copyright: 2006 by MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+from MoinMoin.script import MoinScript
+
+def execute(xmlrpcobj, their_secret, argv):
+    request = xmlrpcobj.request
+    their_secret = xmlrpcobj._instr(their_secret)
+
+    our_secret = request.cfg.remote_script_secret
+    if not our_secret:
+        return u"No password set"
+
+    if our_secret != their_secret:
+        return u"Invalid password"
+
+    try:
+        request.log("RemoteScript argv: %r" % argv)
+        MoinScript(argv).run(showtime=0)
+    except Exception, err:
+        e = str(err)
+        request.log(e)
+        return xmlrpcobj._outstr(e)
+    return xmlrpcobj._outstr(u"OK")
+
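A hedged client-side sketch for the new RemoteScript server part: it assumes the wiki dispatches XML-RPC plugin calls under the module name, that remote_script_secret is set in the target wiki's config, and that argv is the moin script argument list; URL, secret and arguments are examples only:

    import xmlrpclib

    wiki = xmlrpclib.ServerProxy("http://localhost:8080/?action=xmlrpc2")
    print wiki.RemoteScript(u"the-secret", ["--help"])   # argv layout assumed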
--- a/MoinMoin/xmlrpc/__init__.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/MoinMoin/xmlrpc/__init__.py	Sun Jul 09 22:39:15 2006 +0200
@@ -484,8 +484,7 @@
 
     def xmlrpc_searchPages(self, query_string):
         from MoinMoin import search
-        query = search.QueryParser().parse_query(query_string)
-        results = search.searchPages(self.request, query)
+        results = search.searchPages(self.request, query_string)
         results.formatter = self.request.html_formatter
         results.request = self.request
         return [(self._outstr(hit.page_name),
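With this change, XML-RPC clients pass the raw query string and no longer need to touch QueryParser. A short client sketch (URL and query are examples; per the return statement above, each hit is a tuple starting with the page name):

    import xmlrpclib

    wiki = xmlrpclib.ServerProxy("http://localhost:8080/?action=xmlrpc2")
    for hit in wiki.searchPages(u"CategoryHomepage"):
        print hit[0]   # the page name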
--- a/docs/CHANGES	Sun Jul 09 15:31:02 2006 +0200
+++ b/docs/CHANGES	Sun Jul 09 22:39:15 2006 +0200
@@ -159,6 +159,8 @@
     * @SIG@ etc. generate local time stamps now, no difference visible on page
       view.
     * Speeded up linkto search by avoiding read locks on the pagelinks file.
+    * The i18n system now loads *.po files directly (no *.py or *.mo any
+      more) and caches the results (farm-wide cache/i18n/*); see the
+      sketch after this hunk.
 
   Bugfixes:
     * on action "info" page, "revert" link will not be displayed for empty page
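Following up on the i18n entry above, a minimal sketch of direct *.po loading with a farm-wide cache. Everything here is illustrative: the tiny reader handles only bare msgid/msgstr pairs (no plural forms, no multi-line strings) and the cache layout merely mimics cache/i18n/*:

    import os, cPickle

    def parse_po(filename):
        # toy .po reader: single-line msgid/msgstr pairs only
        translations, msgid = {}, None
        for line in open(filename):
            line = line.strip()
            if line.startswith('msgid "'):
                msgid = line[7:-1]
            elif line.startswith('msgstr "') and msgid is not None:
                translations[msgid] = line[8:-1]
                msgid = None
        return translations

    def load_language(lang, cache_dir='cache/i18n'):
        # cache the parsed catalog so the .po file is read only once
        cache = os.path.join(cache_dir, lang + '.pickle')
        if os.path.exists(cache):
            return cPickle.load(open(cache, 'rb'))
        translations = parse_po('MoinMoin/i18n/%s.po' % lang)
        cPickle.dump(translations, open(cache, 'wb'), 2)
        return translations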
--- a/docs/CHANGES.fpletz	Sun Jul 09 15:31:02 2006 +0200
+++ b/docs/CHANGES.fpletz	Sun Jul 09 22:39:15 2006 +0200
@@ -2,23 +2,27 @@
 =============================
 
   Known main issues:
-    * Regex searching with Xapian?
+    * Only term-based regex searching is possible; add a modifier or
+      heuristic to enable use of _moinSearch for full compatibility?
+    * HACK: MoinMoin.Xapian.Index._get_languages (wait for proper metadata)
 
   ToDo:
-    * HACK: MoinMoin.Xapian.Index._get_languages (wait for proper
-      metadata)
-    * Mockup the new search UI
+    * Implement the new search UI
     * Write/update documentation for all the new search stuff
     * Indexing and searching of categories (new term prefix)
-    * MoinMoin.Xapian.use_stemming -> request.cfg.xapian_use_stemming
+    * Drop _moinSearch when using Xapian and use the term positions
+      provided by Xapian itself; this needs some reworking of
+      WikiAnalyzer/xapwrap to get the positions of stemmed words right
 
   New Features:
     * Faster search thanks to Xapian
     * Searching for languages with new prefix lang/language, i.e. lang:de
-      Note: Only available when Xapian is activated
+      Note: Currently only available when Xapian is used
     * New config options:
         xapian_search        0      enables xapian-powered search
         xapian_index_dir     None   directory for xapian indices
+        xapian_stemming      True   toggles use of the stemmer; falls back
+                                    to False if no stemmer is installed
+                                    (see the config sketch below)
   
   Bugfixes (only stuff that is buggy in moin/1.6 main branch):
     * ...
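A minimal wikiconfig.py sketch using the new config options listed above (assuming the usual DefaultConfig base; values and the index path are examples, not recommendations):

    from MoinMoin.multiconfig import DefaultConfig

    class Config(DefaultConfig):
        xapian_search = 1                     # enable xapian-powered search
        xapian_index_dir = '/var/cache/moin'  # example index location
        xapian_stemming = True                # falls back without a stemmer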
@@ -80,3 +84,41 @@
     * All stemming/matching issues resolved (hopefully)
     * Works now without xapian installed (enhance error reporting)
 
+2006-06-21
+    * Made stemming configurable (xapian_stemming), falling back to
+      False if no stemmer is available
+    * Xapian.use_stemming -> request.cfg.xapian_stemming
+    * Fixed bug in the selection of the stemming language
+
+2006-06-27
+    * Split out MoinMoin/search.py into MoinMoin/search/*.py; no more
+      need to invoke QueryParser manually when using searchPages; minor
+      refactoring
+
+2006-06-28
+    * Abstraction of a locked search engine index:
+      MoinMoin.search.builtin.BaseIndex, MoinMoin.search.Xapian.Index is
+      derived from this, cleanups in calling structure and function
+      prototypes to make it more extensible
+
+2006-06-29
+    * Tested some ideas with regexp searching
+
+2006-07-01
+    * Fully implemented term-based regexp searching
+
+2006-07-04
+    * Evaluating the current framework for the new UI (no new sane code to
+      commit)
+
+2006-07-05
+    * Indexing correct positions in xapwrap
+
+2006-07-06
+    * Experimented with Xapian to get correct positions and to find
+      where to integrate them in MoinMoin
+
+2006-07-07
+    * Basic (quick and dirty, limitations and bugs included, but
+      commit-ready) implementation of getting matches out of the Xapian DB
+
--- a/setup.py	Sun Jul 09 15:31:02 2006 +0200
+++ b/setup.py	Sun Jul 09 22:39:15 2006 +0200
@@ -15,6 +15,23 @@
 
 from MoinMoin.version import release, revision
 
+# We need this for compatibility with distutils from Python 2.3; Python 2.4
+# has the 'package_data' keyword to the 'setup' function to install data in
+# packages. See http://wiki.python.org/moin/DistutilsInstallDataScattered
+from distutils.command.install_data import install_data
+class smart_install_data(install_data):
+    def run(self):
+        i18n_data_files = [(target, files) for (target, files) in self.data_files if target.startswith('MoinMoin/i18n')]
+        share_data_files = [(target, files) for (target, files) in self.data_files if target.startswith('share/moin')]
+        # first install the share/moin stuff:
+        self.data_files = share_data_files
+        install_data.run(self)
+        # now we need to install the *.po files to the package dir:
+        # need to change self.install_dir to the library dir
+        install_cmd = self.get_finalized_command('install')
+        self.install_dir = getattr(install_cmd, 'install_lib')
+        self.data_files = i18n_data_files
+        return install_data.run(self)
 
 #############################################################################
 ### Helpers
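Illustration only (not part of setup.py): the filtering that smart_install_data.run() applies to self.data_files, routing i18n entries to the package dir and share/moin entries to the data dir; the tuples are made-up examples:

    data_files = [
        ('share/moin/data', ['wiki/data/README']),
        ('MoinMoin/i18n', ['MoinMoin/i18n/de.po']),
    ]
    i18n = [(t, f) for (t, f) in data_files if t.startswith('MoinMoin/i18n')]
    share = [(t, f) for (t, f) in data_files if t.startswith('share/moin')]
    assert i18n == [('MoinMoin/i18n', ['MoinMoin/i18n/de.po'])]
    assert share == [('share/moin/data', ['wiki/data/README'])]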
@@ -61,7 +78,7 @@
     dir = dir.rstrip('/')
     strip = len(dir) + 1
     found = []
-    os.path.walk(dir, visit, (prefix, strip, found)) 
+    os.path.walk(dir, visit, (prefix, strip, found))
     return found
 
 def visit((prefix, strip, found), dirname, names):
@@ -86,7 +103,7 @@
             files.append(path)
     destination = os.path.join(prefix, dirname[strip:])
     found.append((destination, files))
-    
+
 
 #############################################################################
 ### Build script files
@@ -116,7 +133,7 @@
         if not self.package_name:
             raise Exception("You have to inherit build_scripts_create and"
                 " provide a package name")
-        
+
         to_module = string.maketrans('-/', '_.')
 
         self.mkpath(self.build_dir)
@@ -238,25 +255,25 @@
         'MoinMoin._tests',
     ],
 
-    # TODO package_dir and package_data only works for python >= 2.4
-    # in case we don't require python >= 2.4 for 1.6 release, we need to find
-    # a solution for python 2.3.x
-    'package_dir': { 'MoinMoin.i18n': 'MoinMoin/i18n', },
-    'package_data': { 'MoinMoin.i18n': ['README', 'Makefile', 'MoinMoin.pot', 'POTFILES.in',
-                                        '*.po',
-                                        'tools/*',], },
+    # We can use package_* instead of the smart_install_data hack when we
+    # require Python 2.4.
+    #'package_dir': { 'MoinMoin.i18n': 'MoinMoin/i18n', },
+    #'package_data': { 'MoinMoin.i18n': ['README', 'Makefile', 'MoinMoin.pot', 'POTFILES.in',
+    #                                    '*.po',
+    #                                    'tools/*',], },
 
     # Override certain command classes with our own ones
     'cmdclass': {
         'build_scripts': build_scripts_moin,
+        'install_data': smart_install_data, # hack needed for 2.3
     },
 
     'scripts': moin_scripts,
 
-    # This copy the contents of wiki dir under sys.prefix/share/moin
+    # This copies the contents of the wiki dir under sys.prefix/share/moin
     # Do not put files that should not be installed in the wiki dir, or
     # clean the dir before you make the distribution tarball.
-    'data_files': makeDataFiles('share/moin', 'wiki'),
+    'data_files': makeDataFiles('share/moin', 'wiki') + makeDataFiles('MoinMoin/i18n', 'MoinMoin/i18n')
 }
 
 if hasattr(distutils.dist.DistributionMetadata, 'get_keywords'):
@@ -271,7 +288,7 @@
 except distutils.errors.DistutilsPlatformError, ex:
     print
     print str(ex)
-    
+
     print """
 POSSIBLE CAUSE