changeset 1218:b3c2d87024c3

merge with main
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Sat, 05 Aug 2006 20:24:25 +0200
parents 237ca54182a7 (diff) 4d0f0ecc7880 (current diff)
children d56eeab4e070
files MoinMoin/parser/ParserBase.py
diffstat 11 files changed, 410 insertions(+), 119 deletions(-)
--- a/MoinMoin/action/fullsearch.py	Sat Aug 05 00:15:06 2006 +0200
+++ b/MoinMoin/action/fullsearch.py	Sat Aug 05 20:24:25 2006 +0200
@@ -42,6 +42,7 @@
     needle = request.form.get(fieldname, [''])[0]
     case = int(request.form.get('case', [0])[0])
     regex = int(request.form.get('regex', [0])[0]) # no interface currently
+    hitsFrom = int(request.form.get('from', [0])[0])
 
     max_context = 1 # only show first `max_context` contexts XXX still unused
 
@@ -92,15 +93,16 @@
     request.write(request.formatter.startContent("content"))
 
     # First search stats
-    request.write(results.stats(request, request.formatter))
+    request.write(results.stats(request, request.formatter, hitsFrom))
 
     # Then search results
     info = not titlesearch
     if context:
-        output = results.pageListWithContext(request, request.formatter, info=info,
-                                             context=context)
+        output = results.pageListWithContext(request, request.formatter,
+                info=info, context=context, hitsFrom=hitsFrom)
     else:
-        output = results.pageList(request, request.formatter, info=info)
+        output = results.pageList(request, request.formatter, info=info,
+                hitsFrom=hitsFrom)
     request.write(output)
 
     request.write(request.formatter.endContent())
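
The paging offset rides in the request's "from" query parameter; everything
below that renders results now receives it as hitsFrom. A minimal sketch of
the decoding step, assuming MoinMoin's usual form dict where every value is
a list of strings (the URL and page name are illustrative only):

    # e.g. /FrontPage?action=fullsearch&value=needle&from=10
    form = {'action': ['fullsearch'], 'value': ['needle'], 'from': ['10']}
    hitsFrom = int(form.get('from', [0])[0])
    assert hitsFrom == 10
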
--- a/MoinMoin/config/multiconfig.py	Sat Aug 05 00:15:06 2006 +0200
+++ b/MoinMoin/config/multiconfig.py	Sat Aug 05 20:24:25 2006 +0200
@@ -300,6 +300,7 @@
     xapian_search = False # disabled until xapian is finished
     xapian_index_dir = None
     xapian_stemming = True
+    search_results_per_page = 10
 
     mail_login = None # or "user pwd" if you need to use SMTP AUTH
     mail_sendmail = None # "/usr/sbin/sendmail -t -i" to not use SMTP, but sendmail
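
The new search_results_per_page knob is an ordinary class attribute, so a
wiki can override it per instance. A minimal sketch of a wikiconfig.py doing
so; the import path and the other attribute shown follow the usual MoinMoin
1.6 layout and are assumptions, not part of this changeset:

    from MoinMoin.config.multiconfig import DefaultConfig

    class Config(DefaultConfig):
        xapian_search = True            # turn on the Xapian backend
        search_results_per_page = 25    # paginate fullsearch hits in chunks of 25
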
--- a/MoinMoin/macro/FullSearch.py	Sat Aug 05 00:15:06 2006 +0200
+++ b/MoinMoin/macro/FullSearch.py	Sat Aug 05 20:24:25 2006 +0200
@@ -32,13 +32,63 @@
 
 Dependencies = ["pages"]
 
+
+def search_box(type, macro):
+    """ Make a search box
+
+    Make both Title Search and Full Search boxes, according to type.
+
+    @param type: search box type: 'titlesearch' or 'fullsearch'
+    @rtype: unicode
+    @return: search box html fragment
+    """
+    _ = macro._
+    if macro.form.has_key('value'):
+        default = wikiutil.escape(macro.form["value"][0], quote=1)
+    else:
+        default = ''
+
+    # Title search settings
+    boxes = ''
+    button = _("Search Titles")
+
+    # Special code for fullsearch
+    if type == "fullsearch":
+        boxes = [
+            u'<br>',
+            u'<input type="checkbox" name="context" value="160" checked="checked">',
+            _('Display context of search results'),
+            u'<br>',
+            u'<input type="checkbox" name="case" value="1">',
+            _('Case-sensitive searching'),
+            ]
+        boxes = u'\n'.join(boxes)
+        button = _("Search Text")
+
+    # Format
+    type = (type == "titlesearch")
+    html = [
+        u'<form method="get" action="">',
+        u'<div>',
+        u'<input type="hidden" name="action" value="fullsearch">',
+        u'<input type="hidden" name="titlesearch" value="%i">' % type,
+        u'<input type="text" name="value" size="30" value="%s">' % default,
+        u'<input type="submit" value="%s">' % button,
+        boxes,
+        u'</div>',
+        u'</form>',
+        ]
+    html = u'\n'.join(html)
+    return macro.formatter.rawHTML(html)
+
+
 def execute(macro, needle):
     request = macro.request
     _ = request.getText
 
     # if no args given, invoke "classic" behavior
     if needle is None:
-        return macro._m_search("fullsearch")
+        return search_box("fullsearch", macro)
 
     # With empty arguments, simulate title click (backlinks to page)
     elif needle == '':
@@ -57,6 +107,6 @@
     results = search.searchPages(request, needle)
     results.sortByPagename()
 
-    return results.pageList(request, macro.formatter)
+    return results.pageList(request, macro.formatter, paging=False)
 
 
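
With search_box moved out of the Macro class it becomes a plain importable
function; the macro/__init__.py hunk below relies on exactly that. A hedged
sketch of the same pattern from a hypothetical plugin macro:

    from MoinMoin.macro.FullSearch import search_box

    def execute(macro, args):
        # render the same form the built-in TitleSearch macro renders
        return search_box("titlesearch", macro)
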
--- a/MoinMoin/macro/SystemInfo.py	Sat Aug 05 00:15:06 2006 +0200
+++ b/MoinMoin/macro/SystemInfo.py	Sat Aug 05 20:24:25 2006 +0200
@@ -111,12 +111,19 @@
     row(_('Local extension parsers'),
         ', '.join(wikiutil.wikiPlugins('parser', Macro.cfg)) or nonestr)
 
-    state = (_('Disabled'), _('Enabled'))
     from MoinMoin.search.builtin import Search
-    row(_('Xapian search'), '%s, %sactive' % (state[request.cfg.xapian_search],
-                not Search._xapianIndex(request) and 'not ' or ''))
+    xapState = (_('Disabled'), _('Enabled'))
+    idxState = (_('index available'), _('index unavailable'))
+    idx = Search._xapianIndex(request)
+    available = idx and idxState[0] or idxState[1]
+    mtime = _('last modified: %s') % (idx and
+            request.user.getFormattedDateTime(
+                wikiutil.version2timestamp(idx.mtime())) or
+                _('N/A'))
+    row(_('Xapian search'), '%s, %s, %s'
+            % (xapState[request.cfg.xapian_search], available, mtime))
 
-    row(_('Active threads'), t_count or 'N/A')
+    row(_('Active threads'), t_count or _('N/A'))
     buf.write(u'</dl>')
 
     return Macro.formatter.rawHTML(buf.getvalue())
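
The idx and idxState[0] or idxState[1] construct above is the pre-Python-2.5
spelling of a conditional expression; it is only safe because both candidate
strings are non-empty. A one-line illustration:

    # cond and A or B picks A when cond is truthy, else B -- but it silently
    # falls through to B whenever A itself is falsy, so A must be truthy here.
    idx = None
    assert (idx and 'index available' or 'index unavailable') == 'index unavailable'
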
--- a/MoinMoin/macro/__init__.py	Sat Aug 05 00:15:06 2006 +0200
+++ b/MoinMoin/macro/__init__.py	Sat Aug 05 20:24:25 2006 +0200
@@ -145,55 +145,8 @@
             return self.defaultDependency
 
     def _macro_TitleSearch(self, args):
-        return self._m_search("titlesearch")
-
-    def _m_search(self, type):
-        """ Make a search box
-
-        Make both Title Search and Full Search boxes, according to type.
-
-        @param type: search box type: 'titlesearch' or 'fullsearch'
-        @rtype: unicode
-        @return: search box html fragment
-        """
-        _ = self._
-        if self.form.has_key('value'):
-            default = wikiutil.escape(self.form["value"][0], quote=1)
-        else:
-            default = ''
-
-        # Title search settings
-        boxes = ''
-        button = _("Search Titles")
-
-        # Special code for fullsearch
-        if type == "fullsearch":
-            boxes = [
-                u'<br>',
-                u'<input type="checkbox" name="context" value="160" checked="checked">',
-                _('Display context of search results'),
-                u'<br>',
-                u'<input type="checkbox" name="case" value="1">',
-                _('Case-sensitive searching'),
-                ]
-            boxes = u'\n'.join(boxes)
-            button = _("Search Text")
-
-        # Format
-        type = (type == "titlesearch")
-        html = [
-            u'<form method="get" action="">',
-            u'<div>',
-            u'<input type="hidden" name="action" value="fullsearch">',
-            u'<input type="hidden" name="titlesearch" value="%i">' % type,
-            u'<input type="text" name="value" size="30" value="%s">' % default,
-            u'<input type="submit" value="%s">' % button,
-            boxes,
-            u'</div>',
-            u'</form>',
-            ]
-        html = u'\n'.join(html)
-        return self.formatter.rawHTML(html)
+        from FullSearch import search_box
+        return search_box("titlesearch", self)
 
     def _macro_GoTo(self, args):
         """ Make a goto box
@@ -332,7 +285,7 @@
         results = search.searchPages(self.request, needle,
                 titlesearch=1, case=case)
         results.sortByPagename()
-        return results.pageList(self.request, self.formatter)
+        return results.pageList(self.request, self.formatter, paging=False)
 
     def _macro_InterWiki(self, args):
         from StringIO import StringIO
--- a/MoinMoin/search/Xapian.py	Sat Aug 05 00:15:06 2006 +0200
+++ b/MoinMoin/search/Xapian.py	Sat Aug 05 20:24:25 2006 +0200
@@ -170,7 +170,9 @@
                        #  the D term, and changing the last digit to a '2' if it's a '3')
                        #X   longer prefix for user-defined use
         'linkto': 'XLINKTO', # this document links to that document
-        'stem_lang': 'XSTEMLANG', # ISO Language code this document was stemmed in 
+        'stem_lang': 'XSTEMLANG', # ISO Language code this document was stemmed in
+        'category': 'XCAT', # category this document belongs to
+        'full_title': 'XFT', # full title (for regex)
                        #Y   year (four digits)
     }
 
@@ -213,6 +215,7 @@
     
     def _do_queued_updates(self, request, amount=5):
         """ Assumes that the write lock is acquired """
+        self.touch()
         writer = xapidx.Index(self.dir, True)
         writer.configure(self.prefixMap, self.indexValueMap)
         pages = self.queue.pages()[:amount]
@@ -249,7 +252,7 @@
             mtime = wikiutil.timestamp2version(mtime)
             if mode == 'update':
                 query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
-                docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
+                enq, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
                 if docs:
                     doc = docs[0] # there should be only one
                     uid = doc['uid']
@@ -316,6 +319,22 @@
         # return actual lang and lang to stem in
         return (lang, default_lang)
 
+    def _get_categories(self, page):
+        body = page.get_raw_body()
+
+        prev, next = (0, 1)
+        pos = 0
+        while next:
+            if next != 1:
+                pos += next.end()
+            prev, next = next, re.search(r'----*\r?\n', body[pos:])
+
+        if not prev or prev == 1:
+            return []
+
+        return [cat.lower()
+                for cat in re.findall(r'Category([^\s]+)', body[pos:])]
+
     def _index_page(self, writer, page, mode='update'):
         """ Index a page - assumes that the write lock is acquired
             @arg writer: the index writer object
@@ -331,6 +350,7 @@
         itemid = "%s:%s" % (wikiname, pagename)
         # XXX: Hack until we get proper metadata
         language, stem_language = self._get_languages(page)
+        categories = self._get_categories(page)
         updated = False
 
         if mode == 'update':
@@ -338,7 +358,7 @@
             # you can just call database.replace_document(uid_term, doc)
             # -> done in xapwrap.index.Index.index()
             query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', itemid))
-            docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
+            enq, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', 'wikiname', ])
             if docs:
                 doc = docs[0] # there should be only one
                 uid = doc['uid']
@@ -359,9 +379,12 @@
             xtitle = xapdoc.TextField('title', pagename, True) # prefixed
             xkeywords = [xapdoc.Keyword('itemid', itemid),
                     xapdoc.Keyword('lang', language),
-                    xapdoc.Keyword('stem_lang', stem_language)]
+                    xapdoc.Keyword('stem_lang', stem_language),
+                    xapdoc.Keyword('full_title', pagename.lower())]
             for pagelink in page.getPageLinks(request):
                 xkeywords.append(xapdoc.Keyword('linkto', pagelink))
+            for category in categories:
+                xkeywords.append(xapdoc.Keyword('category', category))
             xcontent = xapdoc.TextField('content', page.get_raw_body())
             doc = xapdoc.Document(textFields=(xcontent, xtitle),
                                   keywords=xkeywords,
@@ -387,7 +410,7 @@
             mtime = wikiutil.timestamp2version(os.path.getmtime(filename))
             if mode == 'update':
                 query = xapidx.RawQuery(xapdoc.makePairForWrite('itemid', att_itemid))
-                docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', ])
+                enq, docs = writer.search(query, valuesWanted=['pagename', 'attachment', 'mtime', ])
                 if debug: request.log("##%r %r" % (filename, docs))
                 if docs:
                     doc = docs[0] # there should be only one
@@ -446,6 +469,7 @@
             mode = 'add'
 
         try:
+            self.touch()
             writer = xapidx.Index(self.dir, True)
             writer.configure(self.prefixMap, self.indexValueMap)
             pages = request.rootpage.getPageList(user='', exists=1)
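
_get_categories scans for the last horizontal rule in the page body and
harvests the CategoryXxx words after it, following the wiki convention of a
category footer. A self-contained restatement of that scan using the same
regexps, for reference:

    import re

    def footer_categories(body):
        # end offset of the last '----' rule line; categories live below it
        rules = [m.end() for m in re.finditer(r'----*\r?\n', body)]
        if not rules:
            return []
        # 'CategoryFoo' is indexed as the lowercased suffix 'foo'
        return [cat.lower()
                for cat in re.findall(r'Category([^\s]+)', body[rules[-1]:])]

    assert footer_categories(u'text\n----\nCategoryFoo CategoryBar\n') == [u'foo', u'bar']
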
--- a/MoinMoin/search/builtin.py	Sat Aug 05 00:15:06 2006 +0200
+++ b/MoinMoin/search/builtin.py	Sat Aug 05 20:24:25 2006 +0200
@@ -10,12 +10,12 @@
     @license: GNU GPL, see COPYING for details
 """
 
-import time, sys, os, errno
+import time, sys, os, errno, codecs
 from MoinMoin import wikiutil, config
 from MoinMoin.Page import Page
 from MoinMoin.util import filesys, lock
 from MoinMoin.search.results import getSearchResults
-from MoinMoin.search.queryparser import TextMatch, TitleMatch
+from MoinMoin.search.queryparser import Match, TextMatch, TitleMatch
 
 ##############################################################################
 # Search Engine Abstraction
@@ -159,7 +159,7 @@
         ##    self.indexPagesInNewThread(request)
 
     def _main_dir(self):
-        raise NotImplemented
+        raise NotImplementedError
 
     def exists(self):
         """ Check if index exists """        
@@ -167,9 +167,12 @@
                 
     def mtime(self):
         return os.path.getmtime(self.dir)
+
+    def touch(self):
+        os.utime(self.dir, None)
     
     def _search(self, query):
-        raise NotImplemented
+        raise NotImplementedError
 
     def search(self, query):
         #if not self.read_lock.acquire(1.0):
@@ -240,7 +243,7 @@
         When called in a new thread, lock is acquired before the call,
         and this method must release it when it finishes or fails.
         """
-        raise NotImplemented
+        raise NotImplementedError
 
     def _do_queued_updates_InNewThread(self):
         """ do queued index updates in a new thread
@@ -251,7 +254,7 @@
             self.request.log("can't index: can't acquire lock")
             return
         try:
-            def lockedDecorator(self, f):
+            def lockedDecorator(f):
                 def func(*args, **kwargs):
                     try:
                         return f(*args, **kwargs)
@@ -280,10 +283,10 @@
             raise
 
     def _do_queued_updates(self, request, amount=5):
-        raise NotImplemented
+        raise NotImplementedError
 
     def optimize(self):
-        raise NotImplemented
+        raise NotImplementedError
 
     def contentfilter(self, filename):
         """ Get a filter for content of filename and return unicode content. """
@@ -308,7 +311,7 @@
         return mt.mime_type(), data
 
     def test(self, request):
-        raise NotImplemented
+        raise NotImplementedError
 
     def _indexingRequest(self, request):
         """ Return a new request that can be used for index building.
@@ -393,7 +396,7 @@
         """
         pages = None
         index = self._xapianIndex(self.request)
-        if index: #and self.query.xapian_wanted():
+        if index and self.query.xapian_wanted():
             self.request.clock.start('_xapianSearch')
             try:
                 from MoinMoin.support import xapwrap
@@ -408,8 +411,9 @@
                     for k, v in d.items():
                         d[k] = d[k].decode(config.charset)
                     return d
-                pages = [{'uid': hit['uid'], 'values': dict_decode(hit['values'])}
-                        for hit in hits]
+                #pages = [{'uid': hit['uid'], 'values': dict_decode(hit['values'])}
+                #        for hit in hits]
+                pages = [dict_decode(hit['values']) for hit in hits]
                 self.request.log("xapianSearch: finds pages: %r" % pages)
                 self._xapianEnquire = enq
                 self._xapianIndex = index
@@ -418,9 +422,11 @@
             #except AttributeError:
             #    pages = []
             self.request.clock.stop('_xapianSearch')
-            return self._getHits(hits, self._xapianMatch)
-        else:
-            return self._moinSearch(pages)
+
+            if not self.query.xapian_need_postproc():
+                return self._getHits(hits, self._xapianMatch)
+        
+        return self._moinSearch(pages)
 
     def _xapianMatchDecider(self, term, pos):
         if term[0] == 'S':      # TitleMatch
@@ -439,9 +445,14 @@
                         len(positions[pos]) < len(term_name):
                     positions[pos] = term_name
             term.next()
-        return [self._xapianMatchDecider(term, pos) for pos, term
+        matches = [self._xapianMatchDecider(term, pos) for pos, term
             in positions.iteritems()]
 
+        if not matches:
+            return [Match()]    # dummy for metadata, we got a match!
+
+        return matches
+
     def _moinSearch(self, pages=None):
         """ Search pages using moin's built-in full text search 
         
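
Taken together, the _xapianSearch changes mean Xapian answers on its own only
when the whole query tree is expressible there and needs no postprocessing;
otherwise its hits merely narrow the page set that _moinSearch re-checks. A
schematic of the dispatch, with all callables as stand-ins for the real
methods:

    def dispatch_search(query, index, xapian_run, get_hits, moin_search):
        pages = None
        if index and query.xapian_wanted():
            pages = xapian_run(query)          # fast candidate retrieval
            if not query.xapian_need_postproc():
                return get_hits(pages)         # Xapian's answer is final
        # regex / case-sensitive parts: run moin's exact matcher,
        # restricted to the Xapian candidates when there are any
        return moin_search(pages)
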
--- a/MoinMoin/search/queryparser.py	Sat Aug 05 00:15:06 2006 +0200
+++ b/MoinMoin/search/queryparser.py	Sat Aug 05 20:24:25 2006 +0200
@@ -177,6 +177,12 @@
             wanted = wanted and term.xapian_wanted()
         return wanted
 
+    def xapian_need_postproc(self):
+        for term in self._subterms:
+            if term.xapian_need_postproc():
+                return True
+        return False
+
     def xapian_term(self, request, allterms):
         # sort negated terms
         terms = []
@@ -266,9 +272,10 @@
         matches = []
 
         # Search in page name
-        results = self.titlesearch.search(page)
-        if results:
-            matches.extend(results)
+        if self.titlesearch:
+            results = self.titlesearch.search(page)
+            if results:
+                matches.extend(results)
 
         # Search in page body
         body = page.get_raw_body()
@@ -301,15 +308,19 @@
             return []
 
     def xapian_wanted(self):
+        # XXX: Add option for term-based matching
         return not self.use_re
 
+    def xapian_need_postproc(self):
+        return self.case
+
     def xapian_term(self, request, allterms):
         if self.use_re:
             # basic regex matching per term
             terms = [term for term in allterms() if
                     self.search_re.match(term)]
             if not terms:
-                return None
+                return Query()
             queries = [Query(Query.OP_OR, terms)]
         else:
             analyzer = Xapian.WikiAnalyzer(request=request,
@@ -332,7 +343,7 @@
                     t = [UnicodeQuery(w) for w, pos in analyzer.tokenize(t)]
                 queries.append(Query(Query.OP_AND, t))
 
-            if stemmed:
+            if not self.case and stemmed:
                 self._build_re(' '.join(stemmed), use_re=False,
                         case=self.case, stemmed=True)
 
@@ -383,7 +394,8 @@
         for match in self.search_re.finditer(page.page_name):
             if page.request.cfg.xapian_stemming:
                 # somewhere in regular word
-                if page.page_name[match.start()] not in config.chars_upper and \
+                if not self.case and \
+                        page.page_name[match.start()] not in config.chars_upper and \
                         page.page_name[match.start()-1] in config.chars_lower:
                     continue
 
@@ -408,15 +420,25 @@
             return []
 
     def xapian_wanted(self):
-        return not self.use_re
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return self.case
 
     def xapian_term(self, request, allterms):
         if self.use_re:
             # basic regex matching per term
-            terms = [term for term in allterms() if
-                    self.search_re.match(term)]
+            terms = []
+            found = False
+            for term in allterms():
+                if term[:4] == 'XFT:':
+                    found = True
+                    if self.search_re.findall(term[4:]):
+                        terms.append(term)
+                elif found:
+                    break
             if not terms:
-                return None
+                return Query()
             queries = [Query(Query.OP_OR, terms)]
         else:
             analyzer = Xapian.WikiAnalyzer(request=request,
@@ -444,7 +466,7 @@
 
                 queries.append(Query(Query.OP_AND, t))
 
-            if stemmed:
+            if not self.case and stemmed:
                 self._build_re(' '.join(stemmed), use_re=False,
                         case=self.case, stemmed=True)
 
@@ -522,7 +544,10 @@
             return []
 
     def xapian_wanted(self):
-        return not self.use_re
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return self.case
 
     def xapian_term(self, request, allterms):
         prefix = Xapian.Index.prefixMap['linkto']
@@ -540,7 +565,7 @@
                     continue
 
             if not terms:
-                return None
+                return Query()
             return Query(Query.OP_OR, terms)
         else:
             return UnicodeQuery('%s:%s' % (prefix, self.pattern))
@@ -560,7 +585,7 @@
         self._pattern = pattern.lower()
         self.negated = 0
         self.use_re = use_re
-        self.case = case
+        self.case = False       # not case-sensitive!
         self.xapian_called = False
         self._build_re(self._pattern, use_re=use_re, case=case)
 
@@ -582,7 +607,10 @@
             return [Match()]
 
     def xapian_wanted(self):
-        return not self.use_re
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return False            # case-sensitivity would make no sense
 
     def xapian_term(self, request, allterms):
         self.xapian_called = True
@@ -601,12 +629,63 @@
                     continue
 
             if not terms:
-                return None
+                return Query()
             return Query(Query.OP_OR, terms)
         else:
             pattern = self.pattern
             return UnicodeQuery('%s%s' % (prefix, pattern))
 
+class CategorySearch(TextSearch):
+    """ Search the pages belonging to a category """
+
+    def __init__(self, *args, **kwargs):
+        TextSearch.__init__(self, *args, **kwargs)
+        self.titlesearch = None
+
+    def _build_re(self, pattern, **kwargs):
+        kwargs['use_re'] = True
+        TextSearch._build_re(self,
+                r'(----(-*)(\r)?\n)(.*)Category%s\b' % pattern, **kwargs)
+
+    def costs(self):
+        return 5000 # cheaper than a TextSearch
+
+    def __unicode__(self):
+        neg = self.negated and '-' or ''
+        return u'%s!"%s"' % (neg, unicode(self._pattern))
+
+    def highlight_re(self):
+        return ""
+
+    def xapian_wanted(self):
+        return True             # only easy regexps possible
+
+    def xapian_need_postproc(self):
+        return self.case
+
+    def xapian_term(self, request, allterms):
+        self.xapian_called = True
+        prefix = Xapian.Index.prefixMap['category']
+        if self.use_re:
+            # basic regex matching per term
+            terms = []
+            found = None
+            n = len(prefix)
+            for term in allterms():
+                if prefix == term[:n]:
+                    found = True
+                    if self.search_re.match(term[n+1:]):
+                        terms.append(term)
+                elif found:
+                    continue
+
+            if not terms:
+                return Query()
+            return Query(Query.OP_OR, terms)
+        else:
+            pattern = self._pattern.lower()
+            return UnicodeQuery('%s:%s' % (prefix, pattern))
+
 
 ##############################################################################
 ### Parse Query
@@ -693,6 +772,7 @@
         case = self.case
         linkto = False
         lang = False
+        category = False
 
         for m in modifiers:
             if "title".startswith(m):
@@ -705,8 +785,21 @@
                 linkto = True
             elif "language".startswith(m):
                 lang = True
+            elif "category".startswith(m):
+                category = True
 
-        if lang:
+        # oh, we'd better call xapian if we encounter this nasty regexp ;)
+        if not category:
+            cat_re = re.compile(r'----\(-\*\)\(\\r\)\?\\n\)\(\.\*\)Category(.*)\\b', re.U)
+            cat_match = cat_re.search(text)
+            if cat_match:
+                text = cat_match.groups()[0]
+                category = True
+                regex = False
+
+        if category:
+            obj = CategorySearch(text, use_re=regex, case=case)
+        elif lang:
             obj = LanguageSearch(text, use_re=regex, case=False)
         elif linkto:
             obj = LinkSearch(text, use_re=regex, case=case)
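
xapian_need_postproc bubbles up through compound expressions: one subterm
needing exact treatment (currently only case sensitivity) is enough to send
the whole candidate set back through _moinSearch. A toy model of that
propagation, mirroring the AndExpression loop above:

    class CaseTerm:
        def __init__(self, case):
            self.case = case
        def xapian_need_postproc(self):
            return self.case

    class AndExpr:
        def __init__(self, *terms):
            self._subterms = list(terms)
        def xapian_need_postproc(self):
            for term in self._subterms:
                if term.xapian_need_postproc():
                    return True
            return False

    assert AndExpr(CaseTerm(False), CaseTerm(True)).xapian_need_postproc()
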
--- a/MoinMoin/search/results.py	Sat Aug 05 00:15:06 2006 +0200
+++ b/MoinMoin/search/results.py	Sat Aug 05 20:24:25 2006 +0200
@@ -10,7 +10,7 @@
     @license: GNU GPL, see COPYING for details
 """
 
-import StringIO, time
+import StringIO, time, re
 from MoinMoin import config, wikiutil
 from MoinMoin.Page import Page
 
@@ -266,31 +266,43 @@
         self.hits = [item[1] for item in tmp]
         self.sort = 'page_name'
         
-    def stats(self, request, formatter):
+    def stats(self, request, formatter, hitsFrom):
         """ Return search statistics, formatted with formatter
 
         @param request: current request
         @param formatter: formatter to use
+        @param hitsFrom: current position in the hits
         @rtype: unicode
         @return formatted statistics
         """
         _ = request.getText
         output = [
-            formatter.paragraph(1),
-            formatter.text(_("%(hits)d results out of about %(pages)d pages.") %
-                   {'hits': len(self.hits), 'pages': self.pages}),
-            u' (%s)' % formatter.text(_("%.2f seconds") % self.elapsed),
+            formatter.paragraph(1, attr={'class': 'searchstats'}),
+            _("Results %(bs)s%(hitsFrom)d -%(hitsTo)d%(be)s "
+                    "of about %(bs)s%(hits)d%(be)s results out of about "
+                    "%(pages)d pages.") %
+                   {'hits': len(self.hits), 'pages': self.pages,
+                    'hitsFrom': hitsFrom + 1,
+                    'hitsTo': hitsFrom + request.cfg.search_results_per_page,
+                    'bs': formatter.strong(1), 'be': formatter.strong(0)},
+            u' (%s %s)' % (''.join([formatter.strong(1),
+                formatter.text("%.2f" % self.elapsed),
+                formatter.strong(0)]),
+                formatter.text(_("seconds"))),
             formatter.paragraph(0),
             ]
         return ''.join(output)
 
-    def pageList(self, request, formatter, info=0, numbered=1):
+    def pageList(self, request, formatter, info=0, numbered=1,
+            paging=True, hitsFrom=0):
         """ Format a list of found pages
 
         @param request: current request
         @param formatter: formatter to use
         @param info: show match info in title
         @param numbered: use numbered list for display
+        @param paging: toggle paging
+        @param hitsFrom: current position in the hits
         @rtype: unicode
         @return formatted page list
         """
@@ -298,15 +310,22 @@
         f = formatter
         write = self.buffer.write
         if numbered:
-            list = f.number_list
+            list = lambda on: f.number_list(on, start=hitsFrom+1)
         else:
             list = f.bullet_list
 
         # Add pages formatted as list
         if self.hits:
             write(list(1))
+            
+            # XXX: Do some xapian magic here
+            if paging:
+                hitsTo = hitsFrom + request.cfg.search_results_per_page
+                displayHits = self.hits[hitsFrom:hitsTo]
+            else:
+                displayHits = self.hits
 
-            for page in self.hits:
+            for page in displayHits:
                 if page.attachment:
                     querydict = {
                         'action': 'AttachFile',
@@ -330,11 +349,15 @@
                     ]
                 write(''.join(item))
             write(list(0))
+            if paging:
+                write(self.formatPrevNextPageLinks(hitsFrom=hitsFrom,
+                    hitsPerPage=request.cfg.search_results_per_page,
+                    hitsNum=len(self.hits)))
 
         return self.getvalue()
 
     def pageListWithContext(self, request, formatter, info=1, context=180,
-                            maxlines=1):
+                            maxlines=1, paging=True, hitsFrom=0):
         """ Format a list of found pages with context
 
         The default parameter values will create Google-like search
@@ -345,20 +368,30 @@
         @param request: current request
         @param formatter: formatter to use
         @param info: show match info near the page link
-        @param context: how many characters to show around each match. 
-        @param maxlines: how many contexts lines to show. 
+        @param context: how many characters to show around each match.
+        @param maxlines: how many contexts lines to show.
+        @param paging: toggle paging
+        @param hitsFrom: current position in the hits
         @rtype: unicode
         @return formatted page list with context
         """
         self._reset(request, formatter)
         f = formatter
         write = self.buffer.write
+        _ = request.getText
         
         # Add pages formatted as definition list
         if self.hits:
             write(f.definition_list(1))
 
-            for page in self.hits:
+            # XXX: Do some xapian magic here
+            if paging:
+                hitsTo = hitsFrom+request.cfg.search_results_per_page
+                displayHits = self.hits[hitsFrom:hitsTo]
+            else:
+                displayHits = self.hits
+
+            for page in displayHits:
                 matchInfo = ''
                 if info:
                     matchInfo = self.formatInfo(f, page)
@@ -386,9 +419,24 @@
                     f.definition_desc(1),
                     fmt_context,
                     f.definition_desc(0),
+                    f.definition_desc(1, attr={'class': 'searchresinfobar'}),
+                    f.text('%.1fk - ' % (page.page.size()/1024.0)),
+                    f.text('rev: %d %s- ' % (page.page.get_real_rev(),
+                        not page.page.rev and '(%s) ' % _('current') or '')),
+                    f.text('last modified: %(time)s - ' % page.page.lastEditInfo()),
+                    # XXX: proper metadata
+                    #f.text('lang: %s - ' % page.page.language),
+                    f.url(1, href='#'),
+                    f.text(_('Similar pages')),
+                    f.url(0),
+                    f.definition_desc(0),
                     ]
                 write(''.join(item))
             write(f.definition_list(0))
+            if paging:
+                write(self.formatPrevNextPageLinks(hitsFrom=hitsFrom,
+                    hitsPerPage=request.cfg.search_results_per_page,
+                    hitsNum=len(self.hits)))
         
         return self.getvalue()
 
@@ -596,6 +644,39 @@
             return ''.join(output)
         return ''
 
+    def formatPrevNextPageLinks(self, hitsFrom, hitsPerPage, hitsNum):
+        """ Format previous and next page links in page
+
+        @param hitsFrom: current position in the hits
+        @param hitsPerPage: number of hits per page
+        @param hitsNum: number of hits
+        @rtype: unicode
+        @return: links to previous and next pages (if exist)
+        """
+        _ = self.request.getText
+        f = self.formatter
+        from_re = r'\&from=[\d]+'
+        uri = re.sub(from_re, '', self.request.request_uri)
+        from_uri = lambda n: '%s&from=%i' % (uri, n)
+        l = []
+        if hitsFrom > 0:                        # previous page available
+            n = hitsFrom - hitsPerPage
+            if n < 0: n = 0
+            l.append(''.join([
+                f.url(1, href=from_uri(n)),
+                f.text(_('Previous Page')),
+                f.url(0)
+            ]))
+        if hitsFrom + hitsPerPage < hitsNum:    # next page available
+            n = hitsFrom + hitsPerPage
+            if n >= hitsNum: n = hitsNum - 1
+            l.append(''.join([
+                f.url(1, href=from_uri(n)),
+                f.text(_('Next Page')),
+                f.url(0)
+            ]))
+        return f.text(' | ').join(l)
+
     def querystring(self, querydict=None):
         """ Return query string, used in the page link """
         if querydict is None:
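
The Previous/Next links reduce to offset arithmetic over the "from" value. A
compact model of formatPrevNextPageLinks' boundary behaviour, where None
means the link is omitted:

    def page_offsets(hitsFrom, hitsPerPage, hitsNum):
        prev = None
        if hitsFrom > 0:
            prev = max(hitsFrom - hitsPerPage, 0)
        next_ = None
        if hitsFrom + hitsPerPage < hitsNum:
            next_ = hitsFrom + hitsPerPage
        return prev, next_

    assert page_offsets(0, 10, 25) == (None, 10)   # first page: only Next
    assert page_offsets(20, 10, 25) == (10, None)  # last page: only Previous
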
--- a/docs/CHANGES.fpletz	Sat Aug 05 00:15:06 2006 +0200
+++ b/docs/CHANGES.fpletz	Sat Aug 05 20:24:25 2006 +0200
@@ -4,35 +4,45 @@
   Known main issues:
     * Only term-based regex searching possible, modifier or heuristic to
       enable usage of _moinSearch for full compatibility?
-    * HACK: MoinMoin.Xapian.Index._get_languages (wait for proper metadata)
-    * Positions saved in Xapian aren't always correct, check. Code
-      generally needs some more love.
+    * HACK: MoinMoin.search.Xapian.Index._get_languages (wait for proper
+      metadata)
 
   ToDo:
     * Implement the new search UI
     * Write/update documentation for all the new search stuff
-    * Indexing and searching of categories (new term prefix)
     * Reevaluate Xapwrap, possibly drop it and rip out usable stuff
       (i.e. ExceptionTranslator)
     * Add stemming support for highlighting stuff:
         1. regexp for whole word (all lowercase), or
         2. just the root of the word
 
+  ToDo (low priority):
+    * Case-sensitive searches / Regexp on multiple terms: Graceful
+      fallback to and/or merge with moinSearch based on nodes xapian can
+      handle in the search term tree
+      * currently, xapian will fetch relevant pages and feed those into
+        _moinSearch for doing the real hard stuff it can't handle
+      -> need for a query optimizer, after SoC?
+
   New Features:
     * Faster search thanks to Xapian
+    * Searching for languages with the new prefix lang/language, e.g. lang:de
       Note: Currently only available when Xapian is used
+    * CategorySearch with prefix category or with the regexp previously
+      used (autodetected as CategorySearch)
     * New config options:
         xapian_search        0      enables xapian-powered search
         xapian_index_dir     None   directory for xapian indices
-        xapian_stemming      True   Toggles usage of stemmer, fallback
+        xapian_stemming      True   toggles usage of stemmer, fallback
                                     to False if no stemmer installed
+        search_results_per_page 10  determines how many hits should be
+                                    shown on a fullsearch action
   
   Bugfixes (only stuff that is buggy in moin/1.6 main branch):
     * ...
 
   Other Changes:
-    * ...
+    * Some whitespace fixes in miscellaneous code
   
   Developer notes:
     * ...
@@ -152,4 +162,49 @@
 2006-07-17
     * SystemInfo macro now also shows if xapian is being used (index
       available) and more graceful fallback to moinSearch
+    * Explored and evaluated the current framework for macros,
+      formatters and stuff which we need to touch for the new search UI
 
+2006-07-18
+    * Fixed some bugs, whitespace at EOL, better i18n for SystemInfo
+    * Implemented paging support for searches, needs some style
+      adjustments
+
+2006-07-19
+    * student didn't work on the project -- ThomasWaldmann
+
+2006-07-20
+    * Fixed some bugs found while testing regexp and case-sensitive searches
+    * Conclusion after tinkering with the current code to allow
+      cooperation between moinSearch and Xapian for case-sensitive
+      searches (code buried): We probably need a rather big rewrite!
+
+2006-07-21
+2006-07-22
+    * Final thoughts: No query optimizer for now. Case-sensitive search
+      is done by querying Xapian with the lowercased terms and running
+      _moinSearch over the relevant pages with the same query.
+    * Indexing of categories
+
+2006-07-23
+    * CategorySearch is live
+    * Subpage issue does not need changes: Can be done with regex magic
+      I.e.: - subpages of MyPage: re:^MyPage/
+            - subpages called SubPage: re:/SubPage
+            - subpages called Subpage (1st level): re:[^/]*/SubPage
+            - subpages called Subpage (last level): re:/Subpage$
+
+2006-07-24
+    * SystemInfo macro update (mtime)
+    * nicer regexp support for TitleSearch
+
+2006-07-25 .. 2006-07-30
+    * student did not work on project
+
+2006-08-01 .. 2006-08-02
+    * Reformatted search statistics to use CSS and be more google-like
+      (only in modern theme for now)
+    * Added "search result info bar", showing revision, size, mtime,
+      links for further searches (-> ToDo) etc.
+
--- a/wiki/htdocs/modern/css/common.css	Sat Aug 05 00:15:06 2006 +0200
+++ b/wiki/htdocs/modern/css/common.css	Sat Aug 05 20:24:25 2006 +0200
@@ -334,11 +334,25 @@
 
 .searchresults dt {
     margin-top: 1em;
-	font-weight: normal;
+    font-weight: normal;
 }
 
 .searchresults dd {
-	font-size: 0.85em;
+    font-size: 0.85em;
+}
+
+.searchresults dd.searchresinfobar {
+    color: #008000;
+    margin-left: 15px;
+}
+
+p.searchstats {
+    font-size: 0.8em;
+    text-align: right;
+    width: 100%;
+    background-color: #E6EAF0;
+    border-top: 1px solid #9088DC;
+    padding: 2px;
 }
 
 /* MonthCalendar css */