changeset 1441:05482b439f89

optional history indexing and search is working
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Mon, 21 Aug 2006 13:17:22 +0200
parents f127f9e09608
children ce3a2506c433
files MoinMoin/action/fullsearch.py MoinMoin/config/multiconfig.py MoinMoin/macro/AdvancedSearch.py MoinMoin/search/Xapian.py MoinMoin/search/__init__.py MoinMoin/search/builtin.py docs/CHANGES.fpletz
diffstat 7 files changed, 56 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/action/fullsearch.py	Mon Aug 21 12:20:23 2006 +0200
+++ b/MoinMoin/action/fullsearch.py	Mon Aug 21 13:17:22 2006 +0200
@@ -57,6 +57,7 @@
     hitsFrom = int(request.form.get('from', [0])[0])
     mtime = None
     msg = ''
+    historysearch = 0
 
     max_context = 1 # only show first `max_context` contexts XXX still unused
 
@@ -72,6 +73,7 @@
         mimetype = request.form.get('mimetype', [0])[0]
         includeunderlay = request.form.get('includeunderlay', [0])[0]
         onlysystempages = request.form.get('onlysystempages', [0])[0]
+        historysearch = request.form.get('historysearch', [0])[0]
 
         mtime = request.form.get('mtime', [''])[0]
         if mtime:
@@ -128,7 +130,7 @@
     from MoinMoin.search import searchPages, QueryParser
     query = QueryParser(case=case, regex=regex,
             titlesearch=titlesearch).parse_query(needle)
-    results = searchPages(request, query, sort, mtime)
+    results = searchPages(request, query, sort, mtime, historysearch)
 
     # directly show a single hit
     # XXX won't work with attachment search
--- a/MoinMoin/config/multiconfig.py	Mon Aug 21 12:20:23 2006 +0200
+++ b/MoinMoin/config/multiconfig.py	Mon Aug 21 13:17:22 2006 +0200
@@ -301,6 +301,7 @@
     xapian_search = False # disabled until xapian is finished
     xapian_index_dir = None
     xapian_stemming = True
+    xapian_index_history = True
     search_results_per_page = 10
 
     mail_login = None # or "user pwd" if you need to use SMTP AUTH
--- a/MoinMoin/macro/AdvancedSearch.py	Mon Aug 21 12:20:23 2006 +0200
+++ b/MoinMoin/macro/AdvancedSearch.py	Mon Aug 21 13:17:22 2006 +0200
@@ -96,6 +96,11 @@
                     '</input>' % _('Include underlay')),
                 ('', '<input type="checkbox" name="onlysystempages" value="1">%s'
                     '</input>' % _('Only system pages')),
+                ('', '<input type="checkbox" name="historysearch"value="1"%s>%s'
+                    '</input>' %
+                    (not macro.request.cfg.xapian_index_history and
+                        ' disabled="disabled"' or '',
+                     _('Search in all page revisions')))
             )
     ])
     
--- a/MoinMoin/search/Xapian.py	Mon Aug 21 12:20:23 2006 +0200
+++ b/MoinMoin/search/Xapian.py	Mon Aug 21 13:17:22 2006 +0200
@@ -147,6 +147,7 @@
         'attachment': 3,
         'mtime': 4,
         'wikiname': 5,
+        'revision': 6,
     }
     prefixMap = {
         # http://svn.xapian.org/*checkout*/trunk/xapian-applications/omega/docs/termprefixes.txt
@@ -220,7 +221,7 @@
             kw['sortKey'] = 'pagename'
 
         hits = searcher.search(query, valuesWanted=['pagename',
-            'attachment', 'mtime', 'wikiname'], **kw)
+            'attachment', 'mtime', 'wikiname', 'revision'], **kw)
         self.request.cfg.xapian_searchers.append((searcher, timestamp))
         return hits
     
@@ -283,13 +284,15 @@
                 xpname = xapdoc.SortKey('pagename', fs_rootpage)
                 xattachment = xapdoc.SortKey('attachment', filename) # XXX we should treat files like real pages, not attachments
                 xmtime = xapdoc.SortKey('mtime', mtime)
+                xrev = xapdoc.SortKey('revision', '0')
                 title = " ".join(os.path.join(fs_rootpage, filename).split("/"))
                 xtitle = xapdoc.Keyword('title', title)
                 xmimetype = xapdoc.TextField('mimetype', mimetype, True)
                 xcontent = xapdoc.TextField('content', file_content)
                 doc = xapdoc.Document(textFields=(xcontent, xmimetype, ),
                                       keywords=(xtitle, xitemid, ),
-                                      sortFields=(xpname, xattachment, xmtime, xwname, ),
+                                      sortFields=(xpname, xattachment,
+                                          xmtime, xwname, xrev, ),
                                      )
                 doc.analyzerFactory = getWikiAnalyzerFactory()
                 if mode == 'update':
@@ -366,8 +369,8 @@
         wikiname = request.cfg.interwikiname or "Self"
         pagename = page.page_name
         mtime = page.mtime_usecs()
-        itemid = "%s:%s" % (wikiname, pagename)
         revision = str(page.get_real_rev())
+        itemid = "%s:%s:%s" % (wikiname, pagename, revision)
         author = page.last_edit(request)['editor']
         # XXX: Hack until we get proper metadata
         language, stem_language = self._get_languages(page)
@@ -397,7 +400,8 @@
             xwname = xapdoc.SortKey('wikiname', request.cfg.interwikiname or "Self")
             xpname = xapdoc.SortKey('pagename', pagename)
             xattachment = xapdoc.SortKey('attachment', '') # this is a real page, not an attachment
-            xmtime = xapdoc.SortKey('mtime', mtime)
+            xmtime = xapdoc.SortKey('mtime', str(mtime))
+            xrev = xapdoc.SortKey('revision', revision)
             xtitle = xapdoc.TextField('title', pagename, True) # prefixed
             xkeywords = [xapdoc.Keyword('itemid', itemid),
                     xapdoc.Keyword('lang', language),
@@ -415,7 +419,8 @@
             xcontent = xapdoc.TextField('content', page.get_raw_body())
             doc = xapdoc.Document(textFields=(xcontent, xtitle),
                                   keywords=xkeywords,
-                                  sortFields=(xpname, xattachment, xmtime, xwname, ),
+                                  sortFields=(xpname, xattachment,
+                                      xmtime, xwname, xrev),
                                  )
             doc.analyzerFactory = getWikiAnalyzerFactory(request,
                     stem_language)
@@ -456,6 +461,7 @@
                 xpname = xapdoc.SortKey('pagename', pagename)
                 xattachment = xapdoc.SortKey('attachment', att) # this is an attachment, store its filename
                 xmtime = xapdoc.SortKey('mtime', mtime)
+                xrev = xapdoc.SortKey('revision', '0')
                 xtitle = xapdoc.Keyword('title', '%s/%s' % (pagename, att))
                 xlanguage = xapdoc.Keyword('lang', language)
                 xstem_language = xapdoc.Keyword('stem_lang', stem_language)
@@ -467,7 +473,7 @@
                                           xlanguage, xstem_language,
                                           xmimetype, ),
                                       sortFields=(xpname, xattachment, xmtime,
-                                          xwname, ),
+                                          xwname, xrev, ),
                                      )
                 doc.analyzerFactory = getWikiAnalyzerFactory(request,
                         stem_language)
@@ -506,7 +512,11 @@
             request.log("indexing all (%d) pages..." % len(pages))
             for pagename in pages:
                 p = Page(request, pagename)
-                self._index_page(writer, p, mode)
+                if request.cfg.xapian_index_history:
+                    for rev in p.getRevList():
+                        self._index_page(writer,
+                                Page(request, pagename, rev=rev),
+                                mode)
             if files:
                 request.log("indexing all files...")
                 for fname in files:
--- a/MoinMoin/search/__init__.py	Mon Aug 21 12:20:23 2006 +0200
+++ b/MoinMoin/search/__init__.py	Mon Aug 21 13:17:22 2006 +0200
@@ -13,7 +13,8 @@
 from MoinMoin.search.queryparser import QueryParser
 from MoinMoin.search.builtin import Search
 
-def searchPages(request, query, sort='weight', mtime=None, **kw):
+def searchPages(request, query, sort='weight', mtime=None,
+        historysearch=None, **kw):
     """ Search the text of all pages for query.
     
     @param request: current request
@@ -23,5 +24,6 @@
     """
     if isinstance(query, str) or isinstance(query, unicode):
         query = QueryParser(**kw).parse_query(query)
-    return Search(request, query, sort, mtime=mtime).run()
+    return Search(request, query, sort, mtime=mtime,
+            historysearch=historysearch).run()
 
--- a/MoinMoin/search/builtin.py	Mon Aug 21 12:20:23 2006 +0200
+++ b/MoinMoin/search/builtin.py	Mon Aug 21 13:17:22 2006 +0200
@@ -352,11 +352,13 @@
 class Search:
     """ A search run """
     
-    def __init__(self, request, query, sort='weight', mtime=None):
+    def __init__(self, request, query, sort='weight', mtime=None,
+            historysearch=0):
         self.request = request
         self.query = query
         self.sort = sort
         self.mtime = mtime
+        self.historysearch = historysearch
         self.filtered = False
         self.fs_rootpage = "FS" # XXX FS hardcoded
 
@@ -516,8 +518,17 @@
             wikiname = valuedict['wikiname']
             pagename = valuedict['pagename']
             attachment = valuedict['attachment']
+
+            if 'revision' in valuedict:
+                revision = int(valuedict['revision'])
+            else:
+                revision = None
+
             if wikiname in (self.request.cfg.interwikiname, 'Self'): # THIS wiki
-                page = Page(self.request, pagename)
+                page = Page(self.request, pagename, rev=revision)
+                if not self.historysearch and revision and \
+                        page.getRevList()[0] != revision:
+                    continue
                 if attachment:
                     if pagename == fs_rootpage: # not really an attachment
                         page = Page(self.request, "%s/%s" % (fs_rootpage, attachment))
--- a/docs/CHANGES.fpletz	Mon Aug 21 12:20:23 2006 +0200
+++ b/docs/CHANGES.fpletz	Mon Aug 21 13:17:22 2006 +0200
@@ -9,8 +9,6 @@
 
   ToDo:
     * Write/update documentation for all the new search stuff
-    * Search based on mtime
-    * Index all revisions and let users search in them (rev, mtime)
 
   ToDo (low priority):
     * Reevaluate Xapwrap, possibly drop it and rip out usable stuff
@@ -29,7 +27,8 @@
         - CategorySearch: category:Homepage
         - MimetypeSearch: mimetype:image/png (for attachments/files)
         - DomainSearch: domain:underlay
-      Note: Currently only available when Xapian is used
+        - History Search: available in the advanced UI
+      Note: Some are currently only available when Xapian is used
     * New config options:
         xapian_search        0      enables xapian-powered search
         xapian_index_dir     None   directory for xapian indices
@@ -37,6 +36,8 @@
                                     to False if no stemmer installed
         search_results_per_page 10  determines how many hits should be
                                     shown on a fullsearch action
+        xapian_index_history True   indexes all revisions of pages to
+                                    allow searching in their history
   
   Bugfixes (only stuff that is buggy in moin/1.6 main branch):
     * ...
@@ -257,3 +258,12 @@
     * minor bugfixes (i18n etc.)
     * domain-specific search (underlay -> system pages)
 
+2006-08-20
+    * major fixes for mimetype & mtime search and language sorting (none
+      of them really worked)
+
+2006-08-21
+    * indexing the history of pages (all revisions) if requested:
+        xapian_index_history
+    * implemented optional history search in advanced ui, defaults to off
+