changeset 1199:5ce3bea2e66c

index categories
author Franz Pletz <fpletz AT franz-pletz DOT org>
date Sat, 22 Jul 2006 22:11:11 +0200
parents 277b97ba0700
children b953b5ff4877
files MoinMoin/search/Xapian.py MoinMoin/search/queryparser.py docs/CHANGES.fpletz
diffstat 3 files changed, 17 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/Xapian.py	Sat Jul 22 17:56:50 2006 +0200
+++ b/MoinMoin/search/Xapian.py	Sat Jul 22 22:11:11 2006 +0200
@@ -170,7 +170,8 @@
                        #  the D term, and changing the last digit to a '2' if it's a '3')
                        #X   longer prefix for user-defined use
         'linkto': 'XLINKTO', # this document links to that document
-        'stem_lang': 'XSTEMLANG', # ISO Language code this document was stemmed in 
+        'stem_lang': 'XSTEMLANG', # ISO Language code this document was stemmed in
+        'category': 'XCAT', # category this document belongs to
                        #Y   year (four digits)
     }
 
@@ -316,6 +317,15 @@
         # return actual lang and lang to stem in
         return (lang, default_lang)
 
+    def _get_categories(self, page):
+        """ Return the category names (minus 'Category') found below the first ---- rule """
+        body = page.get_raw_body()
+        sep = re.search(r'----*\r?\n', body)
+        if not sep:
+            return []  # no horizontal rule, so no category footer to scan
+        # \S+ keeps several categories on one line apart and drops a trailing \r
+        return re.findall(r'Category(\S+)', body[sep.end():])
+
     def _index_page(self, writer, page, mode='update'):
         """ Index a page - assumes that the write lock is acquired
             @arg writer: the index writer object
@@ -331,6 +341,7 @@
         itemid = "%s:%s" % (wikiname, pagename)
         # XXX: Hack until we get proper metadata
         language, stem_language = self._get_languages(page)
+        categories = self._get_categories(page)
         updated = False
 
         if mode == 'update':
@@ -362,6 +373,8 @@
                     xapdoc.Keyword('stem_lang', stem_language)]
             for pagelink in page.getPageLinks(request):
                 xkeywords.append(xapdoc.Keyword('linkto', pagelink))
+            for category in categories:
+                xkeywords.append(xapdoc.Keyword('category', category))
             xcontent = xapdoc.TextField('content', page.get_raw_body())
             doc = xapdoc.Document(textFields=(xcontent, xtitle),
                                   keywords=xkeywords,
--- a/MoinMoin/search/queryparser.py	Sat Jul 22 17:56:50 2006 +0200
+++ b/MoinMoin/search/queryparser.py	Sat Jul 22 22:11:11 2006 +0200
@@ -20,6 +20,8 @@
 except ImportError:
     pass
 
+CATEGORY_RE = re.compile('----\(-\*\)\(\\r\)\?\\n\)\(\.\*\)Category(.*)\\b', re.U)  # NOTE(review): the escaped \( \) \* \? \. match those characters literally and the parens are unbalanced - confirm the intended pattern
+
 #############################################################################
 ### query objects
 #############################################################################
--- a/docs/CHANGES.fpletz	Sat Jul 22 17:56:50 2006 +0200
+++ b/docs/CHANGES.fpletz	Sat Jul 22 22:11:11 2006 +0200
@@ -6,8 +6,6 @@
       enable usage of _moinSearch for full compatibility?
     * HACK: MoinMoin.search.Xapian.Index._get_languages (wait for proper
       metadata)
-    * Positions saved in Xapian aren't always correct, check. Code
-      generally needs some more love.
 
   ToDo:
     * Implement the new search UI
@@ -19,7 +17,6 @@
         1. regexp for whole word (all lowercase), or
         2. just the root of the word
     * Subpages: Add positions for complete (!) pagenames into the index
-    * Check if permissions/acls are always obeyed
 
   ToDo (low priority):
     * Case-sensitive searches / Regexp on multiple terms: Graceful
@@ -188,4 +185,5 @@
       sensitive search is done by querying Xapian with the lowercased
       terms and run _moinSearch over the relevant pages with the same
       query.
+    * Indexing of categories