Mercurial > moin > 1.9
changeset 1199:5ce3bea2e66c
index categories
author | Franz Pletz <fpletz AT franz-pletz DOT org> |
---|---|
date | Sat, 22 Jul 2006 22:11:11 +0200 |
parents | 277b97ba0700 |
children | b953b5ff4877 |
files | MoinMoin/search/Xapian.py MoinMoin/search/queryparser.py docs/CHANGES.fpletz |
diffstat | 3 files changed, 17 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- a/MoinMoin/search/Xapian.py Sat Jul 22 17:56:50 2006 +0200
+++ b/MoinMoin/search/Xapian.py Sat Jul 22 22:11:11 2006 +0200
@@ -170,7 +170,8 @@
                        #   the D term, and changing the last digit to a '2' if it's a '3')
                        #X   longer prefix for user-defined use
         'linkto': 'XLINKTO', # this document links to that document
-        'stem_lang': 'XSTEMLANG', # ISO Language code this document was stemmed in
+        'stem_lang': 'XSTEMLANG', # ISO Language code this document was stemmed in
+        'category': 'XCAT', # category this document belongs to
                        #Y   year (four digits)
     }
 
@@ -316,6 +317,15 @@
         # return actual lang and lang to stem in
         return (lang, default_lang)
 
+    def _get_categories(self, page):
+        body = page.get_raw_body()
+
+        sep = re.search(r'----*\r?\n', body)
+        if not sep:
+            return []
+
+        return re.findall('Category(.*)\r?\n', body[sep.end():])
+
     def _index_page(self, writer, page, mode='update'):
         """ Index a page - assumes that the write lock is acquired
             @arg writer: the index writer object
@@ -331,6 +341,7 @@
         itemid = "%s:%s" % (wikiname, pagename)
         # XXX: Hack until we get proper metadata
         language, stem_language = self._get_languages(page)
+        categories = self._get_categories(page)
         updated = False
 
         if mode == 'update':
@@ -362,6 +373,8 @@
                         xapdoc.Keyword('stem_lang', stem_language)]
             for pagelink in page.getPageLinks(request):
                 xkeywords.append(xapdoc.Keyword('linkto', pagelink))
+            for category in categories:
+                xkeywords.append(xapdoc.Keyword('category', category))
             xcontent = xapdoc.TextField('content', page.get_raw_body())
             doc = xapdoc.Document(textFields=(xcontent, xtitle),
                                   keywords=xkeywords,
--- a/MoinMoin/search/queryparser.py Sat Jul 22 17:56:50 2006 +0200
+++ b/MoinMoin/search/queryparser.py Sat Jul 22 22:11:11 2006 +0200
@@ -20,6 +20,8 @@
 except ImportError:
     pass
 
+CATEGORY_RE = re.compile('----\(-\*\)\(\\r\)\?\\n\)\(\.\*\)Category(.*)\\b', re.U)
+
 #############################################################################
 ### query objects
 #############################################################################
--- a/docs/CHANGES.fpletz Sat Jul 22 17:56:50 2006 +0200
+++ b/docs/CHANGES.fpletz Sat Jul 22 22:11:11 2006 +0200
@@ -6,8 +6,6 @@
       enable usage of _moinSearch for full compatibility?
     * HACK: MoinMoin.search.Xapian.Index._get_languages (wait for proper
       metadata)
-    * Positions saved in Xapian aren't always correct, check. Code
-      generally needs some more love.
 
   ToDo:
     * Implement the new search UI
@@ -19,7 +17,6 @@
         1. regexp for whole word (all lowercase), or
         2. just the root of the word
     * Subpages: Add positions for complete (!) pagenames into the index
-    * Check if permissions/acls are always obeyed
 
   ToDo (low priority):
     * Case-sensitive searches / Regexp on multiple terms: Graceful
@@ -188,4 +185,5 @@
       sensitive search is done by querying Xapian with the lowercased terms
       and run _moinSearch over the relevant pages with the same query.
 
+    * Indexing of categories
 