Mercurial > moin > 1.9
changeset 3858:5079d2246367
merged moin/1.7
author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
---|---|
date | Mon, 14 Jul 2008 23:28:38 +0200 |
parents | 5e35dd32656f (current diff) ee74cf49c1ca (diff) |
children | 8cb2f4ebd45f |
files | MoinMoin/wikiutil.py |
diffstat | 1 files changed, 8 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/MoinMoin/search/Xapian.py Mon Jul 14 19:53:24 2008 +0200
+++ b/MoinMoin/search/Xapian.py Mon Jul 14 23:28:38 2008 +0200
@@ -103,16 +103,21 @@
 
     def raw_tokenize_word(self, word, pos):
         """ try to further tokenize some word starting at pos """
+        yield (word, pos)
         if self.wikiword_re.match(word):
-            yield (word, pos)
             # if it is a CamelCaseWord, we additionally try to tokenize Camel, Case and Word
             for m in re.finditer(self.singleword_re, word):
-                for w, p in self.raw_tokenize_word(m.group(), pos + m.start()):
+                mw, mp = m.group(), pos + m.start()
+                for w, p in self.raw_tokenize_word(mw, mp):
                     yield (w, p)
         else:
             # if we have Foo42, yield Foo and 42
             for m in re.finditer(self.alpha_num_re, word):
-                yield (m.group(), pos + m.start())
+                mw, mp = m.group(), pos + m.start()
+                if mw != word:
+                    for w, p in self.raw_tokenize_word(mw, mp):
+                        yield (w, p)
+
 
     def raw_tokenize(self, value):
         """ Yield a stream of words from a string.