changeset 3858:5079d2246367

merged moin/1.7
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Mon, 14 Jul 2008 23:28:38 +0200
parents 5e35dd32656f (current diff) ee74cf49c1ca (diff)
children 8cb2f4ebd45f
files MoinMoin/wikiutil.py
diffstat 1 files changed, 8 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/Xapian.py	Mon Jul 14 19:53:24 2008 +0200
+++ b/MoinMoin/search/Xapian.py	Mon Jul 14 23:28:38 2008 +0200
@@ -103,16 +103,21 @@
 
     def raw_tokenize_word(self, word, pos):
         """ try to further tokenize some word starting at pos """
+        yield (word, pos)
         if self.wikiword_re.match(word):
-            yield (word, pos)
             # if it is a CamelCaseWord, we additionally try to tokenize Camel, Case and Word
             for m in re.finditer(self.singleword_re, word):
-                for w, p in self.raw_tokenize_word(m.group(), pos + m.start()):
+                mw, mp = m.group(), pos + m.start()
+                for w, p in self.raw_tokenize_word(mw, mp):
                     yield (w, p)
         else:
             # if we have Foo42, yield Foo and 42
             for m in re.finditer(self.alpha_num_re, word):
-                yield (m.group(), pos + m.start())
+                mw, mp = m.group(), pos + m.start()
+                if mw != word:
+                    for w, p in self.raw_tokenize_word(mw, mp):
+                        yield (w, p)
+
 
     def raw_tokenize(self, value):
         """ Yield a stream of words from a string.