changeset 3838:592fe02ed976

Xapian indexing: remove crappy hostname tokenization; indexing works better without it
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 13 Jul 2008 21:41:12 +0200
parents 85f1d2d650e5
children a37ed69fafed
files MoinMoin/search/Xapian.py docs/CHANGES
diffstat 2 files changed, 2 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/search/Xapian.py	Sun Jul 13 19:58:45 2008 +0200
+++ b/MoinMoin/search/Xapian.py	Sun Jul 13 21:41:12 2008 +0200
@@ -74,7 +74,6 @@
     token_re = re.compile(
         r"(?P<company>\w+[&@]\w+)|" + # company names like AT&T and Excite@Home.
         r"(?P<email>\w+([.-]\w+)*@\w+([.-]\w+)*)|" +    # email addresses
-        r"(?P<hostname>\w+(\.\w+)+)|" +                 # hostnames
         r"(?P<acronym>(\w\.)+)|" +          # acronyms: U.S.A., I.B.M., etc.
         r"(?P<word>\w+)",                   # words (including WikiWords)
         re.U)
@@ -137,11 +136,6 @@
                         if word:
                             yield (word, m.start() + displ)
                             displ += len(word) + 1
-                elif m.group("hostname"):
-                    displ = 0
-                    for word in self.dot_re.split(m.group("hostname")):
-                        yield (word, m.start() + displ)
-                        displ += len(word) + 1
                 elif m.group("word"):
                     for word, pos in self.raw_tokenize_word(m.group("word"), m.start()):
                         yield word, pos
--- a/docs/CHANGES	Sun Jul 13 19:58:45 2008 +0200
+++ b/docs/CHANGES	Sun Jul 13 21:41:12 2008 +0200
@@ -43,6 +43,8 @@
       those directories for a while just for the case.
     * Standalone server: fix --pidfile option
     * Search:
+      * Xapian indexing: Removed crappy "hostname" tokenization.
+        Fixes MoinMoinBugs/1.7 XapianNotWorkingWithLeadingNumbersInTitle.
       * Make query parser reject more invalid input.
       * If query parsing raises a BracketError, at least tell what the problem
         is (and not just raise empty  ValueError).