Mercurial > moin > 1.9
changeset 3838:592fe02ed976
Xapian indexing: remove crappy hostname tokenization, works better without
author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
---|---|
date | Sun, 13 Jul 2008 21:41:12 +0200 |
parents | 85f1d2d650e5 |
children | a37ed69fafed |
files | MoinMoin/search/Xapian.py docs/CHANGES |
diffstat | 2 files changed, 2 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/MoinMoin/search/Xapian.py Sun Jul 13 19:58:45 2008 +0200
+++ b/MoinMoin/search/Xapian.py Sun Jul 13 21:41:12 2008 +0200
@@ -74,7 +74,6 @@
     token_re = re.compile(
         r"(?P<company>\w+[&@]\w+)|" + # company names like AT&T and Excite@Home.
         r"(?P<email>\w+([.-]\w+)*@\w+([.-]\w+)*)|" + # email addresses
-        r"(?P<hostname>\w+(\.\w+)+)|" + # hostnames
         r"(?P<acronym>(\w\.)+)|" + # acronyms: U.S.A., I.B.M., etc.
         r"(?P<word>\w+)", # words (including WikiWords)
         re.U)
@@ -137,11 +136,6 @@
                         if word:
                             yield (word, m.start() + displ)
                             displ += len(word) + 1
-                elif m.group("hostname"):
-                    displ = 0
-                    for word in self.dot_re.split(m.group("hostname")):
-                        yield (word, m.start() + displ)
-                        displ += len(word) + 1
                 elif m.group("word"):
                     for word, pos in self.raw_tokenize_word(m.group("word"), m.start()):
                         yield word, pos
--- a/docs/CHANGES Sun Jul 13 19:58:45 2008 +0200
+++ b/docs/CHANGES Sun Jul 13 21:41:12 2008 +0200
@@ -43,6 +43,8 @@
       those directories for a while just for the case.
     * Standalone server: fix --pidfile option
     * Search:
+      * Xapian indexing: Removed crappy "hostname" tokenization.
+        Fixes MoinMoinBugs/1.7 XapianNotWorkingWithLeadingNumbersInTitle.
       * Make query parser reject more invalid input.
       * If query parsing raises a BracketError, at least tell what the problem
         is (and not just raise empty ValueError).