Mercurial > moin > 1.9
changeset 3573:124d0ef138aa
change page_*_regex processing, see docs/CHANGES (fixes Xapian category search for non-english)
author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
---|---|
date | Sat, 10 May 2008 23:37:00 +0200 |
parents | 870cc4c47705 |
children | 0b7eb697e952 |
files | MoinMoin/PageEditor.py MoinMoin/PageGraphicalEditor.py MoinMoin/_tests/test_wikidicts.py MoinMoin/_tests/test_wikiutil.py MoinMoin/config/multiconfig.py MoinMoin/events/wikidictsrescan.py MoinMoin/macro/AdvancedSearch.py MoinMoin/macro/EditTemplates.py MoinMoin/search/Xapian.py MoinMoin/security/__init__.py MoinMoin/userform/admin.py MoinMoin/wikidicts.py MoinMoin/wikiutil.py MoinMoin/xmlrpc/UpdateGroup.py docs/CHANGES |
diffstat | 15 files changed, 48 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
--- a/MoinMoin/PageEditor.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/PageEditor.py Sat May 10 23:37:00 2008 +0200 @@ -446,7 +446,7 @@ request.write("</p>") # Category selection - filterfn = self.cfg.cache.page_category_regex.search + filterfn = self.cfg.cache.page_category_regexact.search cat_pages = request.rootpage.getPageList(filter=filterfn) cat_pages.sort() cat_pages = [wikiutil.pagelinkmarkup(p) for p in cat_pages]
--- a/MoinMoin/PageGraphicalEditor.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/PageGraphicalEditor.py Sat May 10 23:37:00 2008 +0200 @@ -366,7 +366,7 @@ request.write("</p>") # Category selection - filterfn = self.cfg.cache.page_category_regex.search + filterfn = self.cfg.cache.page_category_regexact.search cat_pages = request.rootpage.getPageList(filter=filterfn) cat_pages.sort() cat_pages = [wikiutil.pagelinkmarkup(p) for p in cat_pages]
--- a/MoinMoin/_tests/test_wikidicts.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/_tests/test_wikidicts.py Sat May 10 23:37:00 2008 +0200 @@ -128,7 +128,7 @@ page.renamePage('AnotherGroup') group = wikidicts.Group(request, '') - isgroup = request.cfg.cache.page_group_regex.search + isgroup = request.cfg.cache.page_group_regexact.search grouppages = request.rootpage.getPageList(user='', filter=isgroup) members, groups = request.dicts.expand_group(u'AnotherGroup') @@ -147,7 +147,7 @@ page.copyPage(u'OtherGroup') group = wikidicts.Group(request, '') - isgroup = request.cfg.cache.page_group_regex.search + isgroup = request.cfg.cache.page_group_regexact.search grouppages = request.rootpage.getPageList(user='', filter=isgroup) members, groups = request.dicts.expand_group(u'OtherGroup')
--- a/MoinMoin/_tests/test_wikiutil.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/_tests/test_wikiutil.py Sat May 10 23:37:00 2008 +0200 @@ -118,7 +118,7 @@ ) bad = ( 'Template', - 'ATemplate', + 'I want a Template', 'TemplateInFront', 'xTemplateInFront', 'XTemplateInFront',
--- a/MoinMoin/config/multiconfig.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/config/multiconfig.py Sat May 10 23:37:00 2008 +0200 @@ -441,10 +441,15 @@ page_front_page = u'HelpOnLanguages' # this will make people choose a sane config page_local_spelling_words = u'LocalSpellingWords' - page_category_regex = u'^Category[A-Z]' - page_dict_regex = u'[a-z0-9]Dict$' - page_group_regex = u'[a-z0-9]Group$' - page_template_regex = u'[a-z0-9]Template$' + + # the following regexes should match the complete name when used in free text + # the group 'all' shall match all, while the group 'key' shall match the key only + # e.g. CategoryFoo -> group 'all' == CategoryFoo, group 'key' == Foo + # moin's code will add ^ / $ at beginning / end when needed + page_category_regex = ur'(?P<all>Category(?P<key>\S+))' + page_dict_regex = ur'(?P<all>(?P<key>\S+)Dict)' + page_group_regex = ur'(?P<all>(?P<key>\S+)Group)' + page_template_regex = ur'(?P<all>(?P<key>\S+)Template)' page_license_enabled = False page_license_page = u'WikiLicense' @@ -734,6 +739,13 @@ self.cache.page_dict_regex = re.compile(self.page_dict_regex, re.UNICODE) self.cache.page_group_regex = re.compile(self.page_group_regex, re.UNICODE) self.cache.page_template_regex = re.compile(self.page_template_regex, re.UNICODE) + + # the ..._regexact versions only match if nothing is left (exact match) + self.cache.page_category_regexact = re.compile(u'^%s$' % self.page_category_regex, re.UNICODE) + self.cache.page_dict_regexact = re.compile(u'^%s$' % self.page_dict_regex, re.UNICODE) + self.cache.page_group_regexact = re.compile(u'^%s$' % self.page_group_regex, re.UNICODE) + self.cache.page_template_regexact = re.compile(u'^%s$' % self.page_template_regex, re.UNICODE) + self.cache.ua_spiders = self.ua_spiders and re.compile(self.ua_spiders, re.I) self._check_directories()
--- a/MoinMoin/events/wikidictsrescan.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/events/wikidictsrescan.py Sat May 10 23:37:00 2008 +0200 @@ -20,8 +20,8 @@ isinstance(event, ev.PageCopiedEvent) or isinstance(event, ev.TrivialPageChangedEvent)): cfg = event.request.cfg pagename = event.page.page_name - if cfg.cache.page_dict_regex.search(pagename) or \ - cfg.cache.page_group_regex.search(pagename): + if cfg.cache.page_dict_regexact.search(pagename) or \ + cfg.cache.page_group_regexact.search(pagename): return handle_groupsdicts_changed(event)
--- a/MoinMoin/macro/AdvancedSearch.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/macro/AdvancedSearch.py Sat May 10 23:37:00 2008 +0200 @@ -32,7 +32,7 @@ def getCategories(request): # This will return all pages with "Category" in the title - cat_filter = request.cfg.cache.page_category_regex.search + cat_filter = request.cfg.cache.page_category_regexact.search pages = request.rootpage.getPageList(filter=cat_filter) pages.sort() return pages
--- a/MoinMoin/macro/EditTemplates.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/macro/EditTemplates.py Sat May 10 23:37:00 2008 +0200 @@ -12,7 +12,7 @@ # we don't want to spend much CPU for spiders requesting nonexisting pages if not macro.request.isSpiderAgent: # Get list of template pages readable by current user - filterfn = macro.request.cfg.cache.page_template_regex.search + filterfn = macro.request.cfg.cache.page_template_regexact.search templates = macro.request.rootpage.getPageList(filter=filterfn) result = [] if templates:
--- a/MoinMoin/search/Xapian.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/search/Xapian.py Sat May 10 23:37:00 2008 +0200 @@ -436,10 +436,8 @@ if not prev or prev == 1: return [] - - return [cat.lower() - for cat in re.findall(r'Category[^\s]+', body[pos:])] # XXX needs i18n / configurability - # we have page_category_regex there, but it doesn't match the complete category tag + # for CategoryFoo, group 'all' matched CategoryFoo, group 'key' matched just Foo + return [m.group('all').lower() for m in self.request.cfg.cache.page_category_regex.finditer(body[pos:])] def _get_domains(self, page): """ Returns a generator with all the domains the page belongs to
--- a/MoinMoin/security/__init__.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/security/__init__.py Sat May 10 23:37:00 2008 +0200 @@ -308,7 +308,7 @@ else: # we have a #acl on the page (self.acl can be [] if #acl is empty!) acl = self.acl is_group_member = request.dicts.has_member - group_re = request.cfg.cache.page_group_regex + group_re = request.cfg.cache.page_group_regexact allowed = None for entry, rightsdict in acl: if entry in self.special_users:
--- a/MoinMoin/userform/admin.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/userform/admin.py Sat May 10 23:37:00 2008 +0200 @@ -25,7 +25,7 @@ Column('action', label=_('Action')), ] - isgroup = request.cfg.cache.page_group_regex.search + isgroup = request.cfg.cache.page_group_regexact.search grouppages = request.rootpage.getPageList(user='', filter=isgroup) # Iterate over users
--- a/MoinMoin/wikidicts.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/wikidicts.py Sat May 10 23:37:00 2008 +0200 @@ -337,12 +337,12 @@ # Get all pages in the wiki - without user filtering using filter # function - this makes the page list about 10 times faster. - isdict = self.cfg.cache.page_dict_regex.search + isdict = self.cfg.cache.page_dict_regexact.search dictpages = request.rootpage.getPageList(user='', filter=isdict) for pagename in dictpages: self.adddict(request, pagename) - isgroup = self.cfg.cache.page_group_regex.search + isgroup = self.cfg.cache.page_group_regexact.search grouppages = request.rootpage.getPageList(user='', filter=isgroup) for pagename in grouppages: self.addgroup(request, pagename)
--- a/MoinMoin/wikiutil.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/wikiutil.py Sat May 10 23:37:00 2008 +0200 @@ -706,7 +706,7 @@ @rtype: bool @return: true if page is a template page """ - return request.cfg.cache.page_template_regex.search(pagename) is not None + return request.cfg.cache.page_template_regexact.search(pagename) is not None def isGroupPage(request, pagename): @@ -716,7 +716,7 @@ @rtype: bool @return: true if page is a form page """ - return request.cfg.cache.page_group_regex.search(pagename) is not None + return request.cfg.cache.page_group_regexact.search(pagename) is not None def filterCategoryPages(request, pagelist): @@ -733,7 +733,7 @@ @rtype: list @return: only the category pages of pagelist """ - func = request.cfg.cache.page_category_regex.search + func = request.cfg.cache.page_category_regexact.search return [pn for pn in pagelist if func(pn)]
--- a/MoinMoin/xmlrpc/UpdateGroup.py Sat May 10 17:28:44 2008 +0200 +++ b/MoinMoin/xmlrpc/UpdateGroup.py Sat May 10 23:37:00 2008 +0200 @@ -31,7 +31,7 @@ return xmlrpclib.Fault(1, "You are not allowed to edit this page") # check if groupname matches page_group_regex - if not self.request.cfg.cache.page_group_regex.match(groupname): + if not self.request.cfg.cache.page_group_regexact.search(groupname): return xmlrpclib.Fault(2, "The groupname %s does not match your page_group_regex (%s)" % ( groupname, self.request.cfg.page_group_regex))
--- a/docs/CHANGES Sat May 10 17:28:44 2008 +0200 +++ b/docs/CHANGES Sat May 10 23:37:00 2008 +0200 @@ -58,6 +58,19 @@ data when a page had attachment uploads after the last page edit) * returns a Fault if it did not find an edit-log entry + * HINT: page_*_regex processing had to be changed to fix Xapian category + search. If you don't use the English defaults, you will have to change + your configuration: + old (default): page_category_regex = u'^Category[A-Z]' + new (default): page_category_regex = ur'(?P<all>Category(?P<key>\S+))' + As you can see, the old regex worked for detecting whether a pagename is + a category, but it could not be used to search for a category tag in the + page text. The new regex can be used for both and identifies the complete + category tag (match in group 'all', e.g. "CategoryFoo") as well as the + category key (match in group 'key', e.g. "Foo") by using named regex + groups. \S+ means the category key can be anything non-whitespace. + If you would like to simultaneously support multiple languages, use something + like this: ur'(?P<all>(Kategorie|Category)(?P<key>\S+))' Developer notes (these should be moved to the end in the release): * Page.last_edit() is DEPRECATED, please use Page.edit_info().