changeset 3573:124d0ef138aa

change page_*_regex processing, see docs/CHANGES (fixes Xapian category search for non-English category names)
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 10 May 2008 23:37:00 +0200
parents 870cc4c47705
children 0b7eb697e952
files MoinMoin/PageEditor.py MoinMoin/PageGraphicalEditor.py MoinMoin/_tests/test_wikidicts.py MoinMoin/_tests/test_wikiutil.py MoinMoin/config/multiconfig.py MoinMoin/events/wikidictsrescan.py MoinMoin/macro/AdvancedSearch.py MoinMoin/macro/EditTemplates.py MoinMoin/search/Xapian.py MoinMoin/security/__init__.py MoinMoin/userform/admin.py MoinMoin/wikidicts.py MoinMoin/wikiutil.py MoinMoin/xmlrpc/UpdateGroup.py docs/CHANGES
diffstat 15 files changed, 48 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/PageEditor.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/PageEditor.py	Sat May 10 23:37:00 2008 +0200
@@ -446,7 +446,7 @@
         request.write("</p>")
 
         # Category selection
-        filterfn = self.cfg.cache.page_category_regex.search
+        filterfn = self.cfg.cache.page_category_regexact.search
         cat_pages = request.rootpage.getPageList(filter=filterfn)
         cat_pages.sort()
         cat_pages = [wikiutil.pagelinkmarkup(p) for p in cat_pages]
--- a/MoinMoin/PageGraphicalEditor.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/PageGraphicalEditor.py	Sat May 10 23:37:00 2008 +0200
@@ -366,7 +366,7 @@
         request.write("</p>")
 
         # Category selection
-        filterfn = self.cfg.cache.page_category_regex.search
+        filterfn = self.cfg.cache.page_category_regexact.search
         cat_pages = request.rootpage.getPageList(filter=filterfn)
         cat_pages.sort()
         cat_pages = [wikiutil.pagelinkmarkup(p) for p in cat_pages]
--- a/MoinMoin/_tests/test_wikidicts.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/_tests/test_wikidicts.py	Sat May 10 23:37:00 2008 +0200
@@ -128,7 +128,7 @@
         page.renamePage('AnotherGroup')
 
         group = wikidicts.Group(request, '')
-        isgroup = request.cfg.cache.page_group_regex.search
+        isgroup = request.cfg.cache.page_group_regexact.search
         grouppages = request.rootpage.getPageList(user='', filter=isgroup)
 
         members, groups = request.dicts.expand_group(u'AnotherGroup')
@@ -147,7 +147,7 @@
         page.copyPage(u'OtherGroup')
 
         group = wikidicts.Group(request, '')
-        isgroup = request.cfg.cache.page_group_regex.search
+        isgroup = request.cfg.cache.page_group_regexact.search
         grouppages = request.rootpage.getPageList(user='', filter=isgroup)
 
         members, groups = request.dicts.expand_group(u'OtherGroup')
--- a/MoinMoin/_tests/test_wikiutil.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/_tests/test_wikiutil.py	Sat May 10 23:37:00 2008 +0200
@@ -118,7 +118,7 @@
     )
     bad = (
         'Template',
-        'ATemplate',
+        'I want a Template',
         'TemplateInFront',
         'xTemplateInFront',
         'XTemplateInFront',
--- a/MoinMoin/config/multiconfig.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/config/multiconfig.py	Sat May 10 23:37:00 2008 +0200
@@ -441,10 +441,15 @@
 
     page_front_page = u'HelpOnLanguages' # this will make people choose a sane config
     page_local_spelling_words = u'LocalSpellingWords'
-    page_category_regex = u'^Category[A-Z]'
-    page_dict_regex = u'[a-z0-9]Dict$'
-    page_group_regex = u'[a-z0-9]Group$'
-    page_template_regex = u'[a-z0-9]Template$'
+
+    # the following regexes should match the complete name when used in free text
+    # the group 'all' shall match all, while the group 'key' shall match the key only
+    # e.g. CategoryFoo -> group 'all' ==  CategoryFoo, group 'key' == Foo
+    # moin's code will add ^ / $ at beginning / end when needed
+    page_category_regex = ur'(?P<all>Category(?P<key>\S+))'
+    page_dict_regex = ur'(?P<all>(?P<key>\S+)Dict)'
+    page_group_regex = ur'(?P<all>(?P<key>\S+)Group)'
+    page_template_regex = ur'(?P<all>(?P<key>\S+)Template)'
 
     page_license_enabled = False
     page_license_page = u'WikiLicense'
@@ -734,6 +739,13 @@
         self.cache.page_dict_regex = re.compile(self.page_dict_regex, re.UNICODE)
         self.cache.page_group_regex = re.compile(self.page_group_regex, re.UNICODE)
         self.cache.page_template_regex = re.compile(self.page_template_regex, re.UNICODE)
+
+        # the ..._regexact versions only match if nothing is left (exact match)
+        self.cache.page_category_regexact = re.compile(u'^%s$' % self.page_category_regex, re.UNICODE)
+        self.cache.page_dict_regexact = re.compile(u'^%s$' % self.page_dict_regex, re.UNICODE)
+        self.cache.page_group_regexact = re.compile(u'^%s$' % self.page_group_regex, re.UNICODE)
+        self.cache.page_template_regexact = re.compile(u'^%s$' % self.page_template_regex, re.UNICODE)
+
         self.cache.ua_spiders = self.ua_spiders and re.compile(self.ua_spiders, re.I)
 
         self._check_directories()
--- a/MoinMoin/events/wikidictsrescan.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/events/wikidictsrescan.py	Sat May 10 23:37:00 2008 +0200
@@ -20,8 +20,8 @@
         isinstance(event, ev.PageCopiedEvent) or isinstance(event, ev.TrivialPageChangedEvent)):
         cfg = event.request.cfg
         pagename = event.page.page_name
-        if cfg.cache.page_dict_regex.search(pagename) or \
-           cfg.cache.page_group_regex.search(pagename):
+        if cfg.cache.page_dict_regexact.search(pagename) or \
+           cfg.cache.page_group_regexact.search(pagename):
             return handle_groupsdicts_changed(event)
 
 
--- a/MoinMoin/macro/AdvancedSearch.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/macro/AdvancedSearch.py	Sat May 10 23:37:00 2008 +0200
@@ -32,7 +32,7 @@
 
 def getCategories(request):
     # This will return all pages with "Category" in the title
-    cat_filter = request.cfg.cache.page_category_regex.search
+    cat_filter = request.cfg.cache.page_category_regexact.search
     pages = request.rootpage.getPageList(filter=cat_filter)
     pages.sort()
     return pages
--- a/MoinMoin/macro/EditTemplates.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/macro/EditTemplates.py	Sat May 10 23:37:00 2008 +0200
@@ -12,7 +12,7 @@
     # we don't want to spend much CPU for spiders requesting nonexisting pages
     if not macro.request.isSpiderAgent:
         # Get list of template pages readable by current user
-        filterfn = macro.request.cfg.cache.page_template_regex.search
+        filterfn = macro.request.cfg.cache.page_template_regexact.search
         templates = macro.request.rootpage.getPageList(filter=filterfn)
         result = []
         if templates:
--- a/MoinMoin/search/Xapian.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/search/Xapian.py	Sat May 10 23:37:00 2008 +0200
@@ -436,10 +436,8 @@
 
         if not prev or prev == 1:
             return []
-
-        return [cat.lower()
-                for cat in re.findall(r'Category[^\s]+', body[pos:])] # XXX needs i18n / configurability
-                # we have page_category_regex there, but it doesn't match the complete category tag
+        # for CategoryFoo, group 'all' matched CategoryFoo, group 'key' matched just Foo
+        return [m.group('all').lower() for m in self.request.cfg.cache.page_category_regex.finditer(body[pos:])]
 
     def _get_domains(self, page):
         """ Returns a generator with all the domains the page belongs to
--- a/MoinMoin/security/__init__.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/security/__init__.py	Sat May 10 23:37:00 2008 +0200
@@ -308,7 +308,7 @@
         else: # we have a #acl on the page (self.acl can be [] if #acl is empty!)
             acl = self.acl
         is_group_member = request.dicts.has_member
-        group_re = request.cfg.cache.page_group_regex
+        group_re = request.cfg.cache.page_group_regexact
         allowed = None
         for entry, rightsdict in acl:
             if entry in self.special_users:
--- a/MoinMoin/userform/admin.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/userform/admin.py	Sat May 10 23:37:00 2008 +0200
@@ -25,7 +25,7 @@
         Column('action', label=_('Action')),
     ]
 
-    isgroup = request.cfg.cache.page_group_regex.search
+    isgroup = request.cfg.cache.page_group_regexact.search
     grouppages = request.rootpage.getPageList(user='', filter=isgroup)
 
     # Iterate over users
--- a/MoinMoin/wikidicts.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/wikidicts.py	Sat May 10 23:37:00 2008 +0200
@@ -337,12 +337,12 @@
 
         # Get all pages in the wiki - without user filtering using filter
         # function - this makes the page list about 10 times faster.
-        isdict = self.cfg.cache.page_dict_regex.search
+        isdict = self.cfg.cache.page_dict_regexact.search
         dictpages = request.rootpage.getPageList(user='', filter=isdict)
         for pagename in dictpages:
             self.adddict(request, pagename)
 
-        isgroup = self.cfg.cache.page_group_regex.search
+        isgroup = self.cfg.cache.page_group_regexact.search
         grouppages = request.rootpage.getPageList(user='', filter=isgroup)
         for pagename in grouppages:
             self.addgroup(request, pagename)
--- a/MoinMoin/wikiutil.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/wikiutil.py	Sat May 10 23:37:00 2008 +0200
@@ -706,7 +706,7 @@
     @rtype: bool
     @return: true if page is a template page
     """
-    return request.cfg.cache.page_template_regex.search(pagename) is not None
+    return request.cfg.cache.page_template_regexact.search(pagename) is not None
 
 
 def isGroupPage(request, pagename):
@@ -716,7 +716,7 @@
     @rtype: bool
     @return: true if page is a form page
     """
-    return request.cfg.cache.page_group_regex.search(pagename) is not None
+    return request.cfg.cache.page_group_regexact.search(pagename) is not None
 
 
 def filterCategoryPages(request, pagelist):
@@ -733,7 +733,7 @@
     @rtype: list
     @return: only the category pages of pagelist
     """
-    func = request.cfg.cache.page_category_regex.search
+    func = request.cfg.cache.page_category_regexact.search
     return [pn for pn in pagelist if func(pn)]
 
 
--- a/MoinMoin/xmlrpc/UpdateGroup.py	Sat May 10 17:28:44 2008 +0200
+++ b/MoinMoin/xmlrpc/UpdateGroup.py	Sat May 10 23:37:00 2008 +0200
@@ -31,7 +31,7 @@
         return xmlrpclib.Fault(1, "You are not allowed to edit this page")
 
     # check if groupname matches page_group_regex
-    if not self.request.cfg.cache.page_group_regex.match(groupname):
+    if not self.request.cfg.cache.page_group_regexact.search(groupname):
         return xmlrpclib.Fault(2, "The groupname %s does not match your page_group_regex (%s)" % (
                                groupname, self.request.cfg.page_group_regex))
 
--- a/docs/CHANGES	Sat May 10 17:28:44 2008 +0200
+++ b/docs/CHANGES	Sat May 10 23:37:00 2008 +0200
@@ -58,6 +58,19 @@
         data when a page had attachment uploads after the last page
         edit)
       * returns a Fault if it did not find a edit-log entry
+    * HINT: page_*_regex processing had to be changed to fix Xapian category
+      search. If you don't use the English defaults, you will have to change
+      your configuration:
+        old (default): page_category_regex = u'^Category[A-Z]'
+        new (default): page_category_regex = ur'(?P<all>Category(?P<key>\S+))'
+      As you can see, the old regex did work for detecting whether a pagename
+      is a category, but it could not be used to search for a category tag in
+      the page text. The new regex can be used for both and identifies the
+      complete category tag (match in group 'all', e.g. "CategoryFoo") as well
+      as the category key (match in group 'key', e.g. "Foo") by using named
+      regex groups. \S+ means the key can be any run of non-whitespace chars.
+      If you would like to support multiple languages simultaneously, use
+      something like this: ur'(?P<all>(Kategorie|Category)(?P<key>\S+))'
 
   Developer notes (these should be moved to the end in the release):
     * Page.last_edit() is DEPRECATED, please use Page.edit_info().