changeset 1339:544b931cd965

new setting url_prefix_action, cleanup page.url/link_to, wikiutil.link_tag
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 20 Aug 2006 13:50:27 +0200
parents ea342b114f07
children 6786dc227674
files MoinMoin/Page.py MoinMoin/action/RenderAsDocbook.py MoinMoin/action/SubscribeUser.py MoinMoin/action/__init__.py MoinMoin/action/fullsearch.py MoinMoin/action/newpage.py MoinMoin/config/multiconfig.py MoinMoin/macro/MonthCalendar.py MoinMoin/request/__init__.py MoinMoin/theme/__init__.py MoinMoin/wikiutil.py docs/CHANGES wiki/htdocs/robots.txt
diffstat 13 files changed, 100 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/Page.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/Page.py	Sun Aug 20 13:50:27 2006 +0200
@@ -845,7 +845,7 @@
         self._raw_body = body
         self._raw_body_modified = modified
 
-    def url(self, request, querystr=None, escape=1):
+    def url(self, request, querystr=None, escape=1, anchor=None, relative=True):
         """ Return complete URL for this page, including scriptname
 
         @param request: the request object
@@ -853,13 +853,18 @@
             (str or dict, see wikiutil.makeQueryString)
         @param escape: escape url for html, to be backward compatible
             with old code (bool)
+        @param anchor: if specified, make a link to this anchor
         @rtype: str
         @return: complete url of this page, including scriptname
         """
-        url = '%s/%s' % (request.getScriptname(),
-                     wikiutil.quoteWikinameURL(self.page_name))
+        # Create url, excluding scriptname
+        url = wikiutil.quoteWikinameURL(self.page_name)
+        if querystr:
+            if isinstance(querystr, dict):
+                action = querystr.get('action', None)
+            else:
+                action = None # XXX we don't support getting the action out of a str
 
-        if querystr:
             querystr = wikiutil.makeQueryString(querystr)
 
             # TODO: remove in 2.0
@@ -872,8 +877,17 @@
                               " http://moinmoin.wikiwikiweb.de/ApiChanges")
                 querystr = wikiutil.escape(querystr)
 
+            # make action URLs denyable by robots.txt:
+            if action is not None and request.cfg.url_prefix_action is not None:
+                url = "%s/%s/%s" % (request.cfg.url_prefix_action, action, url)
             url = '%s?%s' % (url, querystr)
 
+        # Add anchor
+        if anchor:
+            url = "%s#%s" % (url, wikiutil.url_quote_plus(anchor))
+
+        if not relative:
+            url = '%s/%s' % (request.getScriptname(), url)
         return url
 
     def link_to(self, request, text=None, querystr=None, anchor=None, **kw):
@@ -894,19 +908,8 @@
         if not text:
             text = self.split_title(request)
 
-        # Create url, excluding scriptname
-        url = wikiutil.quoteWikinameURL(self.page_name)
-        if querystr:
-            if not isinstance(querystr, type({})):
-                # makeQueryString does not escape strings any more
-                querystr = wikiutil.escape(querystr)
-                
-            querystr = wikiutil.makeQueryString(querystr)
-            url = "%s?%s" % (url, querystr)
-
-        # Add anchor
-        if anchor:
-            url = "%s#%s" % (url, wikiutil.url_quote_plus(anchor))
+        url = self.url(request, querystr, escape=0, anchor=anchor)
+        # escaping is done by link_tag -> formatter.url -> ._open()
 
         # Add css class for non existing page
         if not self.exists():
@@ -1200,11 +1203,12 @@
 
             # send the page header
             if self.default_formatter:
-                full_text_query = 'linkto:"%s"' % self.page_name
-                link = '%s/%s?action=fullsearch&amp;value=%s&amp;context=180' % (
-                    request.getScriptname(),
-                    wikiutil.quoteWikinameURL(self.page_name),
-                    wikiutil.url_quote_plus(full_text_query))
+                querydict = {
+                    'action': 'fullsearch',
+                    'value': 'linkto:"%s"' % self.page_name,
+                    'context' : '180',
+                }
+                link = self.url(request, querydict)
 
                 title = self.split_title(request)
                 if self.rev:
--- a/MoinMoin/action/RenderAsDocbook.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/action/RenderAsDocbook.py	Sun Aug 20 13:50:27 2006 +0200
@@ -8,7 +8,6 @@
 from MoinMoin.Page import Page
 
 def execute(pagename, request):
-    url = Page(request, pagename).url(request, {'action': 'show',
-                                                'mimetype': 'text/docbook'}, 0)
+    url = Page(request, pagename).url(request, {'action': 'show', 'mimetype': 'text/docbook'}, escape=0)
     request.http_redirect(url)
 
--- a/MoinMoin/action/SubscribeUser.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/action/SubscribeUser.py	Sun Aug 20 13:50:27 2006 +0200
@@ -74,7 +74,7 @@
             else:
                 result.append(formatter.smiley('{X}'))
                 result.append(formatter.text(" "))
-            result.append(formatter.url(1, Page(request, userobj.name).url(request)))
+            result.append(formatter.url(1, Page(request, userobj.name).url(request, escape=0)))
             result.append(formatter.text(userobj.name))
             result.append(formatter.url(0))
             result.append(formatter.linebreak(preformatted=0))
--- a/MoinMoin/action/__init__.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/action/__init__.py	Sun Aug 20 13:50:27 2006 +0200
@@ -277,7 +277,7 @@
 def do_goto(pagename, request):
     """ redirect to another page """
     target = request.form.get('target', [''])[0]
-    request.http_redirect(Page(request, target).url(request))
+    request.http_redirect(Page(request, target).url(request, escape=0))
 
 def do_userform(pagename, request):
     """ save data posted from UserPreferences """
--- a/MoinMoin/action/fullsearch.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/action/fullsearch.py	Sun Aug 20 13:50:27 2006 +0200
@@ -77,8 +77,7 @@
         if not page.attachment: # we did not find an attachment
             page = Page(request, page.page_name)
             # TODO: remove escape=0 in 2.0
-            url = page.url(request, querystr={'highlight': query.highlight_re()},
-                           escape=0)
+            url = page.url(request, querystr={'highlight': query.highlight_re()}, escape=0)
             request.http_redirect(url)
             return
 
--- a/MoinMoin/action/newpage.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/action/newpage.py	Sun Aug 20 13:50:27 2006 +0200
@@ -89,7 +89,7 @@
             if parent:
                 pagename = "%s/%s" % (parent, pagename)
 
-            url = Page(self.request, pagename).url(self.request, query, 0)
+            url = Page(self.request, pagename).url(self.request, query, escape=0)
             self.request.http_redirect(url)
 
         return ''
--- a/MoinMoin/config/multiconfig.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/config/multiconfig.py	Sun Aug 20 13:50:27 2006 +0200
@@ -420,8 +420,7 @@
     url_prefix_static = '/moin_static160'
 
     # we need to prefix actions to be able to exclude them by robots.txt:
-    # TODO:
-    # url_prefix_action = '/action'
+    url_prefix_action = 'action' # no leading or trailing '/'
 
     logo_string = None
     interwikiname = None
@@ -583,6 +582,10 @@
         if self.url_prefix is not None: # remove this code when url_prefix setting is removed
             self.url_prefix_static = self.url_prefix
 
+        action_prefix = self.url_prefix_action
+        if action_prefix is not None and action_prefix.endswith('/'): # make sure there is no trailing '/'
+            self.url_prefix_action = action_prefix[:-1]
+
     def load_meta_dict(self):
         """ The meta_dict contains meta data about the wiki instance. """
         if getattr(self, "_meta_dict", None) is None:
--- a/MoinMoin/macro/MonthCalendar.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/macro/MonthCalendar.py	Sun Aug 20 13:50:27 2006 +0200
@@ -312,10 +312,10 @@
     qpagenames = '*'.join(map(wikiutil.quoteWikinameURL, parmpagename))
     qtemplate = wikiutil.quoteWikinameURL(parmtemplate)
     querystr = "calparms=%%s,%d,%d,%d,%%d,%%s" % (parmyear, parmmonth, parmoffset)
-    prevlink = p.url(request, querystr % (qpagenames, parmoffset2 - 1, qtemplate), 0)
-    nextlink = p.url(request, querystr % (qpagenames, parmoffset2 + 1, qtemplate), 0)
-    prevylink = p.url(request, querystr % (qpagenames, parmoffset2 - 12, qtemplate), 0)
-    nextylink = p.url(request, querystr % (qpagenames, parmoffset2 + 12, qtemplate), 0)
+    prevlink = p.url(request, querystr % (qpagenames, parmoffset2 - 1, qtemplate), escape=0)
+    nextlink = p.url(request, querystr % (qpagenames, parmoffset2 + 1, qtemplate), escape=0)
+    prevylink = p.url(request, querystr % (qpagenames, parmoffset2 - 12, qtemplate), escape=0)
+    nextylink = p.url(request, querystr % (qpagenames, parmoffset2 + 12, qtemplate), escape=0)
     prevmonth = formatter.url(1, prevlink, 'cal-link') + '&lt;' + formatter.url(0)
     nextmonth = formatter.url(1, nextlink, 'cal-link') + '&gt;' + formatter.url(0)
     prevyear = formatter.url(1, prevylink, 'cal-link') + '&lt;&lt;' + formatter.url(0)
@@ -428,7 +428,7 @@
                             r, g, b = (r, g+colorstep, b)
                 r, g, b = cliprgb(r, g, b)
                 style = 'background-color:#%02x%02x%02x' % (r, g, b)
-                fmtlink = formatter.url(1, daypage.url(request, query), csslink, **onmouse) + str(day) + formatter.url(0)
+                fmtlink = formatter.url(1, daypage.url(request, query, escape=0), csslink, **onmouse) + str(day) + formatter.url(0)
                 if day == currentday and month == currentmonth and year == currentyear:
                     cssday = "cal-today"
                     fmtlink = "<b>%s</b>" % fmtlink # for browser with CSS probs
--- a/MoinMoin/request/__init__.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/request/__init__.py	Sun Aug 20 13:50:27 2006 +0200
@@ -1060,6 +1060,18 @@
 
             # The last component in path_info is the page name, if any
             path = self.getPathinfo()
+
+            # we can have all action URLs like this: /action/ActionName/PageName?action=ActionName&...
+            # this is just for robots.txt being able to forbid them for crawlers
+            prefix = self.cfg.url_prefix_action
+            if prefix is not None:
+                prefix = '/%s/' % prefix # e.g. '/action/'
+                if path.startswith(prefix):
+                    # remove prefix and action name
+                    path = path[len(prefix):]
+                    action, path = path.split('/', 1)
+                    path = '/' + path
+
             if path.startswith('/'):
                 pagename = self.normalizePagename(path)
             else:
@@ -1094,10 +1106,10 @@
                         wikitag, wikiurl, wikitail, error = wikiutil.resolve_wiki(self, pagetrail[-1])
                         url = wikiurl + wikiutil.quoteWikinameURL(wikitail)
                     else:
-                        url = Page(self, pagetrail[-1]).url(self)
+                        url = Page(self, pagetrail[-1]).url(self, escape=0)
                 else:
                     # Or to localized FrontPage
-                    url = wikiutil.getFrontPage(self).url(self)
+                    url = wikiutil.getFrontPage(self).url(self, escape=0)
                 self.http_redirect(url)
                 return self.finish()
 
--- a/MoinMoin/theme/__init__.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/theme/__init__.py	Sun Aug 20 13:50:27 2006 +0200
@@ -524,9 +524,7 @@
 
         if isinstance(msg, (str, unicode)):
             # Render simple strings with a close link
-            close = d['page'].link_to(self.request,
-                                      text=_('Clear message'),
-                                      querystr={'action': 'show'})
+            close = d['page'].link_to(self.request, text=_('Clear message'))
             html = u'<p>%s</p>\n<div class="buttons">%s</div>\n' % (msg, close)
         else:
             # msg is a widget
--- a/MoinMoin/wikiutil.py	Fri Aug 18 23:41:37 2006 +0200
+++ b/MoinMoin/wikiutil.py	Sun Aug 20 13:50:27 2006 +0200
@@ -191,7 +191,7 @@
     """
     if qstr is None:
         qstr = {}
-    if isinstance(qstr, type({})):
+    if isinstance(qstr, dict):
         qstr.update(kw)
         items = ['%s=%s' % (url_quote_plus(key, want_unicode=want_unicode), url_quote_plus(value, want_unicode=want_unicode)) for key, value in qstr.items()]
         qstr = '&'.join(items)
@@ -1458,6 +1458,8 @@
     @rtype: string
     @return: formatted link tag
     """
+    if formatter is None:
+        formatter = request.html_formatter
     if kw.has_key('css_class'):
         css_class = kw['css_class']
         del kw['css_class'] # one time is enough
@@ -1469,26 +1471,29 @@
         text = params # default
     if formatter:
         url = "%s/%s" % (request.getScriptname(), params)
+        # formatter.url will escape the url part
         if on is not None:
-            return formatter.url(on, url, css_class, **kw)
-        return (formatter.url(1, url, css_class, **kw) +
+            tag = formatter.url(on, url, css_class, **kw)
+        else:
+            tag = (formatter.url(1, url, css_class, **kw) +
                 formatter.rawHTML(text) +
                 formatter.url(0))
-    if on is not None and not on:
-        return '</a>'
-
-    attrs = ''
-    if css_class:
-        attrs += ' class="%s"' % css_class
-    if id:
-        attrs += ' id="%s"' % id
-    if name:
-        attrs += ' name="%s"' % name
-    result = '<a%s href="%s/%s">' % (attrs, request.getScriptname(), params)
-    if on:
-        return result
-    else:
-        return "%s%s</a>" % (result, text)
+    else: # this shouldn't be used any more:
+        if on is not None and not on:
+            tag = '</a>'
+        else:
+            attrs = ''
+            if css_class:
+                attrs += ' class="%s"' % css_class
+            if id:
+                attrs += ' id="%s"' % id
+            if name:
+                attrs += ' name="%s"' % name
+            tag = '<a%s href="%s/%s">' % (attrs, request.getScriptname(), params) # XXX wikiutil.escape(params) !?
+            if not on:
+                tag = "%s%s</a>" % (tag, text)
+        request.log("Warning: wikiutil.link_tag called without formatter and without request.html_formatter. tag=%r" % (tag, ))
+    return tag
 
 def containsConflictMarker(text):
     """ Returns true if there is a conflict marker in the text. """
--- a/docs/CHANGES	Fri Aug 18 23:41:37 2006 +0200
+++ b/docs/CHANGES	Sun Aug 20 13:50:27 2006 +0200
@@ -235,6 +235,23 @@
          For Apache, Lighttpd and other "external" servers, you have to care
          for configuring them to use a long expiry and change url_prefix_static
          related configuration on upgrade.
+    * url_prefix_action ['action'] was introduced for lowering load and traffic
+      caused by search engine crawlers. Up to now, crawlers were causing a high
+      load in internet moin wikis because they tried to get about everything,
+      including all actions linked from the user interface.
+      Known crawlers only get 403 for most actions, but nevertheless they first
+      tried. There was no means of keeping them away from actions due to the rather
+      braindead robots.txt standard. You can only disallow paths there, but
+      moin's actions were querystring based, not path based (this would need
+      regex support in robots.txt, but there is no such thing).
+      This changed now. Moin will now generate action URLs you can handle in
+      robots.txt, like /action/info/PageName?action=info. So if you don't want
+      bots triggering actions, just disallow /action/ there. Keep in mind that
+      attachments are handled by /action/AttachFile, so if you want attached
+      files and pictures indexed by search engines, don't disallow
+      /action/AttachFile/ in your robots.txt.
+    * We don't use ...?action=show any more for the "Clear message" links shown
+      in the message boxes, but directly link to the page.
 
 Version 1.5-current:
    * moin.fcg improved - if you use FastCGI, you must use the new file:
--- a/wiki/htdocs/robots.txt	Fri Aug 18 23:41:37 2006 +0200
+++ b/wiki/htdocs/robots.txt	Sun Aug 20 13:50:27 2006 +0200
@@ -2,5 +2,8 @@
 
 User-agent: *
 Crawl-delay: 20
-Disallow:
+# This has to match script url + cfg.url_prefix_action - it
+# saves lots of search engine load and traffic by disallowing crawlers
+# to request action related URLs:
+Disallow: /action/