changeset 447:e0e016a553bd

only check once for spiders (imported from: moin--main--1.5--patch-451)
author Thomas Waldmann <tw@waldmann-edv.de>
date Sat, 18 Feb 2006 14:21:40 +0000
parents 4c984229abb9
children 45924beef130
files ChangeLog MoinMoin/logfile/eventlog.py MoinMoin/request.py MoinMoin/util/web.py
diffstat 4 files changed, 37 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog	Thu Feb 16 22:09:47 2006 +0000
+++ b/ChangeLog	Sat Feb 18 14:21:40 2006 +0000
@@ -2,6 +2,21 @@
 # arch-tag: automatic-ChangeLog--arch@arch.thinkmo.de--2003-archives/moin--main--1.5
 #
 
+2006-02-18 15:21:40 GMT	Thomas Waldmann <tw@waldmann-edv.de>	patch-451
+
+    Summary:
+      only check once for spiders
+    Revision:
+      moin--main--1.5--patch-451
+
+    only check once for spiders
+    
+
+    modified files:
+     ChangeLog MoinMoin/logfile/eventlog.py MoinMoin/request.py
+     MoinMoin/util/web.py
+
+
 2006-02-16 23:09:47 GMT	Thomas Waldmann <tw@waldmann-edv.de>	patch-450
 
     Summary:
--- a/MoinMoin/logfile/eventlog.py	Thu Feb 16 22:09:47 2006 +0000
+++ b/MoinMoin/logfile/eventlog.py	Sat Feb 18 14:21:40 2006 +0000
@@ -26,8 +26,7 @@
         """ Write an event of type `eventtype, with optional key/value
         pairs appended (i.e. you have to pass a dict).
         """
-        # Dont log spiders XXX TODO: does it make sense? 
-        if web.isSpiderAgent(request):
+        if request.isSpiderAgent:
             return
         
         if mtime_usecs is None:
--- a/MoinMoin/request.py	Thu Feb 16 22:09:47 2006 +0000
+++ b/MoinMoin/request.py	Sat Feb 18 14:21:40 2006 +0000
@@ -7,7 +7,7 @@
     @license: GNU GPL, see COPYING for details.
 """
 
-import os, time, sys, cgi, StringIO
+import os, re, time, sys, cgi, StringIO
 import copy
 from MoinMoin import config, wikiutil, user, caching
 from MoinMoin.util import MoinMoinNoFooter, IsWin9x
@@ -112,6 +112,8 @@
             self.__dict__.update(properties)
             self._load_multi_cfg()
             
+            self.isSpiderAgent = self.check_spider()
+        
             # Set decode charsets.  Input from the user is always in
             # config.charset, which is the page charsets. Except
             # path_info, which may use utf-8, and handled by decodePagename.
@@ -846,20 +848,29 @@
         """ Flush output stream.
         """
         raise NotImplementedError
-        
+
+    def check_spider(self):
+        """ check if the user agent for current request is a spider/bot """
+        isSpider = False
+        spiders = self.cfg.ua_spiders
+        if spiders:
+            ua = self.getUserAgent()
+            if ua:
+                isSpider = re.search(spiders, ua, re.I) is not None
+        return isSpider
+
     def isForbidden(self):
         """ check for web spiders and refuse anything except viewing """
         forbidden = 0
-        # we do not have a parsed query string here
-        # so we can just do simple matching
-        if ((self.query_string != '' or self.request_method != 'GET') and
-            self.query_string != 'action=rss_rc' and not
+        # we do not have a parsed query string here, so we can just do simple matching
+        qs = self.query_string
+        if ((qs != '' or self.request_method != 'GET') and
+            not 'action=rss_rc' in qs and
             # allow spiders to get attachments and do 'show'
-            (self.query_string.find('action=AttachFile') >= 0 and self.query_string.find('do=get') >= 0) and not
-            (self.query_string.find('action=show') >= 0)
+            not ('action=AttachFile' in qs and 'do=get' in qs) and
+            not 'action=show' in qs
             ):
-            from MoinMoin.util import web
-            forbidden = web.isSpiderAgent(self)
+            forbidden = self.isSpiderAgent
 
         if not forbidden and self.cfg.hosts_deny:
             ip = self.remote_addr
--- a/MoinMoin/util/web.py	Thu Feb 16 22:09:47 2006 +0000
+++ b/MoinMoin/util/web.py	Sat Feb 18 14:21:40 2006 +0000
@@ -9,19 +9,6 @@
 import re
 from MoinMoin import config
 
-def isSpiderAgent(request):
-    """ Return True if user agent appears to be a spider.
-    """
-    if not request.cfg.ua_spiders:
-        return 0
-
-    ua = request.getUserAgent()
-    if not ua:
-        return 0
-
-    return re.search(request.cfg.ua_spiders, ua, re.I) is not None
-
-
 def getIntegerInput(request, fieldname, default=None, minval=None, maxval=None):
     """ Get an integer value from a request parameter. If the value
         is out of bounds, it's made to fit into those bounds.