changeset 2207:7ae581d79352

updated bot useragents list, reduce bot cpu usage of some macros (ported from 1.5 repo)
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Mon, 14 May 2007 22:03:21 +0200
parents b9fa45f5c47f
children 2f7f195f4dd2
files MoinMoin/config/multiconfig.py MoinMoin/macro/OrphanedPages.py MoinMoin/macro/PageHits.py MoinMoin/macro/PageSize.py MoinMoin/macro/SystemInfo.py MoinMoin/macro/WantedPages.py MoinMoin/macro/__init__.py
diffstat 7 files changed, 16 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/config/multiconfig.py	Mon May 14 21:50:37 2007 +0200
+++ b/MoinMoin/config/multiconfig.py	Mon May 14 22:03:21 2007 +0200
@@ -481,7 +481,7 @@
     # a regex of HTTP_USER_AGENTS that should be excluded from logging
     # and receive a FORBIDDEN for anything except viewing a page
     ua_spiders = ('archiver|cfetch|crawler|curl|gigabot|googlebot|holmes|htdig|httrack|httpunit|jeeves|larbin|leech|'
-                  'linkbot|linkmap|linkwalk|mercator|mirror|msnbot|neomo|nutbot|omniexplorer|puf|robot|scooter|seekbot|'
+                  'linkbot|linkmap|linkwalk|mercator|mirror|msnbot|msrbot|neomo|nutbot|omniexplorer|puf|robot|scooter|seekbot|'
                   'sherlock|slurp|sitecheck|spider|teleport|voyager|webreaper|wget')
 
     # Wiki identity
--- a/MoinMoin/macro/OrphanedPages.py	Mon May 14 21:50:37 2007 +0200
+++ b/MoinMoin/macro/OrphanedPages.py	Mon May 14 22:03:21 2007 +0200
@@ -13,6 +13,8 @@
 
     if macro.request.mode_getpagelinks: # prevent recursion
         return ''
+    if macro.request.isSpiderAgent: # reduce bot cpu usage
+        return ''
 
     # delete all linked pages from a dict of all pages
     pages = macro.request.rootpage.getPageDict()
--- a/MoinMoin/macro/PageHits.py	Mon May 14 21:50:37 2007 +0200
+++ b/MoinMoin/macro/PageHits.py	Mon May 14 22:03:21 2007 +0200
@@ -21,6 +21,8 @@
 
     def execute(self):
         """ Execute the macro and return output """
+        if self.request.isSpiderAgent: # reduce bot cpu usage
+            return ''
         cacheDate, hits = self.cachedHits()
         self.addHitsFromLog(hits, cacheDate)
         self.filterReadableHits(hits)
--- a/MoinMoin/macro/PageSize.py	Mon May 14 21:50:37 2007 +0200
+++ b/MoinMoin/macro/PageSize.py	Mon May 14 22:03:21 2007 +0200
@@ -9,6 +9,9 @@
 Dependencies = ["pages"]
 
 def execute(macro, args):
+    if macro.request.isSpiderAgent: # reduce bot cpu usage
+        return ''
+
     # get list of pages and their objects
     pages = macro.request.rootpage.getPageDict()
 
--- a/MoinMoin/macro/SystemInfo.py	Mon May 14 21:50:37 2007 +0200
+++ b/MoinMoin/macro/SystemInfo.py	Mon May 14 22:03:21 2007 +0200
@@ -164,4 +164,7 @@
         return buf.getvalue()
 
 def execute(macro, args):
-        return SystemInfo(macro, args).render()
+    if macro.request.isSpiderAgent: # reduce bot cpu usage
+        return ''
+    return SystemInfo(macro, args).render()
+
--- a/MoinMoin/macro/WantedPages.py	Mon May 14 21:50:37 2007 +0200
+++ b/MoinMoin/macro/WantedPages.py	Mon May 14 22:03:21 2007 +0200
@@ -17,6 +17,8 @@
     # prevent recursion
     if request.mode_getpagelinks:
         return ''
+    if request.isSpiderAgent: # reduce bot cpu usage
+        return ''
 
     # Get allpages switch from the form
     allpages = int(request.form.get('allpages', [0])[0]) != 0
--- a/MoinMoin/macro/__init__.py	Mon May 14 21:50:37 2007 +0200
+++ b/MoinMoin/macro/__init__.py	Mon May 14 22:03:21 2007 +0200
@@ -252,6 +252,8 @@
         return self._make_index(args)
 
     def _macro_WordIndex(self, args):
+        if self.request.isSpiderAgent: # reduce bot cpu usage
+            return ''
         word_re = u'[%s][%s]+' % (config.chars_upper, config.chars_lower)
         return self._make_index(args, word_re=word_re)