MoinMoin/security/antispam.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Tue, 27 Jan 2009 21:17:55 +0100
changeset 4260 89b91bf87dad
parent 3159 915a431b663c
permissions -rw-r--r--
Fixed XSS issue in antispam
     1 # -*- coding: iso-8859-1 -*-
     2 """
     3     This implements a global (and a local) blacklist against wiki spammers.
     4 
     5     @copyright: 2005-2008 MoinMoin:ThomasWaldmann
     6     @license: GNU GPL, see COPYING for details
     7 """
     8 
     9 import re, time, datetime
    10 
    11 from MoinMoin import log
    12 logging = log.getLogger(__name__)
    13 
    14 from MoinMoin.support.python_compatibility import frozenset
    15 from MoinMoin.security import Permissions
    16 from MoinMoin import caching, wikiutil
    17 
    18 # Errors ---------------------------------------------------------------
    19 
    20 class Error(Exception):
    21     """Base class for antispam errors."""
    22 
    23     def __str__(self):
    24         return repr(self)
    25 
    26 class WikirpcError(Error):
    27     """ Raised when we get xmlrpclib.Fault """
    28 
    29     def __init__(self, msg, fault):
    30         """ Init with msg and xmlrpclib.Fault dict """
    31         self.msg = msg
    32         self.fault = fault
    33 
    34     def __str__(self):
    35         """ Format the using description and data from the fault """
    36         return self.msg + ": [%(faultCode)s]  %(faultString)s" % self.fault
    37 
    38 
    39 # Functions ------------------------------------------------------------
    40 
    41 def makelist(text):
    42     """ Split text into lines, strip them, skip # comments """
    43     lines = text.splitlines()
    44     result = []
    45     for line in lines:
    46         line = line.split(' # ', 1)[0] # rest of line comment
    47         line = line.strip()
    48         if line and not line.startswith('#'):
    49             result.append(line)
    50     return result
    51 
    52 
    53 def getblacklist(request, pagename, do_update):
    54     """ Get blacklist, possibly downloading new copy
    55 
    56     @param request: current request (request instance)
    57     @param pagename: bad content page name (unicode)
    58     @rtype: list
    59     @return: list of blacklisted regular expressions
    60     """
    61     from MoinMoin.PageEditor import PageEditor
    62     p = PageEditor(request, pagename, uid_override="Antispam subsystem")
    63     mymtime = wikiutil.version2timestamp(p.mtime_usecs())
    64     if do_update:
    65         tooold = time.time() - 1800
    66         failure = caching.CacheEntry(request, "antispam", "failure", scope='wiki')
    67         fail_time = failure.mtime() # only update if no failure in last hour
    68         if (mymtime < tooold) and (fail_time < tooold):
    69             logging.info("%d *BadContent too old, have to check for an update..." % tooold)
    70             import xmlrpclib
    71             import socket
    72 
    73             timeout = 15 # time out for reaching the master server via xmlrpc
    74             old_timeout = socket.getdefaulttimeout()
    75             socket.setdefaulttimeout(timeout)
    76 
    77             master_url = request.cfg.antispam_master_url
    78             master = xmlrpclib.ServerProxy(master_url)
    79             try:
    80                 # Get BadContent info
    81                 master.putClientInfo('ANTISPAM-CHECK',
    82                                      request.http_host+request.script_name)
    83                 response = master.getPageInfo(pagename)
    84 
    85                 # It seems that response is always a dict
    86                 if isinstance(response, dict) and 'faultCode' in response:
    87                     raise WikirpcError("failed to get BadContent information",
    88                                        response)
    89 
    90                 # Compare date against local BadContent copy
    91                 masterdate = response['lastModified']
    92 
    93                 if isinstance(masterdate, datetime.datetime):
    94                     # for python 2.5
    95                     mydate = datetime.datetime(*tuple(time.gmtime(mymtime))[0:6])
    96                 else:
    97                     # for python <= 2.4.x
    98                     mydate = xmlrpclib.DateTime(tuple(time.gmtime(mymtime)))
    99 
   100                 logging.debug("master: %s mine: %s" % (masterdate, mydate))
   101                 if mydate < masterdate:
   102                     # Get new copy and save
   103                     logging.info("Fetching page from %s..." % master_url)
   104                     master.putClientInfo('ANTISPAM-FETCH', request.http_host + request.script_name)
   105                     response = master.getPage(pagename)
   106                     if isinstance(response, dict) and 'faultCode' in response:
   107                         raise WikirpcError("failed to get BadContent data", response)
   108                     p._write_file(response)
   109                     mymtime = wikiutil.version2timestamp(p.mtime_usecs())
   110                 else:
   111                     failure.update("") # we didn't get a modified version, this avoids
   112                                        # permanent polling for every save when there
   113                                        # is no updated master page
   114 
   115             except (socket.error, xmlrpclib.ProtocolError), err:
   116                 logging.error('Timeout / socket / protocol error when accessing %s: %s' % (master_url, str(err)))
   117                 # update cache to wait before the next try
   118                 failure.update("")
   119 
   120             except (xmlrpclib.Fault, ), err:
   121                 logging.error('Fault on %s: %s' % (master_url, str(err)))
   122                 # update cache to wait before the next try
   123                 failure.update("")
   124 
   125             except Error, err:
   126                 # In case of Error, we log the error and use the local BadContent copy.
   127                 logging.error(str(err))
   128 
   129             # set back socket timeout
   130             socket.setdefaulttimeout(old_timeout)
   131 
   132     blacklist = p.get_raw_body()
   133     return mymtime, makelist(blacklist)
   134 
   135 
   136 class SecurityPolicy(Permissions):
   137     """ Extend the default security policy with antispam feature """
   138 
   139     def save(self, editor, newtext, rev, **kw):
   140         BLACKLISTPAGES = ["BadContent", "LocalBadContent"]
   141         if not editor.page_name in BLACKLISTPAGES:
   142             request = editor.request
   143 
   144             # Start timing of antispam operation
   145             request.clock.start('antispam')
   146 
   147             blacklist = []
   148             latest_mtime = 0
   149             for pn in BLACKLISTPAGES:
   150                 do_update = (pn != "LocalBadContent" and
   151                              request.cfg.interwikiname != 'MoinMaster') # MoinMaster wiki shall not fetch updates from itself
   152                 blacklist_mtime, blacklist_entries = getblacklist(request, pn, do_update)
   153                 blacklist += blacklist_entries
   154                 latest_mtime = max(latest_mtime, blacklist_mtime)
   155 
   156             if blacklist:
   157                 invalid_cache = not getattr(request.cfg.cache, "antispam_blacklist", None)
   158                 if invalid_cache or request.cfg.cache.antispam_blacklist[0] < latest_mtime:
   159                     mmblcache = []
   160                     for blacklist_re in blacklist:
   161                         try:
   162                             mmblcache.append(re.compile(blacklist_re, re.I))
   163                         except re.error, err:
   164                             logging.error("Error in regex '%s': %s. Please check the pages %s." % (
   165                                           blacklist_re,
   166                                           str(err),
   167                                           ', '.join(BLACKLISTPAGES)))
   168                     request.cfg.cache.antispam_blacklist = (latest_mtime, mmblcache)
   169 
   170                 from MoinMoin.Page import Page
   171 
   172                 oldtext = ""
   173                 if rev > 0: # rev is the revision of the old page
   174                     page = Page(request, editor.page_name, rev=rev)
   175                     oldtext = page.get_raw_body()
   176 
   177                 newset = frozenset(newtext.splitlines(1))
   178                 oldset = frozenset(oldtext.splitlines(1))
   179                 difference = newset - oldset
   180                 addedtext = kw.get('comment', u'') + u''.join(difference)
   181 
   182                 for blacklist_re in request.cfg.cache.antispam_blacklist[1]:
   183                     match = blacklist_re.search(addedtext)
   184                     if match:
   185                         # Log error and raise SaveError, PageEditor should handle this.
   186                         _ = editor.request.getText
   187                         msg = _('Sorry, can not save page because "%(content)s" is not allowed in this wiki.') % {
   188                                   'content': wikiutil.escape(match.group())
   189                               }
   190                         logging.info(msg)
   191                         raise editor.SaveError(msg)
   192             request.clock.stop('antispam')
   193 
   194         # No problem to save if my base class agree
   195         return Permissions.save(self, editor, newtext, rev, **kw)
   196