# -*- coding: iso-8859-1 -*-
"""
    This implements a global (and a local) blacklist against wiki spammers.

    @license: GNU GPL, see COPYING for details
"""

import re
import time
import datetime

from MoinMoin import log
# Module-level logger. Note that in this file the name "logging" refers to
# this logger object, not to the stdlib logging module.
logging = log.getLogger(__name__)

from MoinMoin.support.python_compatibility import frozenset
from MoinMoin.security import Permissions
from MoinMoin import caching, wikiutil

# Errors ---------------------------------------------------------------

class Error(Exception):
    """Base class for antispam errors.

    WikirpcError derives from this class, so catching Error also catches
    it.
    """
    # NOTE(review): the extract this class was rebuilt from appears to have
    # lost some lines after the docstring; confirm against the upstream
    # file whether methods need to be restored here.

class WikirpcError(Error):
    """ Raised when an xmlrpc response carries fault data """

    def __init__(self, msg, fault):
        """ Init with msg and xmlrpclib.Fault dict

        @param msg: short description of the operation that failed
        @param fault: mapping providing the 'faultCode' and 'faultString'
                      keys used by __str__
        """
        # Populate the standard exception args as well, so repr() of the
        # exception shows the message and the fault data.
        Error.__init__(self, msg, fault)
        self.msg = msg
        self.fault = fault

    def __str__(self):
        """ Format the message using description and data from the fault """
        # "%" binds tighter than "+": only the literal is formatted, so
        # percent signs inside self.msg are left untouched.
        return self.msg + ": [%(faultCode)s] %(faultString)s" % self.fault

# Functions ------------------------------------------------------------

def makelist(text):
    """ Split text into lines, strip them, skip # comments

    A ' # ' sequence (hash surrounded by blanks) starts a rest-of-line
    comment; lines that are empty or start with '#' after stripping are
    dropped.

    @param text: text to split, one entry per line
    @rtype: list
    @return: remaining lines without surrounding whitespace, in input order
    """
    result = []
    for line in text.splitlines():
        line = line.split(' # ', 1)[0] # rest of line comment
        line = line.strip()
        if line and not line.startswith('#'):
            result.append(line)
    return result

53 def getblacklist(request, pagename, do_update):
54 """ Get blacklist, possibly downloading new copy
56 @param request: current request (request instance)
57 @param pagename: bad content page name (unicode)
59 @return: list of blacklisted regular expressions
61 from MoinMoin.PageEditor import PageEditor
62 p = PageEditor(request, pagename, uid_override="Antispam subsystem")
63 mymtime = wikiutil.version2timestamp(p.mtime_usecs())
65 tooold = time.time() - 1800
66 failure = caching.CacheEntry(request, "antispam", "failure", scope='wiki')
67 fail_time = failure.mtime() # only update if no failure in last hour
68 if (mymtime < tooold) and (fail_time < tooold):
69 logging.info("%d *BadContent too old, have to check for an update..." % tooold)
73 timeout = 15 # time out for reaching the master server via xmlrpc
74 old_timeout = socket.getdefaulttimeout()
75 socket.setdefaulttimeout(timeout)
77 master_url = request.cfg.antispam_master_url
78 master = xmlrpclib.ServerProxy(master_url)
81 master.putClientInfo('ANTISPAM-CHECK',
82 request.http_host+request.script_name)
83 response = master.getPageInfo(pagename)
85 # It seems that response is always a dict
86 if isinstance(response, dict) and 'faultCode' in response:
87 raise WikirpcError("failed to get BadContent information",
90 # Compare date against local BadContent copy
91 masterdate = response['lastModified']
93 if isinstance(masterdate, datetime.datetime):
95 mydate = datetime.datetime(*tuple(time.gmtime(mymtime))[0:6])
98 mydate = xmlrpclib.DateTime(tuple(time.gmtime(mymtime)))
100 logging.debug("master: %s mine: %s" % (masterdate, mydate))
101 if mydate < masterdate:
102 # Get new copy and save
103 logging.info("Fetching page from %s..." % master_url)
104 master.putClientInfo('ANTISPAM-FETCH', request.http_host + request.script_name)
105 response = master.getPage(pagename)
106 if isinstance(response, dict) and 'faultCode' in response:
107 raise WikirpcError("failed to get BadContent data", response)
108 p._write_file(response)
109 mymtime = wikiutil.version2timestamp(p.mtime_usecs())
111 failure.update("") # we didn't get a modified version, this avoids
112 # permanent polling for every save when there
113 # is no updated master page
115 except (socket.error, xmlrpclib.ProtocolError), err:
116 logging.error('Timeout / socket / protocol error when accessing %s: %s' % (master_url, str(err)))
117 # update cache to wait before the next try
120 except (xmlrpclib.Fault, ), err:
121 logging.error('Fault on %s: %s' % (master_url, str(err)))
122 # update cache to wait before the next try
126 # In case of Error, we log the error and use the local BadContent copy.
127 logging.error(str(err))
129 # set back socket timeout
130 socket.setdefaulttimeout(old_timeout)
132 blacklist = p.get_raw_body()
133 return mymtime, makelist(blacklist)
136 class SecurityPolicy(Permissions):
137 """ Extend the default security policy with antispam feature """
139 def save(self, editor, newtext, rev, **kw):
140 BLACKLISTPAGES = ["BadContent", "LocalBadContent"]
141 if not editor.page_name in BLACKLISTPAGES:
142 request = editor.request
144 # Start timing of antispam operation
145 request.clock.start('antispam')
149 for pn in BLACKLISTPAGES:
150 do_update = (pn != "LocalBadContent" and
151 request.cfg.interwikiname != 'MoinMaster') # MoinMaster wiki shall not fetch updates from itself
152 blacklist_mtime, blacklist_entries = getblacklist(request, pn, do_update)
153 blacklist += blacklist_entries
154 latest_mtime = max(latest_mtime, blacklist_mtime)
157 invalid_cache = not getattr(request.cfg.cache, "antispam_blacklist", None)
158 if invalid_cache or request.cfg.cache.antispam_blacklist[0] < latest_mtime:
160 for blacklist_re in blacklist:
162 mmblcache.append(re.compile(blacklist_re, re.I))
163 except re.error, err:
164 logging.error("Error in regex '%s': %s. Please check the pages %s." % (
167 ', '.join(BLACKLISTPAGES)))
168 request.cfg.cache.antispam_blacklist = (latest_mtime, mmblcache)
170 from MoinMoin.Page import Page
173 if rev > 0: # rev is the revision of the old page
174 page = Page(request, editor.page_name, rev=rev)
175 oldtext = page.get_raw_body()
177 newset = frozenset(newtext.splitlines(1))
178 oldset = frozenset(oldtext.splitlines(1))
179 difference = newset - oldset
180 addedtext = kw.get('comment', u'') + u''.join(difference)
182 for blacklist_re in request.cfg.cache.antispam_blacklist[1]:
183 match = blacklist_re.search(addedtext)
185 # Log error and raise SaveError, PageEditor should handle this.
186 _ = editor.request.getText
187 msg = _('Sorry, can not save page because "%(content)s" is not allowed in this wiki.') % {
188 'content': wikiutil.escape(match.group())
191 raise editor.SaveError(msg)
192 request.clock.stop('antispam')
194 # No problem to save if my base class agree
195 return Permissions.save(self, editor, newtext, rev, **kw)