changeset 5977:f662e5f7ca82

cfg.log_events_format to configure event-log format, add a tool to export event-log to CSV
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Fri, 22 Mar 2013 01:59:02 +0100
parents 27e9d4805e74
children 810aee12a186
files MoinMoin/config/multiconfig.py MoinMoin/logfile/eventlog.py MoinMoin/script/export/eventlog.py docs/CHANGES
diffstat 4 files changed, 92 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/config/multiconfig.py	Sun Mar 17 20:09:15 2013 +0100
+++ b/MoinMoin/config/multiconfig.py	Fri Mar 22 01:59:02 2013 +0100
@@ -1112,6 +1112,8 @@
      "if True, do a reverse DNS lookup on page SAVE. If your DNS is broken, set this to False to speed up SAVE."),
     ('log_timing', False,
      "if True, add timing infos to the log output to analyse load conditions"),
+    ('log_events_format', 1,
+     "0 = no events logging, 1 = standard format (like <= 1.9.7) [default], 2 = extended format"),
 
     # some dangerous mimetypes (we don't use "content-disposition: inline" for them when a user
     # downloads such attachments, because the browser might execute e.g. Javascript contained
--- a/MoinMoin/logfile/eventlog.py	Sun Mar 17 20:09:15 2013 +0100
+++ b/MoinMoin/logfile/eventlog.py	Fri Mar 22 01:59:02 2013 +0100
@@ -29,7 +29,9 @@
         """ Write an event of type `eventtype, with optional key/value
             pairs appended (i.e. you have to pass a dict).
         """
-        if request.isSpiderAgent:
+        cfg = request.cfg
+        if cfg.log_events_format == 0 or request.isSpiderAgent:
+            # no event logging enabled or user agent is a bot / spider
             return
 
         if mtime_usecs is None:
@@ -37,7 +39,7 @@
 
         if values is None:
             values = {}
-        if request.cfg.log_remote_addr and add_http_info:
+        if cfg.log_remote_addr and add_http_info:
             # if cfg.log_remote_addr is False (usually for privacy reasons),
             # we likely do not want to log user agent and http referer either.
             for key in ['remote_addr', 'http_user_agent', 'http_referer']:
@@ -45,6 +47,12 @@
                 if value:
                     # Save those http headers in UPPERcase
                     values[key.upper()] = value
+
+        if cfg.log_events_format == 2:
+            values['username'] = request.user.name
+            values['wikiname'] = cfg.interwikiname
+            values['url'] = request.url
+
         # Encode values in a query string TODO: use more readable format
         values = wikiutil.makeQueryString(values)
         self._add(u"%d\t%s\t%s\n" % (mtime_usecs, eventtype, values))
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/script/export/eventlog.py	Fri Mar 22 01:59:02 2013 +0100
@@ -0,0 +1,65 @@
+# -*- coding: iso-8859-1 -*-
+"""
+MoinMoin - Dump event-log to CSV
+
+@copyright: 2013 MoinMoin:ThomasWaldmann
+@license: GNU GPL, see COPYING for details.
+"""
+
+import sys
+import csv
+
+from MoinMoin import script
+from MoinMoin.logfile.eventlog import EventLog
+
+
+class PluginScript(script.MoinScript):
+    """\
+Purpose:
+========
+This tool allows you to dump a MoinMoin wiki event-log to CSV.
+
+Detailed Instructions:
+======================
+General syntax: moin [options] export eventlog [eventlog-options]
+
+[options] usually should be:
+    --config-dir=/path/to/my/cfg/ --wiki-url=http://wiki.example.org/
+
+[eventlog-options] see below:
+    To write into a file (default: stdout):
+    --file=filename.csv
+"""
+
+    def __init__(self, argv=None, def_values=None):
+        script.MoinScript.__init__(self, argv, def_values)
+        self.parser.add_option(
+            "-f", "--file", dest="csv_fname",
+            help="CSV output filename [default: stdout]"
+        )
+
+    def mainloop(self):
+        self.init_request()
+        request = self.request
+
+        if self.options.csv_fname:
+            csv_file = open(self.options.csv_fname, "w")
+        else:
+            csv_file = sys.stdout
+
+        columns = ['time', 'event', 'username', 'ip', 'wikiname', 'pagename', 'url', 'referrer', 'ua', ]
+        csv_out = csv.DictWriter(csv_file, columns, restval='', extrasaction='ignore')
+        for time, event, kv in EventLog(request):
+            kv = kv.to_dict()  # convert from MultiDict to dict
+            # convert usecs to secs
+            time = time / 1000000.0
+            # change some key names to simpler ones:
+            ip = kv.pop('REMOTE_ADDR', '')
+            ua = kv.pop('HTTP_USER_AGENT', '')
+            referrer = kv.pop('HTTP_REFERER', '')
+            kv.update(dict(time=unicode(time), event=event, ip=ip, referrer=referrer, ua=ua))
+            # csv can't handle unicode, encode to utf-8:
+            kv = dict([(k, v.encode('utf-8')) for k, v in kv.iteritems()])
+            csv_out.writerow(kv)
+        csv_file.close()
+
--- a/docs/CHANGES	Sun Mar 17 20:09:15 2013 +0100
+++ b/docs/CHANGES	Fri Mar 22 01:59:02 2013 +0100
@@ -16,7 +16,7 @@
     editor_force = True
     editor_default = 'text'  # internal default, just for completeness
 
-Version 1.9.7:
+Version 1.9.current:
   SECURITY HINT: make sure you have allow_xslt = False (or just do not use
   allow_xslt at all in your wiki configs, False is the internal default).
   Allowing XSLT/4suite is very dangerous, see HelpOnConfiguration wiki page.
@@ -24,6 +24,20 @@
   HINT: Python >= 2.5 is maybe required! See docs/REQUIREMENTS for details.
 
   New features:
+  * cfg.log_events_format can be used to configure the format of the records
+    written to <data_dir>/event-log:
+    0 = do not create event-log entries (saves disk space, disk I/O)
+    1 = standard (like in moin <= 1.9.7) [default]
+    2 = extended (add infos about username, wikiname, url)
+  * add a tool to output the contents of the event-log to CSV:
+    moin export eventlog --file=output.csv
+    Output encoding is utf-8, columns are in this order:
+    time, event, username, ip, wikiname, pagename, url, referrer, ua
+    time: UNIX timestamp (float)
+
+
+Version 1.9.7:
+  New features:
   * passlib support - enhanced password hash security. Special thanks go to
     the Python Software Foundation (PSF) for sponsoring development of this!