changeset 2584:efac33f4b9e4

1.6 converter: rename module
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Wed, 01 Aug 2007 01:23:54 +0200
parents dd005fd66306
children cb899abca9a8
files MoinMoin/script/migration/1050800.py MoinMoin/script/migration/1059999.py MoinMoin/script/migration/_conv160.py MoinMoin/script/migration/conv160.py
diffstat 4 files changed, 492 insertions(+), 492 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/script/migration/1050800.py	Wed Aug 01 00:46:59 2007 +0200
+++ b/MoinMoin/script/migration/1050800.py	Wed Aug 01 01:23:54 2007 +0200
@@ -6,7 +6,7 @@
     @license: GNU GPL, see COPYING for details.
 """
 
-from conv160 import DataConverter
+from _conv160 import DataConverter
 
 def execute(script, data_dir, rev):
     # the first pass just creates <data_dir>/rename1.txt
--- a/MoinMoin/script/migration/1059999.py	Wed Aug 01 00:46:59 2007 +0200
+++ b/MoinMoin/script/migration/1059999.py	Wed Aug 01 01:23:54 2007 +0200
@@ -8,7 +8,7 @@
 
 import os, shutil
 
-from conv160 import DataConverter
+from _conv160 import DataConverter
 
 def execute(script, data_dir, rev):
     rename_map1 = os.path.join(data_dir, 'rename1.txt')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/script/migration/_conv160.py	Wed Aug 01 01:23:54 2007 +0200
@@ -0,0 +1,490 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - migration from base rev 105xxyy
+
+    What it should do when it is ready:
+
+    a) reverse underscore == blank stuff in pagenames (introducing this was a fault)
+
+                   pagename            quoted pagename
+       -----------------------------------------------------
+       old         MainPage/Sub_Page   MainPage(2f)Sub_Page
+       new         MainPage/Sub Page   MainPage(2f)Sub(20)Page    or
+       new         MainPage/Sub_Page   MainPage(2f)Sub_Page       (user has to decide by editing rename.txt)
+
+
+                   markup
+       ----------------------------------------------------
+       old         MoinMoin:MainPage/Sub_Page    ../Sub_Page2
+       new         MoinMoin:"MainPage/Sub Page"  "../Sub Page2"???? (TODO check if this works)
+
+
+    b) decode url encoded chars in attachment names (and quote the whole fname):
+
+                   markup
+       ----------------------------------------------------
+       old         attachment:file%20with%20blanks.txt
+       new         attachment:"file with blanks.txt"
+
+
+    TODO:
+        * process page content / convert markup
+        * rename pages in user subscribed pages
+        * rename pages in user quicklinks
+
+    DONE:
+        pass 1
+        * creating the rename.txt works
+        pass 2
+        * renaming of pagedirs works
+         * renamed page names in global edit-log
+         * renamed page names in local edit-log
+         * renamed page names in event-log
+        * renaming of attachments works
+         * renamed attachment names in global edit-log
+         * renamed attachment names in local edit-log
+
+    @copyright: 2007 by Thomas Waldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+import os.path, sys
+import codecs, urllib, glob
+
+# Insert THIS moin dir first into sys path, or you would run another version of moin!
+sys.path.insert(0, '../../..')
+
+from MoinMoin import config, wikiutil
+from MoinMoin.script.migration.migutil import opj, listdir, copy_file, move_file, copy_dir
+
+import mimetypes # this MUST be after wikiutil import!
+
+def markup_converter(text, renames):
+    """ Convert the <text> content of some Page, using <renames> dict to rename
+        links correctly. Additionally, convert some changed markup.
+    """
+    if "#format wiki" not in text and "#format" in text:
+        return text # this is not a wiki page, leave it as is
+    # TODO convert markup of page
+    return text
+
+
+class EventLog:
+    def __init__(self, fname):
+        self.fname = fname
+        self.data = None
+        self.renames = {}
+
+    def read(self):
+        """ read complete event-log from disk """
+        data = []
+        f = file(self.fname, 'r')
+        for line in f:
+            line = line.replace('\r', '').replace('\n', '')
+            if not line.strip(): # skip empty lines
+                continue
+            fields = line.split('\t')
+            timestamp, action, kvpairs = fields
+            timestamp = int(timestamp)
+            kvpairs = kvpairs.split('&')
+            kvdict = {}
+            for kvpair in kvpairs:
+                key, val = kvpair.split('=')
+                key = urllib.unquote(key).decode('utf-8')
+                val = urllib.unquote(val).decode('utf-8')
+                kvdict[key] = val
+            data.append((timestamp, action, kvdict))
+        self.data = data
+
+    def write(self, fname):
+        """ write complete event-log to disk """
+        f = file(fname, 'w')
+        for timestamp, action, kvdict in self.data:
+            kvlist = []
+            for k, v in kvdict.items():
+                if k == 'pagename' and ('PAGE', v) in self.renames:
+                    v = self.renames[('PAGE', v)]
+                k = urllib.quote(k.encode('utf-8'))
+                v = urllib.quote(v.encode('utf-8'))
+                kvlist.append("%s=%s" % (k, v))
+            fields = str(timestamp), action, '&'.join(kvlist)
+            line = '\t'.join(fields) + '\n'
+            f.write(line)
+        f.close()
+
+    def copy(self, destfname, renames):
+        self.renames = renames
+        self.read()
+        self.write(destfname)
+
+
+class EditLog:
+    def __init__(self, fname):
+        self.fname = fname
+        self.data = None
+        self.renames = {}
+
+    def read(self):
+        """ read complete edit-log from disk """
+        data = {}
+        f = file(self.fname, 'r')
+        for line in f:
+            line = line.replace('\r', '').replace('\n', '')
+            if not line.strip(): # skip empty lines
+                continue
+            fields = line.split('\t') + [''] * 9
+            timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields[:9]
+            timestamp = int(timestamp)
+            rev = int(rev)
+            pagename = wikiutil.unquoteWikiname(pagename)
+            data[(timestamp, rev, pagename)] = (timestamp, rev, action, pagename, ip, hostname, userid, extra, comment)
+        self.data = data
+
+    def write(self, fname):
+        """ write complete edit-log to disk """
+        editlog = self.data.items()
+        editlog.sort()
+        f = file(fname, "w")
+        for key, fields in editlog:
+            timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields
+            if action.startswith('ATT'):
+                try:
+                    fname = urllib.unquote(extra).decode('utf-8')
+                except UnicodeDecodeError:
+                    fname = urllib.unquote(extra).decode('iso-8859-1')
+                if ('FILE', pagename, fname) in self.renames:
+                    fname = self.renames[('FILE', pagename, fname)]
+                extra = urllib.quote(fname.encode('utf-8'))
+            if ('PAGE', pagename) in self.renames:
+                pagename = self.renames[('PAGE', pagename)]
+            timestamp = str(timestamp)
+            rev = '%08d' % rev
+            pagename = wikiutil.quoteWikinameFS(pagename)
+            fields = timestamp, rev, action, pagename, ip, hostname, userid, extra, comment
+            log_str = '\t'.join(fields) + '\n'
+            f.write(log_str)
+        f.close()
+
+    def copy(self, destfname, renames):
+        self.renames = renames
+        self.read()
+        self.write(destfname)
+
+
+class PageRev:
+    """ a single revision of a page
+        TODO: add some magic, that reads data from disk on first access
+              and frees memory after the write() call has written it out
+    """
+    def __init__(self, rev_dir, rev):
+        self.rev_dir = rev_dir
+        self.rev = rev
+
+    def read(self):
+        fname = opj(self.rev_dir, '%08d' % self.rev)
+        f = file(fname, "rb")
+        data = f.read()
+        f.close()
+        data = data.decode(config.charset)
+        return data
+
+    def write(self, data, rev_dir, rev=None):
+        if rev is None:
+            rev = self.rev
+        data = markup_converter(data, self.renames)
+        fname = opj(rev_dir, '%08d' % rev)
+        data = data.encode(config.charset)
+        f = file(fname, "wb")
+        f.write(data)
+        f.close()
+
+    def copy(self, rev_dir, renames):
+        self.renames = renames
+        data = self.read()
+        self.write(data, rev_dir)
+
+
+class Attachment:
+    """ a single attachment """
+    def __init__(self, attach_dir, attfile):
+        self.path = opj(attach_dir, attfile)
+        self.name = attfile.decode('utf-8')
+
+    def copy(self, attach_dir):
+        """ copy attachment file from orig path to new destination """
+        attfile = self.name.encode('utf-8')
+        dest = opj(attach_dir, attfile)
+        copy_file(self.path, dest)
+
+
+class Page:
+    """ represents a page with all related data """
+    def __init__(self, pages_dir, qpagename):
+        self.name = wikiutil.unquoteWikiname(qpagename)
+        self.name_old = self.name # renaming: still original name when self.name has the new name
+        self.page_dir = opj(pages_dir, qpagename)
+        self.current = None # int current
+        self.editlog = None # dict (see read_editlog)
+        self.revlist = None # list of ints (page text revisions)
+        self.revisions = None # dict int: pagerev obj
+        self.attachments = None # dict of unicode fname: full path
+        self.renames = {} # info for renaming pages/attachments
+
+    def read(self):
+        """ read a page, including revisions, log, attachments from disk """
+        page_dir = self.page_dir
+        # read current file
+        current_fname = opj(page_dir, 'current')
+        if os.path.exists(current_fname):
+            current_file = file(current_fname, "r")
+            current_rev = current_file.read()
+            current_file.close()
+            self.current = int(current_rev)
+        # read edit-log
+        editlog_fname = opj(page_dir, 'edit-log')
+        if os.path.exists(editlog_fname):
+            self.editlog = EditLog(editlog_fname)
+        # read page revisions
+        rev_dir = opj(page_dir, 'revisions')
+        if os.path.exists(rev_dir):
+            revlist = listdir(rev_dir)
+            revlist = [int(rev) for rev in revlist]
+            revlist.sort()
+            self.revlist = revlist
+            self.revisions = {}
+            for rev in revlist:
+                self.revisions[rev] = PageRev(rev_dir, rev)
+        # read attachment filenames
+        attach_dir = opj(page_dir, 'attachments')
+        if os.path.exists(attach_dir):
+            self.attachments = {}
+            attlist = listdir(attach_dir)
+            for attfile in attlist:
+                a = Attachment(attach_dir, attfile)
+                self.attachments[a.name] = a
+
+    def write(self, pages_dir):
+        """ write a page, including revisions, log, attachments to disk """
+        if ('PAGE', self.name) in self.renames:
+            name_new = self.renames[('PAGE', self.name)]
+            if name_new != self.name:
+                print "Renaming page %r -> %r" % (self.name, name_new)
+                self.name_old = self.name
+                self.name = name_new
+        qpagename = wikiutil.quoteWikinameFS(self.name)
+        page_dir = opj(pages_dir, qpagename)
+        os.makedirs(page_dir)
+        # write current file
+        if self.current is not None:
+            current_fname = opj(page_dir, 'current')
+            current_file = file(current_fname, "w")
+            current_str = '%08d\n' % self.current
+            current_file.write(current_str)
+            current_file.close()
+        # copy edit-log
+        if self.editlog is not None:
+            editlog_fname = opj(page_dir, 'edit-log')
+            self.editlog.copy(editlog_fname, self.renames)
+        # copy page revisions
+        if self.revisions is not None:
+            rev_dir = opj(page_dir, 'revisions')
+            os.makedirs(rev_dir)
+            for rev in self.revlist:
+                self.revisions[rev].copy(rev_dir, self.renames)
+        # copy attachments
+        if self.attachments is not None:
+            attach_dir = opj(page_dir, 'attachments')
+            os.makedirs(attach_dir)
+            for fn, att in self.attachments.items():
+                # we have to check for renames here because we need the (old) pagename, too:
+                if ('FILE', self.name_old, fn) in self.renames:
+                    fn_new = self.renames[('FILE', self.name_old, fn)]
+                    if fn_new != fn:
+                        print "Renaming file %r %r -> %r" % (self.name_old, fn, fn_new)
+                        att.name = fn_new
+                att.copy(attach_dir)
+
+    def copy(self, pages_dir, renames):
+            self.renames = renames
+            self.read()
+            self.write(pages_dir)
+
+
+class User:
+    """ represents a user with all related data """
+    def __init__(self, users_dir, uid):
+        self.uid = uid
+        self.users_dir = users_dir
+        self.profile = None
+        self.bookmarks = None
+
+    def read(self):
+        """ read profile and bookmarks data from disk """
+        self.profile = {}
+        fname = opj(self.users_dir, self.uid)
+        # read user profile
+        f = file(fname, "r")
+        for line in f:
+            line = line.replace('\r', '').replace('\n', '')
+            if not line.strip() or line.startswith('#'): # skip empty or comment lines
+                continue
+            key, value = line.split('=', 1)
+            self.profile[key] = value
+        f.close()
+        # read bookmarks
+        self.bookmarks = {}
+        fname_pattern = opj(self.users_dir, "%s.*.bookmark" % self.uid)
+        for fname in glob.glob(fname_pattern):
+            f = file(fname, "r")
+            bookmark = f.read()
+            f.close()
+            wiki = fname.replace('.bookmark', '').replace(opj(self.users_dir, self.uid+'.'), '')
+            self.bookmarks[wiki] = int(bookmark)
+        # don't care about trail
+
+    def write(self, users_dir):
+        """ write profile and bookmarks data to disk """
+        fname = opj(users_dir, self.uid)
+        f = file(fname, "w")
+        for key, value in self.profile.items():
+            f.write("%s=%s\n" % (key, value))
+        f.close()
+        # write bookmarks
+        for wiki, bookmark in self.bookmarks.items():
+            fname = opj(users_dir, "%s.%s.bookmark" % (self.uid, wiki))
+            f = file(fname, "w")
+            f.write("%d\n" % bookmark)
+            f.close()
+        # don't care about trail
+
+    def copy(self, users_dir, renames):
+        self.renames = renames
+        self.read()
+        self.write(users_dir)
+
+
+class DataConverter(object):
+    def __init__(self, src_data_dir, dest_data_dir):
+        self.sdata = src_data_dir
+        self.ddata = dest_data_dir
+        self.pages = {}
+        self.users = {}
+        self.renames = {}
+        self.rename_fname1 = opj(self.sdata, 'rename1.txt')
+        self.rename_fname2 = opj(self.sdata, 'rename2.txt')
+
+    def pass1(self):
+        """ First create the rename list - the user has to review/edit it as
+            we can't decide about page/attachment names automatically.
+        """
+        self.read_src()
+        # pages
+        for pn, p in self.pages.items():
+            p.read()
+            if not p.revisions:
+                continue # we don't care for pages with no revisions (trash)
+            if "_" in pn:
+                # log all pagenames with underscores
+                self.renames[('PAGE', pn)] = None
+            if p.attachments is not None:
+                for fn in p.attachments:
+                    try:
+                        fn_str = fn.encode('ascii')
+                        log = False # pure ascii filenames are no problem
+                    except UnicodeEncodeError:
+                        log = True # this file maybe has a strange representation in wiki markup
+                    else:
+                        if ' ' in fn_str or '%' in fn_str: # files with blanks need quoting
+                            log = True
+                    if log:
+                        # log all strange attachment filenames
+                        fn_str = fn.encode('utf-8')
+                        self.renames[('FILE', pn, fn)] = None
+        self.save_renames()
+
+    def save_renames(self):
+        f = codecs.open(self.rename_fname1, 'w', 'utf-8')
+        for k in self.renames:
+            rtype, pn, fn = (k + (None, ))[:3]
+            if rtype == 'PAGE':
+                line = u"%s\t%s\t%s\r\n" % (rtype, pn, pn)
+            elif rtype == 'FILE':
+                line = u"%s\t%s\t%s\t%s\r\n" % (rtype, pn, fn, fn)
+            f.write(line)
+        f.close()
+
+    def load_renames(self):
+        f = codecs.open(self.rename_fname2, 'r', 'utf-8')
+        for line in f:
+            line = line.rstrip()
+            if not line:
+                continue
+            t = line.split(u'\t')
+            rtype, p1, p2, p3 = (t + [None]*3)[:4]
+            if rtype == u'PAGE':
+                self.renames[(str(rtype), p1)] = p2
+            elif rtype == u'FILE':
+                self.renames[(str(rtype), p1, p2)] = p3
+        f.close()
+
+    def pass2(self):
+        """ Second, read the (user edited) rename list and do the renamings everywhere. """
+        self.read_src()
+        self.load_renames()
+        self.write_dest()
+
+    def read_src(self):
+        # create Page objects in memory
+        pages_dir = opj(self.sdata, 'pages')
+        pagelist = listdir(pages_dir)
+        for qpagename in pagelist:
+            p = Page(pages_dir, qpagename)
+            self.pages[p.name] = p
+
+        # create User objects in memory
+        users_dir = opj(self.sdata, 'user')
+        userlist = listdir(users_dir)
+        userlist = [fn for fn in userlist if not fn.endswith(".trail") and not fn.endswith(".bookmark")]
+        for userid in userlist:
+            u = User(users_dir, userid)
+            self.users[u.uid] = u
+
+        # create log objects in memory
+        self.editlog = EditLog(opj(self.sdata, 'edit-log'))
+        self.eventlog = EventLog(opj(self.sdata, 'event-log'))
+
+    def write_dest(self):
+        self.init_dest()
+        # copy pages
+        pages_dir = opj(self.ddata, 'pages')
+        for page in self.pages.values():
+            page.copy(pages_dir, self.renames)
+
+        # copy users
+        users_dir = opj(self.ddata, 'user')
+        for user in self.users.values():
+            user.copy(users_dir, self.renames)
+
+        # copy logs
+        self.editlog.copy(opj(self.ddata, 'edit-log'), self.renames)
+        self.eventlog.copy(opj(self.ddata, 'event-log'), self.renames)
+
+    def init_dest(self):
+        os.makedirs(self.ddata)
+        os.makedirs(opj(self.ddata, 'pages'))
+        os.makedirs(opj(self.ddata, 'user'))
+        copy_dir(opj(self.sdata, 'plugin'), opj(self.ddata, 'plugin'))
+        copy_file(opj(self.sdata, 'intermap.txt'), opj(self.ddata, 'intermap.txt'))
+
+
+if __name__ == '__main__':
+    origdir = 'data'
+    destdir = 'data-new'
+    dc = DataConverter(origdir, destdir)
+    passno = int(sys.argv[1])
+    if passno == 1:
+        dc.pass1()
+    elif passno == 2:
+        dc.pass2()
+
--- a/MoinMoin/script/migration/conv160.py	Wed Aug 01 00:46:59 2007 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,490 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: iso-8859-1 -*-
-"""
-    MoinMoin - migration from base rev 105xxyy
-
-    What it should do when it is ready:
-
-    a) reverse underscore == blank stuff in pagenames (introducing this was a fault)
-
-                   pagename            quoted pagename
-       -----------------------------------------------------
-       old         MainPage/Sub_Page   MainPage(2f)Sub_Page
-       new         MainPage/Sub Page   MainPage(2f)Sub(20)Page    or
-       new         MainPage/Sub_Page   MainPage(2f)Sub_Page       (user has to decide by editing rename.txt)
-
-
-                   markup
-       ----------------------------------------------------
-       old         MoinMoin:MainPage/Sub_Page    ../Sub_Page2
-       new         MoinMoin:"MainPage/Sub Page"  "../Sub Page2"???? (TODO check if this works)
-
-
-    b) decode url encoded chars in attachment names (and quote the whole fname):
-
-                   markup
-       ----------------------------------------------------
-       old         attachment:file%20with%20blanks.txt
-       new         attachment:"file with blanks.txt"
-
-
-    TODO:
-        * process page content / convert markup
-        * rename pages in user subscribed pages
-        * rename pages in user quicklinks
-
-    DONE:
-        pass 1
-        * creating the rename.txt works
-        pass 2
-        * renaming of pagedirs works
-         * renamed page names in global edit-log
-         * renamed page names in local edit-log
-         * renamed page names in event-log
-        * renaming of attachments works
-         * renamed attachment names in global edit-log
-         * renamed attachment names in local edit-log
-
-    @copyright: 2007 by Thomas Waldmann
-    @license: GNU GPL, see COPYING for details.
-"""
-
-import os.path, sys
-import codecs, urllib, glob
-
-# Insert THIS moin dir first into sys path, or you would run another version of moin!
-sys.path.insert(0, '../../..')
-
-from MoinMoin import config, wikiutil
-from MoinMoin.script.migration.migutil import opj, listdir, copy_file, move_file, copy_dir
-
-import mimetypes # this MUST be after wikiutil import!
-
-def markup_converter(text, renames):
-    """ Convert the <text> content of some Page, using <renames> dict to rename
-        links correctly. Additionally, convert some changed markup.
-    """
-    if "#format wiki" not in text and "#format" in text:
-        return text # this is not a wiki page, leave it as is
-    # TODO convert markup of page
-    return text
-
-
-class EventLog:
-    def __init__(self, fname):
-        self.fname = fname
-        self.data = None
-        self.renames = {}
-
-    def read(self):
-        """ read complete event-log from disk """
-        data = []
-        f = file(self.fname, 'r')
-        for line in f:
-            line = line.replace('\r', '').replace('\n', '')
-            if not line.strip(): # skip empty lines
-                continue
-            fields = line.split('\t')
-            timestamp, action, kvpairs = fields
-            timestamp = int(timestamp)
-            kvpairs = kvpairs.split('&')
-            kvdict = {}
-            for kvpair in kvpairs:
-                key, val = kvpair.split('=')
-                key = urllib.unquote(key).decode('utf-8')
-                val = urllib.unquote(val).decode('utf-8')
-                kvdict[key] = val
-            data.append((timestamp, action, kvdict))
-        self.data = data
-
-    def write(self, fname):
-        """ write complete event-log to disk """
-        f = file(fname, 'w')
-        for timestamp, action, kvdict in self.data:
-            kvlist = []
-            for k, v in kvdict.items():
-                if k == 'pagename' and ('PAGE', v) in self.renames:
-                    v = self.renames[('PAGE', v)]
-                k = urllib.quote(k.encode('utf-8'))
-                v = urllib.quote(v.encode('utf-8'))
-                kvlist.append("%s=%s" % (k, v))
-            fields = str(timestamp), action, '&'.join(kvlist)
-            line = '\t'.join(fields) + '\n'
-            f.write(line)
-        f.close()
-
-    def copy(self, destfname, renames):
-        self.renames = renames
-        self.read()
-        self.write(destfname)
-
-
-class EditLog:
-    def __init__(self, fname):
-        self.fname = fname
-        self.data = None
-        self.renames = {}
-
-    def read(self):
-        """ read complete edit-log from disk """
-        data = {}
-        f = file(self.fname, 'r')
-        for line in f:
-            line = line.replace('\r', '').replace('\n', '')
-            if not line.strip(): # skip empty lines
-                continue
-            fields = line.split('\t') + [''] * 9
-            timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields[:9]
-            timestamp = int(timestamp)
-            rev = int(rev)
-            pagename = wikiutil.unquoteWikiname(pagename)
-            data[(timestamp, rev, pagename)] = (timestamp, rev, action, pagename, ip, hostname, userid, extra, comment)
-        self.data = data
-
-    def write(self, fname):
-        """ write complete edit-log to disk """
-        editlog = self.data.items()
-        editlog.sort()
-        f = file(fname, "w")
-        for key, fields in editlog:
-            timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields
-            if action.startswith('ATT'):
-                try:
-                    fname = urllib.unquote(extra).decode('utf-8')
-                except UnicodeDecodeError:
-                    fname = urllib.unquote(extra).decode('iso-8859-1')
-                if ('FILE', pagename, fname) in self.renames:
-                    fname = self.renames[('FILE', pagename, fname)]
-                extra = urllib.quote(fname.encode('utf-8'))
-            if ('PAGE', pagename) in self.renames:
-                pagename = self.renames[('PAGE', pagename)]
-            timestamp = str(timestamp)
-            rev = '%08d' % rev
-            pagename = wikiutil.quoteWikinameFS(pagename)
-            fields = timestamp, rev, action, pagename, ip, hostname, userid, extra, comment
-            log_str = '\t'.join(fields) + '\n'
-            f.write(log_str)
-        f.close()
-
-    def copy(self, destfname, renames):
-        self.renames = renames
-        self.read()
-        self.write(destfname)
-
-
-class PageRev:
-    """ a single revision of a page
-        TODO: add some magic, that reads data from disk on first access
-              and frees memory after the write() call has written it out
-    """
-    def __init__(self, rev_dir, rev):
-        self.rev_dir = rev_dir
-        self.rev = rev
-
-    def read(self):
-        fname = opj(self.rev_dir, '%08d' % self.rev)
-        f = file(fname, "rb")
-        data = f.read()
-        f.close()
-        data = data.decode(config.charset)
-        return data
-
-    def write(self, data, rev_dir, rev=None):
-        if rev is None:
-            rev = self.rev
-        data = markup_converter(data, self.renames)
-        fname = opj(rev_dir, '%08d' % rev)
-        data = data.encode(config.charset)
-        f = file(fname, "wb")
-        f.write(data)
-        f.close()
-
-    def copy(self, rev_dir, renames):
-        self.renames = renames
-        data = self.read()
-        self.write(data, rev_dir)
-
-
-class Attachment:
-    """ a single attachment """
-    def __init__(self, attach_dir, attfile):
-        self.path = opj(attach_dir, attfile)
-        self.name = attfile.decode('utf-8')
-
-    def copy(self, attach_dir):
-        """ copy attachment file from orig path to new destination """
-        attfile = self.name.encode('utf-8')
-        dest = opj(attach_dir, attfile)
-        copy_file(self.path, dest)
-
-
-class Page:
-    """ represents a page with all related data """
-    def __init__(self, pages_dir, qpagename):
-        self.name = wikiutil.unquoteWikiname(qpagename)
-        self.name_old = self.name # renaming: still original name when self.name has the new name
-        self.page_dir = opj(pages_dir, qpagename)
-        self.current = None # int current
-        self.editlog = None # dict (see read_editlog)
-        self.revlist = None # list of ints (page text revisions)
-        self.revisions = None # dict int: pagerev obj
-        self.attachments = None # dict of unicode fname: full path
-        self.renames = {} # info for renaming pages/attachments
-
-    def read(self):
-        """ read a page, including revisions, log, attachments from disk """
-        page_dir = self.page_dir
-        # read current file
-        current_fname = opj(page_dir, 'current')
-        if os.path.exists(current_fname):
-            current_file = file(current_fname, "r")
-            current_rev = current_file.read()
-            current_file.close()
-            self.current = int(current_rev)
-        # read edit-log
-        editlog_fname = opj(page_dir, 'edit-log')
-        if os.path.exists(editlog_fname):
-            self.editlog = EditLog(editlog_fname)
-        # read page revisions
-        rev_dir = opj(page_dir, 'revisions')
-        if os.path.exists(rev_dir):
-            revlist = listdir(rev_dir)
-            revlist = [int(rev) for rev in revlist]
-            revlist.sort()
-            self.revlist = revlist
-            self.revisions = {}
-            for rev in revlist:
-                self.revisions[rev] = PageRev(rev_dir, rev)
-        # read attachment filenames
-        attach_dir = opj(page_dir, 'attachments')
-        if os.path.exists(attach_dir):
-            self.attachments = {}
-            attlist = listdir(attach_dir)
-            for attfile in attlist:
-                a = Attachment(attach_dir, attfile)
-                self.attachments[a.name] = a
-
-    def write(self, pages_dir):
-        """ write a page, including revisions, log, attachments to disk """
-        if ('PAGE', self.name) in self.renames:
-            name_new = self.renames[('PAGE', self.name)]
-            if name_new != self.name:
-                print "Renaming page %r -> %r" % (self.name, name_new)
-                self.name_old = self.name
-                self.name = name_new
-        qpagename = wikiutil.quoteWikinameFS(self.name)
-        page_dir = opj(pages_dir, qpagename)
-        os.makedirs(page_dir)
-        # write current file
-        if self.current is not None:
-            current_fname = opj(page_dir, 'current')
-            current_file = file(current_fname, "w")
-            current_str = '%08d\n' % self.current
-            current_file.write(current_str)
-            current_file.close()
-        # copy edit-log
-        if self.editlog is not None:
-            editlog_fname = opj(page_dir, 'edit-log')
-            self.editlog.copy(editlog_fname, self.renames)
-        # copy page revisions
-        if self.revisions is not None:
-            rev_dir = opj(page_dir, 'revisions')
-            os.makedirs(rev_dir)
-            for rev in self.revlist:
-                self.revisions[rev].copy(rev_dir, self.renames)
-        # copy attachments
-        if self.attachments is not None:
-            attach_dir = opj(page_dir, 'attachments')
-            os.makedirs(attach_dir)
-            for fn, att in self.attachments.items():
-                # we have to check for renames here because we need the (old) pagename, too:
-                if ('FILE', self.name_old, fn) in self.renames:
-                    fn_new = self.renames[('FILE', self.name_old, fn)]
-                    if fn_new != fn:
-                        print "Renaming file %r %r -> %r" % (self.name_old, fn, fn_new)
-                        att.name = fn_new
-                att.copy(attach_dir)
-
-    def copy(self, pages_dir, renames):
-            self.renames = renames
-            self.read()
-            self.write(pages_dir)
-
-
-class User:
-    """ represents a user with all related data """
-    def __init__(self, users_dir, uid):
-        self.uid = uid
-        self.users_dir = users_dir
-        self.profile = None
-        self.bookmarks = None
-
-    def read(self):
-        """ read profile and bookmarks data from disk """
-        self.profile = {}
-        fname = opj(self.users_dir, self.uid)
-        # read user profile
-        f = file(fname, "r")
-        for line in f:
-            line = line.replace('\r', '').replace('\n', '')
-            if not line.strip() or line.startswith('#'): # skip empty or comment lines
-                continue
-            key, value = line.split('=', 1)
-            self.profile[key] = value
-        f.close()
-        # read bookmarks
-        self.bookmarks = {}
-        fname_pattern = opj(self.users_dir, "%s.*.bookmark" % self.uid)
-        for fname in glob.glob(fname_pattern):
-            f = file(fname, "r")
-            bookmark = f.read()
-            f.close()
-            wiki = fname.replace('.bookmark', '').replace(opj(self.users_dir, self.uid+'.'), '')
-            self.bookmarks[wiki] = int(bookmark)
-        # don't care about trail
-
-    def write(self, users_dir):
-        """ write profile and bookmarks data to disk """
-        fname = opj(users_dir, self.uid)
-        f = file(fname, "w")
-        for key, value in self.profile.items():
-            f.write("%s=%s\n" % (key, value))
-        f.close()
-        # write bookmarks
-        for wiki, bookmark in self.bookmarks.items():
-            fname = opj(users_dir, "%s.%s.bookmark" % (self.uid, wiki))
-            f = file(fname, "w")
-            f.write("%d\n" % bookmark)
-            f.close()
-        # don't care about trail
-
-    def copy(self, users_dir, renames):
-        self.renames = renames
-        self.read()
-        self.write(users_dir)
-
-
-class DataConverter(object):
-    def __init__(self, src_data_dir, dest_data_dir):
-        self.sdata = src_data_dir
-        self.ddata = dest_data_dir
-        self.pages = {}
-        self.users = {}
-        self.renames = {}
-        self.rename_fname1 = opj(self.sdata, 'rename1.txt')
-        self.rename_fname2 = opj(self.sdata, 'rename2.txt')
-
-    def pass1(self):
-        """ First create the rename list - the user has to review/edit it as
-            we can't decide about page/attachment names automatically.
-        """
-        self.read_src()
-        # pages
-        for pn, p in self.pages.items():
-            p.read()
-            if not p.revisions:
-                continue # we don't care for pages with no revisions (trash)
-            if "_" in pn:
-                # log all pagenames with underscores
-                self.renames[('PAGE', pn)] = None
-            if p.attachments is not None:
-                for fn in p.attachments:
-                    try:
-                        fn_str = fn.encode('ascii')
-                        log = False # pure ascii filenames are no problem
-                    except UnicodeEncodeError:
-                        log = True # this file maybe has a strange representation in wiki markup
-                    else:
-                        if ' ' in fn_str or '%' in fn_str: # files with blanks need quoting
-                            log = True
-                    if log:
-                        # log all strange attachment filenames
-                        fn_str = fn.encode('utf-8')
-                        self.renames[('FILE', pn, fn)] = None
-        self.save_renames()
-
-    def save_renames(self):
-        f = codecs.open(self.rename_fname1, 'w', 'utf-8')
-        for k in self.renames:
-            rtype, pn, fn = (k + (None, ))[:3]
-            if rtype == 'PAGE':
-                line = u"%s\t%s\t%s\r\n" % (rtype, pn, pn)
-            elif rtype == 'FILE':
-                line = u"%s\t%s\t%s\t%s\r\n" % (rtype, pn, fn, fn)
-            f.write(line)
-        f.close()
-
-    def load_renames(self):
-        f = codecs.open(self.rename_fname2, 'r', 'utf-8')
-        for line in f:
-            line = line.rstrip()
-            if not line:
-                continue
-            t = line.split(u'\t')
-            rtype, p1, p2, p3 = (t + [None]*3)[:4]
-            if rtype == u'PAGE':
-                self.renames[(str(rtype), p1)] = p2
-            elif rtype == u'FILE':
-                self.renames[(str(rtype), p1, p2)] = p3
-        f.close()
-
-    def pass2(self):
-        """ Second, read the (user edited) rename list and do the renamings everywhere. """
-        self.read_src()
-        self.load_renames()
-        self.write_dest()
-
-    def read_src(self):
-        # create Page objects in memory
-        pages_dir = opj(self.sdata, 'pages')
-        pagelist = listdir(pages_dir)
-        for qpagename in pagelist:
-            p = Page(pages_dir, qpagename)
-            self.pages[p.name] = p
-
-        # create User objects in memory
-        users_dir = opj(self.sdata, 'user')
-        userlist = listdir(users_dir)
-        userlist = [fn for fn in userlist if not fn.endswith(".trail") and not fn.endswith(".bookmark")]
-        for userid in userlist:
-            u = User(users_dir, userid)
-            self.users[u.uid] = u
-
-        # create log objects in memory
-        self.editlog = EditLog(opj(self.sdata, 'edit-log'))
-        self.eventlog = EventLog(opj(self.sdata, 'event-log'))
-
-    def write_dest(self):
-        self.init_dest()
-        # copy pages
-        pages_dir = opj(self.ddata, 'pages')
-        for page in self.pages.values():
-            page.copy(pages_dir, self.renames)
-
-        # copy users
-        users_dir = opj(self.ddata, 'user')
-        for user in self.users.values():
-            user.copy(users_dir, self.renames)
-
-        # copy logs
-        self.editlog.copy(opj(self.ddata, 'edit-log'), self.renames)
-        self.eventlog.copy(opj(self.ddata, 'event-log'), self.renames)
-
-    def init_dest(self):
-        os.makedirs(self.ddata)
-        os.makedirs(opj(self.ddata, 'pages'))
-        os.makedirs(opj(self.ddata, 'user'))
-        copy_dir(opj(self.sdata, 'plugin'), opj(self.ddata, 'plugin'))
-        copy_file(opj(self.sdata, 'intermap.txt'), opj(self.ddata, 'intermap.txt'))
-
-
-if __name__ == '__main__':
-    origdir = 'data'
-    destdir = 'data-new'
-    dc = DataConverter(origdir, destdir)
-    passno = int(sys.argv[1])
-    if passno == 1:
-        dc.pass1()
-    elif passno == 2:
-        dc.pass2()
-