MoinMoin/script/migration/_conv160.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Wed, 11 Feb 2009 02:34:33 +0100
changeset 4569 3caaa8c74c41
parent 3175 2a3a6cb34e45
child 5152 cf16b1116dde
permissions -rw-r--r--
wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
tw@2583
     1
# -*- coding: iso-8859-1 -*-
tw@2583
     2
"""
tw@2730
     3
    MoinMoin - migration from 1.5.8 to 1.6.0 (creole link style)
tw@2583
     4
tw@2805
     5
    What it does:
tw@2583
     6
tw@2583
     7
    a) revert the "underscore == blank" equivalence in pagenames (introducing it was a mistake)
tw@2583
     8
tw@2583
     9
                   pagename            quoted pagename
tw@2583
    10
       -----------------------------------------------------
tw@2583
    11
       old         MainPage/Sub_Page   MainPage(2f)Sub_Page
tw@2583
    12
       new         MainPage/Sub Page   MainPage(2f)Sub(20)Page    or
tw@2586
    13
       new         MainPage/Sub_Page   MainPage(2f)Sub_Page       (user has to decide by editing rename1.txt)
tw@2583
    14
tw@2583
    15
tw@2583
    16
                   markup
tw@2583
    17
       ----------------------------------------------------
tw@2753
    18
       old         MoinMoin:MainPage/Sub_Page      ../Sub_Page2
tw@2753
    19
       new         [[MoinMoin:MainPage/Sub Page]]  [[../Sub Page2]]
tw@2583
    20
tw@2583
    21
tw@2583
    22
    b) decode url encoded chars in attachment names (and quote the whole fname):
tw@2583
    23
tw@2583
    24
                   markup
tw@2583
    25
       ----------------------------------------------------
tw@2583
    26
       old         attachment:file%20with%20blanks.txt
tw@2753
    27
       new         [[attachment:file with blanks.txt]]
tw@2583
    28
tw@2589
    29
    c) users: move bookmarks from separate files into user profile
tw@2589
    30
    d) users: generate new name[] for lists and name{} for dicts
tw@2583
    31
tw@2805
    32
    e) kill all */MoinEditorBackup pages (replaced by drafts functionality)
tw@2583
    33
tw@2583
    34
    @copyright: 2007 by Thomas Waldmann
tw@2583
    35
    @license: GNU GPL, see COPYING for details.
tw@2583
    36
"""
tw@2583
    37
tw@2599
    38
import os.path
tw@2751
    39
import re
tw@2758
    40
import time
tw@2583
    41
import codecs, urllib, glob
tw@2583
    42
tw@2583
    43
from MoinMoin import config, wikiutil
tw@2583
    44
from MoinMoin.script.migration.migutil import opj, listdir, copy_file, move_file, copy_dir
tw@2583
    45
tw@2583
    46
import mimetypes # this MUST be after wikiutil import!
tw@2583
    47
tw@2599
    48
from _conv160_wiki import convert_wiki
tw@2599
    49
tw@2758
    50
# Conversion mode switch, read by EditLog.write() and Page.write():
# - True:  all existing revisions are copied unchanged; for pages that are not
#          deleted, the converted text is stored as one additional revision
#          and a matching SAVE entry is appended to the edit-log.
# - False: no revision is added, the page's current revision is converted
#          in place while it is copied.
create_rev = True # create a <new> rev with the converted content of <new-1> rev?
tw@2758
    51
tw@2604
    52
def markup_converter(request, pagename, text, renames):
    """ Return the <text> content of page <pagename> with converted markup.

        XML content and pages whose 'format' processing instruction is not
        'wiki' are returned untouched. Everything else is handed to
        convert_wiki(), which gets the <renames> dict to rename links.
    """
    # xml content would be done with an xslt processor, so leave it alone
    is_xml = text.startswith('<?xml')
    if is_xml:
        return text

    instructions, _body = wikiutil.get_processing_instructions(text)
    for name, value in instructions:
        if name != 'format':
            continue
        if value != 'wiki':
            # page uses some other markup, not a wiki page
            return text

    return convert_wiki(request, pagename, text, renames)
tw@2583
    68
tw@2583
    69
tw@2583
    70
class EventLog:
    """ the event-log: one entry per line, made of the tab separated fields
        timestamp, action and query-string encoded key/value pairs
    """
    def __init__(self, request, fname):
        self.request = request
        self.fname = fname # event-log file that read() loads
        self.data = None # list of (timestamp, action, kvdict), set by read()
        self.renames = {} # rename info, set by copy()

    def read(self):
        """ read complete event-log from disk

            Corrupt lines are reported and skipped. If the log file can not
            be read (e.g. because there is none), self.data is the (maybe
            partial) list of entries read so far.
        """
        import sys # exc_info() is used as "except X, err" / "except X as err"
                   # each only work on a part of the python versions
        data = []
        try:
            f = open(self.fname, 'r')
            try:
                lineno = 0
                for line in f:
                    lineno += 1
                    line = line.replace('\r', '').replace('\n', '')
                    if not line.strip(): # skip empty lines
                        continue
                    fields = line.split('\t')
                    try:
                        timestamp, action, kvpairs = fields[:3]
                        timestamp = int(timestamp)
                        kvdict = wikiutil.parseQueryString(kvpairs)
                        data.append((timestamp, action, kvdict))
                    except ValueError:
                        # corrupt event log line, log error and skip it
                        err = sys.exc_info()[1]
                        print("Error: invalid event log (%s) line %d, err: %s, SKIPPING THIS LINE!" % (self.fname, lineno, str(err)))
            finally:
                # close the file even if reading / parsing failed
                f.close()
        except IOError:
            # no event-log
            pass
        self.data = data

    def write(self, fname):
        """ write complete event-log to disk

            The 'pagename' value of an entry is replaced if self.renames has
            a ('PAGE', pagename) key for it. No file is created if there is
            no log data.
        """
        if not self.data:
            return
        f = open(fname, 'w')
        try:
            for timestamp, action, kvdict in self.data:
                pagename = kvdict.get('pagename')
                if pagename and ('PAGE', pagename) in self.renames:
                    kvdict['pagename'] = self.renames[('PAGE', pagename)]
                kvpairs = wikiutil.makeQueryString(kvdict)
                fields = str(timestamp), action, kvpairs
                f.write('\t'.join(fields) + '\n')
        finally:
            f.close()

    def copy(self, destfname, renames):
        """ read the log from self.fname, write it to <destfname>, applying <renames> """
        self.renames = renames
        self.read()
        self.write(destfname)
tw@2583
   121
tw@2583
   122
tw@2583
   123
class EditLog:
    """ an edit-log: one entry per line, made of the tab separated fields
        timestamp, rev, action, pagename, ip, hostname, userid, extra, comment
    """
    def __init__(self, request, fname):
        self.request = request
        self.fname = fname # edit-log file that read() loads
        self.data = None # dict (timestamp, rev, pagename): entry tuple, set by read()
        self.renames = {} # rename info, set by copy()

    def read(self):
        """ read complete edit-log from disk

            Lines with less than 9 fields get empty strings for the missing
            ones. If the log file can not be read (e.g. because there is
            none), self.data is the (maybe partial) dict read so far.
        """
        data = {}
        try:
            f = open(self.fname, 'r')
            try:
                for line in f:
                    line = line.replace('\r', '').replace('\n', '')
                    if not line.strip(): # skip empty lines
                        continue
                    fields = line.split('\t') + [''] * 9
                    timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields[:9]
                    timestamp = int(timestamp)
                    rev = int(rev)
                    pagename = wikiutil.unquoteWikiname(pagename)
                    data[(timestamp, rev, pagename)] = (timestamp, rev, action, pagename, ip, hostname, userid, extra, comment)
            finally:
                # close the file even if reading / parsing failed
                f.close()
        except IOError:
            # no edit-log
            pass
        self.data = data

    def write(self, fname, deleted=False):
        """ write complete edit-log to disk

            Entries are written sorted by (timestamp, rev, pagename). Page
            names and the attachment file names in the <extra> field of ATT*
            entries are replaced according to self.renames.
            If create_rev is set and <deleted> is false, one more SAVE entry
            with the revision following the highest one seen is appended.
            No file is created if there is no log data.
        """
        if not self.data:
            return
        editlog = list(self.data.items())
        editlog.sort()
        f = open(fname, "w")
        try:
            max_rev = 0
            for key, fields in editlog:
                timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields
                if action.startswith('ATT'):
                    # <extra> is the url-quoted attachment file name. It gets
                    # its own variable - <fname> is the log file we write to.
                    try:
                        attname = urllib.unquote(extra).decode('utf-8')
                    except UnicodeDecodeError:
                        attname = urllib.unquote(extra).decode('iso-8859-1')
                    # file renames are keyed by the old page name, so look
                    # them up before the page name itself gets replaced
                    if ('FILE', pagename, attname) in self.renames:
                        attname = self.renames[('FILE', pagename, attname)]
                    extra = urllib.quote(attname.encode('utf-8'))
                if ('PAGE', pagename) in self.renames:
                    pagename = self.renames[('PAGE', pagename)]
                timestamp = str(timestamp)
                if rev != 99999999:
                    # entries with rev 99999999 do not count for the highest rev
                    max_rev = max(rev, max_rev)
                revstr = '%08d' % rev
                pagename = wikiutil.quoteWikinameFS(pagename)
                fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment
                f.write('\t'.join(fields) + '\n')
            if create_rev and not deleted:
                # log the additional revision holding the converted content;
                # <pagename> is the (already quoted) one of the last entry
                # written above
                timestamp = str(wikiutil.timestamp2version(time.time()))
                revstr = '%08d' % (max_rev + 1)
                action = 'SAVE'
                ip = '127.0.0.1'
                hostname = 'localhost'
                userid = ''
                extra = ''
                comment = "converted to 1.6 markup"
                fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment
                f.write('\t'.join(fields) + '\n')
        finally:
            f.close()

    def copy(self, destfname, renames, deleted=False):
        """ read the log from self.fname, write it to <destfname>, applying <renames> """
        self.renames = renames
        self.read()
        self.write(destfname, deleted)
tw@2583
   196
tw@2583
   197
tw@2583
   198
class PageRev:
    """ a single revision of a page """
    def __init__(self, request, pagename, rev_dir, rev):
        self.request = request
        self.pagename = pagename # handed to the markup converter
        self.rev_dir = rev_dir # directory the revision file is read from
        self.rev = rev # int revision number, file name is '%08d' % rev
        # rename info, replaced by copy(); initialized here so that
        # write(..., convert=True) also works when called directly
        self.renames = {}

    def read(self):
        """ return the revision's content (decoded from config.charset) """
        fname = opj(self.rev_dir, '%08d' % self.rev)
        f = open(fname, "rb")
        try:
            data = f.read()
        finally:
            f.close()
        return data.decode(config.charset)

    def write(self, data, rev_dir, convert, rev=None):
        """ write <data> as revision <rev> (default: self.rev) into <rev_dir>

            If <convert> is true, data is run through markup_converter()
            first. The data is encoded to config.charset for writing.
        """
        if rev is None:
            rev = self.rev
        if convert:
            data = markup_converter(self.request, self.pagename, data, self.renames)
        fname = opj(rev_dir, '%08d' % rev)
        data = data.encode(config.charset)
        f = open(fname, "wb")
        try:
            f.write(data)
        finally:
            f.close()

    def copy(self, rev_dir, renames, convert=False, new_rev=None):
        """ read this revision and write it to <rev_dir>, optionally converted
            and / or stored under the different revision number <new_rev>
        """
        self.renames = renames
        data = self.read()
        self.write(data, rev_dir, convert, new_rev)
tw@2583
   229
tw@2583
   230
tw@2583
   231
class Attachment:
    """ one attachment file of a page """
    def __init__(self, request, attach_dir, attfile):
        self.request = request
        # where the file lives in the source wiki
        source_path = opj(attach_dir, attfile)
        self.path = source_path
        # file name as unicode, bytes that are not valid utf-8 get replaced
        decoded_name = attfile.decode('utf-8', 'replace')
        self.name = decoded_name

    def copy(self, attach_dir):
        """ copy the file at self.path into <attach_dir>, using the utf-8
            encoded self.name as file name there
        """
        target_name = self.name.encode('utf-8')
        target_path = opj(attach_dir, target_name)
        copy_file(self.path, target_path)
tw@2583
   243
tw@2583
   244
tw@2583
   245
class Page:
    """ represents a page with all related data """
    def __init__(self, request, pages_dir, qpagename):
        self.request = request
        self.name = wikiutil.unquoteWikiname(qpagename)
        self.name_old = self.name # renaming: still original name when self.name has the new name
        self.page_dir = opj(pages_dir, qpagename)
        self.current = None # int current
        self.editlog = None # EditLog obj (if the page has an edit-log)
        self.revlist = None # list of ints (page text revisions)
        self.revisions = None # dict int: pagerev obj
        self.attachments = None # dict of unicode fname: Attachment obj
        # True as long as there is no existing current revision; also the
        # value the expression in read() yields while nothing was read
        self.is_deleted = True
        self.renames = {} # info for renaming pages/attachments

    def read(self):
        """ read a page, including revisions, log, attachments from disk

            If the 'current' file does not contain a number, an error is
            printed and nothing else is read: the data attributes keep
            their initial values.
        """
        page_dir = self.page_dir
        # read current file
        current_fname = opj(page_dir, 'current')
        if os.path.exists(current_fname):
            current_file = open(current_fname, "r")
            try:
                current_rev = current_file.read()
            finally:
                current_file.close()
            try:
                self.current = int(current_rev)
            except ValueError:
                print("Error: invalid current file %s, SKIPPING THIS PAGE!" % current_fname)
                return
        # read edit-log
        editlog_fname = opj(page_dir, 'edit-log')
        if os.path.exists(editlog_fname):
            self.editlog = EditLog(self.request, editlog_fname)
        # read page revisions
        rev_dir = opj(page_dir, 'revisions')
        if os.path.exists(rev_dir):
            revlist = [int(rev) for rev in listdir(rev_dir)]
            revlist.sort()
            self.revlist = revlist
            self.revisions = {}
            for rev in revlist:
                self.revisions[rev] = PageRev(self.request, self.name_old, rev_dir, rev)
        # set deleted status
        self.is_deleted = not self.revisions or self.current not in self.revisions
        # read attachment filenames
        attach_dir = opj(page_dir, 'attachments')
        if os.path.exists(attach_dir):
            self.attachments = {}
            for attfile in listdir(attach_dir):
                a = Attachment(self.request, attach_dir, attfile)
                self.attachments[a.name] = a

    def write(self, pages_dir):
        """ write a page, including revisions, log, attachments to disk

            The page directory is created below <pages_dir>, using the new
            page name if self.renames has one for this page.
        """
        if ('PAGE', self.name) in self.renames:
            name_new = self.renames[('PAGE', self.name)]
            if name_new != self.name:
                print("Renaming page %r -> %r" % (self.name, name_new))
                self.name_old = self.name
                self.name = name_new
        qpagename = wikiutil.quoteWikinameFS(self.name)
        page_dir = opj(pages_dir, qpagename)
        os.makedirs(page_dir)
        # pages that are not deleted get one more revision with the converted content
        add_rev = create_rev and not self.is_deleted
        # write current file
        current = self.current
        if current is not None:
            if add_rev:
                current += 1
            current_fname = opj(page_dir, 'current')
            current_file = open(current_fname, "w")
            try:
                current_file.write('%08d\n' % current)
            finally:
                current_file.close()
        # copy edit-log
        if self.editlog is not None:
            editlog_fname = opj(page_dir, 'edit-log')
            self.editlog.copy(editlog_fname, self.renames, deleted=self.is_deleted)
        # copy page revisions
        if self.revisions is not None:
            rev_dir = opj(page_dir, 'revisions')
            os.makedirs(rev_dir)
            for rev in self.revlist:
                # without create_rev, the current revision is converted in place
                convert = not create_rev and rev == self.current
                self.revisions[rev].copy(rev_dir, self.renames, convert=convert)
            if add_rev:
                # revlist is sorted and not empty here (the page is not
                # deleted), so this is the highest revision we have
                last_rev = self.revlist[-1]
                self.revisions[last_rev].copy(rev_dir, self.renames, convert=True, new_rev=last_rev+1)

        # copy attachments
        if self.attachments is not None:
            attach_dir = opj(page_dir, 'attachments')
            os.makedirs(attach_dir)
            for fn, att in self.attachments.items():
                # we have to check for renames here because we need the (old) pagename, too:
                if ('FILE', self.name_old, fn) in self.renames:
                    fn_new = self.renames[('FILE', self.name_old, fn)]
                    if fn_new != fn:
                        print("Renaming file %r %r -> %r" % (self.name_old, fn, fn_new))
                        att.name = fn_new
                att.copy(attach_dir)

    def copy(self, pages_dir, renames):
        """ read the page from its source dir and write it below <pages_dir>, applying <renames> """
        self.renames = renames
        self.read()
        self.write(pages_dir)
tw@2583
   355
tw@2583
   356
tw@2583
   357
class User:
    """ represents a user with all related data """
    def __init__(self, request, users_dir, uid):
        self.request = request
        self.uid = uid # also the file name of the user profile
        self.users_dir = users_dir
        self.profile = None # dict key: value (unicode), set by read()
        self.bookmarks = None # dict wiki name: int bookmark, set by read()
        self.renames = {} # rename info, set by copy()

    def read(self):
        """ read profile and bookmarks data from disk """
        import sys # exc_info() is used as "except X, err" / "except X as err"
                   # each only work on a part of the python versions
        self.profile = {}
        fname = opj(self.users_dir, self.uid)
        # read user profile
        f = codecs.open(fname, 'r', config.charset)
        try:
            for line in f:
                line = line.replace(u'\r', '').replace(u'\n', '')
                if not line.strip() or line.startswith(u'#'): # skip empty or comment lines
                    continue
                try:
                    key, value = line.split(u'=', 1)
                except ValueError:
                    # a line without '=' can not be unpacked into key, value
                    err = sys.exc_info()[1]
                    print("Error: User reader can not parse line %r from profile %r (%s)" % (line, fname, str(err)))
                    continue
                self.profile[key] = value
        finally:
            f.close()
        # read bookmarks (stored in separate <uid>.<wiki>.bookmark files)
        self.bookmarks = {}
        fname_pattern = opj(self.users_dir, "%s.*.bookmark" % self.uid)
        for fname in glob.glob(fname_pattern):
            f = open(fname, "r")
            try:
                bookmark = f.read()
            finally:
                f.close()
            # what is left after removing dir, uid and extension is the wiki name
            wiki = fname.replace('.bookmark', '').replace(opj(self.users_dir, self.uid+'.'), '')
            self.bookmarks[wiki] = int(bookmark)
        # don't care about trail

    def write(self, users_dir):
        """ write profile and bookmarks data to disk

            subscribed_pages and quicklinks are written as lists (key[]),
            with renamed page names for entries of this wiki. The bookmarks
            are written into the profile as dict (bookmarks{}).
        """
        fname = opj(users_dir, self.uid)
        f = codecs.open(fname, 'w', config.charset)
        try:
            for key, value in self.profile.items():
                if key in (u'subscribed_pages', u'quicklinks'):
                    pages = value.split(u'\t')
                    for i, pagename in enumerate(pages):
                        if u':' in pagename:
                            interwiki, pagename = pagename.split(u':', 1)
                        else:
                            # no interwiki prefix given
                            interwiki = u'Self'
                        if interwiki == u'Self' or interwiki == self.request.cfg.interwikiname:
                            if ('PAGE', pagename) in self.renames:
                                pagename = self.renames[('PAGE', pagename)]
                                pages[i] = u'%s:%s' % (interwiki, pagename)
                    key += '[]' # we have lists here
                    value = u'\t'.join(pages)
                f.write(u"%s=%s\n" % (key, value))
            bookmark_entries = [u'%s:%s' % item for item in self.bookmarks.items()]
            key = u"bookmarks{}"
            value = u'\t'.join(bookmark_entries)
            f.write(u"%s=%s\n" % (key, value))
        finally:
            f.close()
        # don't care about trail

    def copy(self, users_dir, renames):
        """ read the user from self.users_dir and write it to <users_dir>, applying <renames> """
        self.renames = renames
        self.read()
        self.write(users_dir)
tw@2583
   427
tw@2583
   428
tw@2583
   429
class DataConverter(object):
    """ converts a data dir in 2 passes:

        pass1() writes the lists complete.txt and rename1.txt into the source
        data dir, pass2() reads rename2.txt from there and writes the
        converted data to the destination data dir.
    """
    LIST_FIELDSEP = u'|' # in case | makes trouble, one can use \t tab char

    def __init__(self, request, src_data_dir, dest_data_dir):
        self.request = request
        self.sdata = src_data_dir
        self.ddata = dest_data_dir
        self.pages = {} # dict pagename: Page obj
        self.users = {} # dict uid: User obj
        self.complete = {} # keys: all pages / attachments seen by pass1
        self.renames = {} # keys: ('PAGE', pn) or ('FILE', pn, fn), values: new name
        self.complete_fname = opj(self.sdata, 'complete.txt')
        self.rename_fname1 = opj(self.sdata, 'rename1.txt')
        self.rename_fname2 = opj(self.sdata, 'rename2.txt')

    def pass1(self):
        """ First create the rename list - the user has to review/edit it as
            we can't decide about page/attachment names automatically.
        """
        self.read_src()
        # pages
        for pn, p in self.pages.items():
            p.read()
            if not p.revisions:
                continue # we don't care for pages with no revisions (trash)
            if pn.endswith('/MoinEditorBackup'):
                continue # we don't care for old editor backups
            self.complete[('PAGE', pn)] = None
            if "_" in pn:
                # log all pagenames with underscores
                self.renames[('PAGE', pn)] = None
            if p.attachments is not None:
                for fn in p.attachments:
                    try:
                        fn.encode('ascii')
                    except UnicodeEncodeError:
                        log = True # this file maybe has a strange representation in wiki markup
                    else:
                        # pure ascii filenames are no problem, except that
                        # files with blanks (or %) need quoting
                        log = u' ' in fn or u'%' in fn
                    self.complete[('FILE', pn, fn)] = None
                    if log:
                        # log all strange attachment filenames
                        self.renames[('FILE', pn, fn)] = None
        self.save_list(self.complete_fname, self.complete)
        self.save_list(self.rename_fname1, self.renames)

    def save_list(self, fname, what):
        """ write the keys of dict <what> to the utf-8 encoded file <fname>

            There is one line per key, sorted by pagename, with the fields
            separated by LIST_FIELDSEP:
            PAGE|pagename|pagename or FILE|pagename|filename|filename
            (the name is written twice, so the second one can be edited).
        """
        # make sure we have 3-tuples:
        entries = [(k + (None, ))[:3] for k in what.keys()]
        # sort by (pagename, filename, type) the decorate / sort / undecorate
        # way, which does not need any arguments for the sort() call:
        entries = [(pn, fn, rtype) for rtype, pn, fn in entries] # shuffle
        entries.sort() # sort
        entries = [(rtype, pn, fn) for pn, fn, rtype in entries] # shuffle
        f = codecs.open(fname, 'w', 'utf-8')
        try:
            for rtype, pn, fn in entries:
                if rtype == 'PAGE':
                    line = (rtype, pn, pn)
                elif rtype == 'FILE':
                    line = (rtype, pn, fn, fn)
                else:
                    # unknown entry type: nothing sensible to write
                    continue
                f.write(self.LIST_FIELDSEP.join(line) + u'\n')
        finally:
            f.close()

    def load_list(self, fname, what):
        """ read the utf-8 encoded list file <fname> (format: see save_list)
            into dict <what>: ('PAGE', old) -> new, ('FILE', page, old) -> new
        """
        f = codecs.open(fname, 'r', 'utf-8')
        try:
            for line in f:
                line = line.rstrip()
                if not line:
                    continue
                t = line.split(self.LIST_FIELDSEP)
                # missing fields are None
                rtype, p1, p2, p3 = (t + [None]*3)[:4]
                if rtype == u'PAGE':
                    what[(str(rtype), p1)] = p2
                elif rtype == u'FILE':
                    what[(str(rtype), p1, p2)] = p3
        finally:
            f.close()

    def pass2(self):
        """ Second, read the (user edited) rename list and do the renamings everywhere. """
        self.read_src()
        # complete.txt is not loaded here, only the rename list is used
        self.load_list(self.rename_fname2, self.renames)
        self.write_dest()

    def read_src(self):
        """ create the objects for pages, users and logs of the source data dir

            Only the global logs are objects of their own here, the content
            gets read later by the read() / copy() methods of the objects.
        """
        # create Page objects in memory
        pages_dir = opj(self.sdata, 'pages')
        for qpagename in listdir(pages_dir):
            p = Page(self.request, pages_dir, qpagename)
            self.pages[p.name] = p

        # create User objects in memory
        users_dir = opj(self.sdata, 'user')
        # only files named like a user id are profiles
        user_re = re.compile(r'^\d+\.\d+(\.\d+)?$')
        for userid in listdir(users_dir):
            if not user_re.match(userid):
                continue
            u = User(self.request, users_dir, userid)
            self.users[u.uid] = u

        # create log objects in memory
        self.editlog = EditLog(self.request, opj(self.sdata, 'edit-log'))
        self.eventlog = EventLog(self.request, opj(self.sdata, 'event-log'))

    def write_dest(self):
        """ write pages, users and logs to the destination data dir, applying self.renames """
        self.init_dest()
        # copy pages
        pages_dir = opj(self.ddata, 'pages')
        for pn, page in self.pages.items():
            if pn.endswith('/MoinEditorBackup'):
                continue # we don't care for old editor backups
            page.copy(pages_dir, self.renames)

        # copy users
        users_dir = opj(self.ddata, 'user')
        for user in self.users.values():
            user.copy(users_dir, self.renames)

        # copy logs
        self.editlog.copy(opj(self.ddata, 'edit-log'), self.renames)
        self.eventlog.copy(opj(self.ddata, 'event-log'), self.renames)

    def init_dest(self):
        """ create the destination data dir with empty pages and user dirs
            and copy plugin dir and intermap.txt from the source data dir
        """
        try:
            os.makedirs(self.ddata)
        except OSError:
            # best effort (e.g. the dir already exists) - if the dir is not
            # usable, creating the subdirs below fails anyway
            pass
        os.makedirs(opj(self.ddata, 'pages'))
        os.makedirs(opj(self.ddata, 'user'))
        copy_dir(opj(self.sdata, 'plugin'), opj(self.ddata, 'plugin'))
        copy_file(opj(self.sdata, 'intermap.txt'), opj(self.ddata, 'intermap.txt'))
tw@2583
   566
tw@2583
   567