1 # -*- coding: iso-8859-1 -*-
3 MoinMoin - migration from 1.6.0alpha (rev 1844: 58ebb64243cc - used a similar markup as 1.5.8, but with quotes for linking stuff with blanks) to 1.6.0 (creole link style)
7 a) reverse underscore == blank stuff in pagenames (introducing this was a fault)
9 pagename quoted pagename
10 -----------------------------------------------------
11 old MainPage/Sub_Page MainPage(2f)Sub_Page
12 new MainPage/Sub Page MainPage(2f)Sub(20)Page or
13 new MainPage/Sub_Page MainPage(2f)Sub_Page (user has to decide by editing rename1.txt)
17 ----------------------------------------------------
22 b) decode url encoded chars in attachment names (and quote the whole fname):
25 ----------------------------------------------------
26 old attachment:file%20with%20blanks.txt
27 new [[attachment:file with blanks.txt]]
29 c) users: move bookmarks from separate files into user profile
30 d) users: generate new name[] for lists and name{} for dicts
32 e) kill all */MoinEditorBackup pages (replaced by drafts functionality)
34 @copyright: 2007 by Thomas Waldmann
35 @license: GNU GPL, see COPYING for details.
41 import codecs, urllib, glob
43 from MoinMoin import config, wikiutil
44 from MoinMoin.script.migration.migutil import opj, listdir, copy_file, move_file, copy_dir
46 import mimetypes # this MUST be after wikiutil import!
48 from _conv160b_wiki import convert_wiki
# Module-wide switch consulted by the edit-log and page writers in this file:
# when true (and the page is not deleted) they produce one additional
# revision / log entry for the converted content.
50 create_rev = True # create a <new> rev with the converted content of <new-1> rev?
# markup_converter: convert the text of one page.
# Visible logic: text starting with '<?xml' is special-cased (the inline note
# says an xslt processor would handle it); processing instructions are taken
# from wikiutil.get_processing_instructions(); a 'format' instruction whose
# value is not 'wiki' is special-cased; finally the text is passed through
# convert_wiki() together with the renames dict.
# NOTE(review): the statements under the two `if` tests and the loop header
# that binds `pi` / `val` are not part of the visible text - presumably they
# return the text unchanged; confirm against the full file.
52 def markup_converter(request, pagename, text, renames):
53 """ Convert the <text> content of page <pagename>, using <renames> dict
54 to rename links correctly. Additionally, convert some changed markup.
56 if text.startswith('<?xml'):
57 # would be done with xslt processor
60 pis, body = wikiutil.get_processing_instructions(text)
62 if pi == 'format' and val != 'wiki':
66 text = convert_wiki(request, pagename, text, renames)
# Constructor (presumably of the event-log class used by DataConverter):
# keeps the request object on the instance.
# NOTE(review): sibling methods read self.fname, self.data and self.renames;
# the statements initialising them are not part of the visible text -
# presumably they follow here; confirm against the full file.
71 def __init__(self, request, fname):
72 self.request = request
78 """ read complete event-log from disk """
82 f = file(self.fname, 'r')
85 line = line.replace('\r', '').replace('\n', '')
86 if not line.strip(): # skip empty lines
88 fields = line.split('\t')
90 timestamp, action, kvpairs = fields[:3]
91 timestamp = int(timestamp)
92 kvdict = wikiutil.parseQueryString(kvpairs)
93 data.append((timestamp, action, kvdict))
94 except ValueError, err:
95 # corrupt event log line, log error and skip it
96 print "Error: invalid event log (%s) line %d, err: %s, SKIPPING THIS LINE!" % (self.fname, lineno, str(err))
# Serialise the in-memory event-log entries, applying page renames.
# NOTE(review): opening `fname` and writing each `line` to it are not part of
# the visible text.
103 def write(self, fname):
104 """ write complete event-log to disk """
# each entry of self.data is a (timestamp, action, kvdict) tuple
107 for timestamp, action, kvdict in self.data:
108 pagename = kvdict.get('pagename')
# replace the page name when a rename is registered under ('PAGE', name)
109 if pagename and ('PAGE', pagename) in self.renames:
110 kvdict['pagename'] = self.renames[('PAGE', pagename)]
# rebuild the entry as one tab-separated line: timestamp, action, query string
111 kvpairs = wikiutil.makeQueryString(kvdict)
112 fields = str(timestamp), action, kvpairs
113 line = '\t'.join(fields) + '\n'
# Remember the rename mapping for write(), then write the log to destfname.
# NOTE(review): one source line between the two statements is not visible -
# presumably it loads the log from disk first; confirm.
117 def copy(self, destfname, renames):
118 self.renames = renames
120 self.write(destfname)
# Constructor (presumably of the edit-log class used by the page and
# DataConverter code): keeps the request object on the instance.
# NOTE(review): sibling methods read self.fname, self.data and self.renames;
# the statements initialising them are not part of the visible text -
# presumably they follow here; confirm against the full file.
124 def __init__(self, request, fname):
125 self.request = request
131 """ read complete edit-log from disk """
134 f = file(self.fname, 'r')
136 line = line.replace('\r', '').replace('\n', '')
137 if not line.strip(): # skip empty lines
139 fields = line.split('\t') + [''] * 9
140 timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields[:9]
141 timestamp = int(timestamp)
143 pagename = wikiutil.unquoteWikiname(pagename)
144 data[(timestamp, rev, pagename)] = (timestamp, rev, action, pagename, ip, hostname, userid, extra, comment)
# Serialise the in-memory edit-log entries, applying attachment and page
# renames; when create_rev is set and `deleted` is false, one more entry with
# revision max_rev + 1 and the comment "converted to 1.6 markup" is built.
# NOTE(review): several source lines are absent from the visible text: the
# `try:` that pairs with the `except` below, the initialisation of max_rev,
# the conversion of `rev` to a number, the remaining fields of the extra entry
# and all file open/write calls. Comments describe only what is visible.
151 def write(self, fname, deleted=False):
152 """ write complete edit-log to disk """
154 editlog = self.data.items()
158 for key, fields in editlog:
159 timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields
# for actions starting with 'ATT', `extra` carries the url-quoted attachment name
160 if action.startswith('ATT'):
# NOTE(review): `fname` here rebinds the destination-path parameter of the
# same name - harmless only if the file is opened before the loop; confirm.
# The name is decoded as utf-8, falling back to iso-8859-1.
162 fname = urllib.unquote(extra).decode('utf-8')
163 except UnicodeDecodeError:
164 fname = urllib.unquote(extra).decode('iso-8859-1')
165 if ('FILE', pagename, fname) in self.renames:
166 fname = self.renames[('FILE', pagename, fname)]
167 extra = urllib.quote(fname.encode('utf-8'))
# the page rename is looked up after the attachment one, which is keyed on the old page name
168 if ('PAGE', pagename) in self.renames:
169 pagename = self.renames[('PAGE', pagename)]
170 timestamp = str(timestamp)
# track the highest revision seen; it numbers the extra entry below
172 max_rev = max(rev, max_rev)
173 revstr = '%08d' % rev
174 pagename = wikiutil.quoteWikinameFS(pagename)
175 fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment
176 log_str = '\t'.join(fields) + '\n'
# extra log entry for the additional (converted) revision
178 if create_rev and not deleted:
179 timestamp = str(wikiutil.timestamp2version(time.time()))
180 revstr = '%08d' % (max_rev + 1)
183 hostname = 'localhost'
186 comment = "converted to 1.6 markup"
187 fields = timestamp, revstr, action, pagename, ip, hostname, userid, extra, comment
188 log_str = '\t'.join(fields) + '\n'
# Remember the rename mapping for write(), then write the log to destfname,
# passing the `deleted` flag through.
# NOTE(review): one source line between the two statements is not visible -
# presumably it loads the log from disk first; confirm.
192 def copy(self, destfname, renames, deleted=False):
193 self.renames = renames
195 self.write(destfname, deleted)
199 """ a single revision of a page """
# Constructor for one page revision: stores the request, the page name and
# the directory holding the revision files.
# NOTE(review): `rev` is accepted but its storage is not visible; code further
# down reads self.rev, so presumably it is assigned right after these lines.
200 def __init__(self, request, pagename, rev_dir, rev):
201 self.request = request
202 self.pagename = pagename
203 self.rev_dir = rev_dir
207 fname = opj(self.rev_dir, '%08d' % self.rev)
208 f = file(fname, "rb")
211 data = data.decode(config.charset)
# Write revision text `data` into rev_dir, in a file named after the
# zero-padded 8-digit revision number.
# NOTE(review): how `convert` gates the markup_converter() call, how a `rev`
# of None is replaced, and the final write/close of the opened file are not
# part of the visible text.
214 def write(self, data, rev_dir, convert, rev=None):
# run the text through the markup conversion, passing the rename mapping
218 data = markup_converter(self.request, self.pagename, data, self.renames)
219 fname = opj(rev_dir, '%08d' % rev)
# text is encoded with config.charset and the file is opened in binary mode
220 data = data.encode(config.charset)
221 f = file(fname, "wb")
# Store the rename mapping, then write the revision text into rev_dir;
# `convert` and `new_rev` are handed through to write().
# NOTE(review): `data` is not assigned in the visible lines - presumably a
# hidden line loads it from the source revision file; confirm.
225 def copy(self, rev_dir, renames, convert=False, new_rev=None):
226 self.renames = renames
228 self.write(data, rev_dir, convert, new_rev)
232 """ a single attachment """
def __init__(self, request, attach_dir, attfile):
    """ remember where an attachment file lives and what it is called

    @param request: request object, kept on the instance
    @param attach_dir: directory that holds the attachment file
    @param attfile: file name (byte string) of the attachment inside attach_dir
    """
    self.request = request
    # the full path is built from the raw, undecoded file name
    self.path = opj(attach_dir, attfile)
    # bytes that are not valid utf-8 are replaced instead of raising
    unicode_name = attfile.decode('utf-8', 'replace')
    self.name = unicode_name
def copy(self, attach_dir):
    """ copy the attachment file from its original path into <attach_dir>

    @param attach_dir: destination directory for the copy
    """
    # destination file name is the unicode name encoded as utf-8
    target = opj(attach_dir, self.name.encode('utf-8'))
    copy_file(self.path, target)
246 """ represents a page with all related data """
def __init__(self, request, pages_dir, qpagename):
    """ set up a page object for the quoted page name <qpagename>

    Nothing is read from disk here; every data attribute starts out empty.

    @param request: request object, kept on the instance
    @param pages_dir: directory that contains the page directories
    @param qpagename: quoted page name, also the name of the page directory
    """
    self.request = request
    # name_old keeps the original name once self.name carries the new one;
    # both start out as the unquoted page name
    self.name = self.name_old = wikiutil.unquoteWikiname(qpagename)
    self.page_dir = opj(pages_dir, qpagename)
    # data attributes, all None until filled elsewhere in this file:
    #   current     - int, current revision
    #   editlog     - EditLog object for the page's edit-log
    #   revlist     - list of ints (page text revisions)
    #   revisions   - dict int: PageRev object
    #   attachments - dict unicode file name: Attachment object
    self.current = self.editlog = self.revlist = None
    self.revisions = self.attachments = None
    # info for renaming pages/attachments
    self.renames = {}
260 """ read a page, including revisions, log, attachments from disk """
261 page_dir = self.page_dir
263 current_fname = opj(page_dir, 'current')
264 if os.path.exists(current_fname):
265 current_file = file(current_fname, "r")
266 current_rev = current_file.read()
269 self.current = int(current_rev)
271 print "Error: invalid current file %s, SKIPPING THIS PAGE!" % current_fname
274 editlog_fname = opj(page_dir, 'edit-log')
275 if os.path.exists(editlog_fname):
276 self.editlog = EditLog(self.request, editlog_fname)
277 # read page revisions
278 rev_dir = opj(page_dir, 'revisions')
279 if os.path.exists(rev_dir):
280 revlist = listdir(rev_dir)
281 revlist = [int(rev) for rev in revlist]
283 self.revlist = revlist
286 self.revisions[rev] = PageRev(self.request, self.name_old, rev_dir, rev)
288 self.is_deleted = not self.revisions or self.current not in self.revisions
289 # read attachment filenames
290 attach_dir = opj(page_dir, 'attachments')
291 if os.path.exists(attach_dir):
292 self.attachments = {}
293 attlist = listdir(attach_dir)
294 for attfile in attlist:
295 a = Attachment(self.request, attach_dir, attfile)
296 self.attachments[a.name] = a
# Write this page (current pointer, edit-log, revisions, attachments) below
# pages_dir, applying page and attachment renames from self.renames.
# NOTE(review): several source lines of this method are absent from the
# visible text (see the notes below); comments describe only what is visible.
298 def write(self, pages_dir):
299 """ write a page, including revisions, log, attachments to disk """
# look up a new name for this page under the ('PAGE', name) key
300 if ('PAGE', self.name) in self.renames:
301 name_new = self.renames[('PAGE', self.name)]
302 if name_new != self.name:
303 print "Renaming page %r -> %r" % (self.name, name_new)
304 self.name_old = self.name
# NOTE(review): the assignment of name_new to self.name is not visible -
# presumably it follows here; the directory name below derives from self.name.
306 qpagename = wikiutil.quoteWikinameFS(self.name)
307 page_dir = opj(pages_dir, qpagename)
308 os.makedirs(page_dir)
# 'current' file: holds the current revision number, zero-padded to 8 digits
310 current = self.current
311 if current is not None:
312 if create_rev and not self.is_deleted:
# NOTE(review): the statement guarded by the test above is not visible -
# presumably it bumps `current` for the extra converted revision; confirm.
314 current_fname = opj(page_dir, 'current')
315 current_file = file(current_fname, "w")
316 current_str = '%08d\n' % current
317 current_file.write(current_str)
# the per-page edit-log is copied with the same rename mapping
320 if self.editlog is not None:
321 editlog_fname = opj(page_dir, 'edit-log')
322 self.editlog.copy(editlog_fname, self.renames, deleted=self.is_deleted)
323 # copy page revisions
324 if self.revisions is not None:
325 rev_dir = opj(page_dir, 'revisions')
327 for rev in self.revlist:
# NOTE(review): the conditions selecting between the copy() variants below are
# only partly visible. The visible calls copy a revision unchanged, copy it
# with convert=True when it equals self.current, and - when create_rev is set
# and the page is not deleted - also write a converted copy as rev+1.
329 self.revisions[rev].copy(rev_dir, self.renames)
331 if int(rev) == self.current:
332 self.revisions[rev].copy(rev_dir, self.renames, convert=True)
334 self.revisions[rev].copy(rev_dir, self.renames)
335 if create_rev and not self.is_deleted:
336 self.revisions[rev].copy(rev_dir, self.renames, convert=True, new_rev=rev+1)
# attachments go into an 'attachments' subdirectory of the page directory
339 if self.attachments is not None:
340 attach_dir = opj(page_dir, 'attachments')
341 os.makedirs(attach_dir)
342 for fn, att in self.attachments.items():
343 # we have to check for renames here because we need the (old) pagename, too:
344 if ('FILE', self.name_old, fn) in self.renames:
345 fn_new = self.renames[('FILE', self.name_old, fn)]
347 print "Renaming file %r %r -> %r" % (self.name_old, fn, fn_new)
# NOTE(review): the actual copy of each attachment is not part of the visible text.
# Store the rename mapping, then write the page below pages_dir.
# NOTE(review): one source line between the two statements is not visible -
# presumably it reads the page from disk first; confirm.
351 def copy(self, pages_dir, renames):
352 self.renames = renames
354 self.write(pages_dir)
358 """ represents a user with all related data """
# Constructor for a user record: stores the request and the users directory;
# bookmarks start out as None.
# NOTE(review): other code in this file reads self.uid and self.profile;
# their initialisation is not part of the visible text.
359 def __init__(self, request, users_dir, uid):
360 self.request = request
362 self.users_dir = users_dir
364 self.bookmarks = None
367 """ read profile and bookmarks data from disk """
369 fname = opj(self.users_dir, self.uid)
371 f = codecs.open(fname, 'r', config.charset)
373 line = line.replace(u'\r', '').replace(u'\n', '')
374 if not line.strip() or line.startswith(u'#'): # skip empty or comment lines
377 key, value = line.split(u'=', 1)
378 except Exception, err:
379 print "Error: User reader can not parse line %r from profile %r (%s)" % (line, fname, str(err))
381 self.profile[key] = value
385 fname_pattern = opj(self.users_dir, "%s.*.bookmark" % self.uid)
386 for fname in glob.glob(fname_pattern):
390 wiki = fname.replace('.bookmark', '').replace(opj(self.users_dir, self.uid+'.'), '')
391 self.bookmarks[wiki] = int(bookmark)
392 # don't care about trail
# Write the profile as "key=value" lines to <users_dir>/<uid>, using
# config.charset, followed by one line built from the bookmarks.
# NOTE(review): several source lines are absent from the visible text: the
# binding of `pagename` from pages[i], the construct choosing between the two
# interwiki/pagename assignments, the branch header for ordinary keys, the key
# used for the bookmarks line and the closing of `f`.
394 def write(self, users_dir):
395 """ write profile and bookmarks data to disk """
396 fname = opj(users_dir, self.uid)
397 f = codecs.open(fname, 'w', config.charset)
398 for key, value in self.profile.items():
# these two keys hold tab-separated page entries and get rename handling
399 if key in (u'subscribed_pages', u'quicklinks'):
400 pages = value.split(u'\t')
401 for i in range(len(pages)):
# an entry is either "interwiki:pagename" or treated as belonging to u'Self'
404 interwiki, pagename = pagename.split(u':', 1)
406 interwiki, pagename = u'Self', pagename
# renames are applied only to pages of this wiki (u'Self' or cfg.interwikiname)
407 if interwiki == u'Self' or interwiki == self.request.cfg.interwikiname:
408 if ('PAGE', pagename) in self.renames:
409 pagename = self.renames[('PAGE', pagename)]
410 pages[i] = u'%s:%s' % (interwiki, pagename)
411 key += '[]' # we have lists here
412 value = u'\t'.join(pages)
413 f.write(u"%s=%s\n" % (key, value))
415 f.write(u"%s=%s\n" % (key, value))
# bookmarks become tab-joined "key:value" items on a single line
416 bookmark_entries = [u'%s:%s' % item for item in self.bookmarks.items()]
418 value = u'\t'.join(bookmark_entries)
419 f.write(u"%s=%s\n" % (key, value))
421 # don't care about trail
# Store the rename mapping, then write the user data into users_dir.
# NOTE(review): one source line between the two statements is not visible -
# presumably it reads the user data from disk first; confirm.
423 def copy(self, users_dir, renames):
424 self.renames = renames
426 self.write(users_dir)
429 class DataConverter(object):
# Constructor: stores the request plus the source and destination data
# directories, and derives the paths of complete.txt, rename1.txt and
# rename2.txt inside the source data directory.
# NOTE(review): other methods use self.pages, self.users, self.complete and
# self.renames; their initialisation is not part of the visible text -
# presumably it sits between the two groups of assignments below.
430 def __init__(self, request, src_data_dir, dest_data_dir):
431 self.request = request
432 self.sdata = src_data_dir
433 self.ddata = dest_data_dir
438 self.complete_fname = opj(self.sdata, 'complete.txt')
439 self.rename_fname1 = opj(self.sdata, 'rename1.txt')
440 self.rename_fname2 = opj(self.sdata, 'rename2.txt')
443 """ First create the rename list - the user has to review/edit it as
444 we can't decide about page/attachment names automatically.
448 for pn, p in self.pages.items():
451 continue # we don't care for pages with no revisions (trash)
452 if pn.endswith('/MoinEditorBackup'):
453 continue # we don't care for old editor backups
454 self.complete[('PAGE', pn)] = None
456 # log all pagenames with underscores
457 self.renames[('PAGE', pn)] = None
458 if p.attachments is not None:
459 for fn in p.attachments:
461 fn_str = fn.encode('ascii')
462 log = False # pure ascii filenames are no problem
463 except UnicodeEncodeError:
464 log = True # this file maybe has a strange representation in wiki markup
466 if ' ' in fn_str or '%' in fn_str: # files with blanks need quoting
468 self.complete[('FILE', pn, fn)] = None
470 # log all strange attachment filenames
471 fn_str = fn.encode('utf-8')
472 self.renames[('FILE', pn, fn)] = None
473 self.save_list(self.complete_fname, self.complete)
474 self.save_list(self.rename_fname1, self.renames)
# Field separator used when joining (save_list) and splitting (load_list)
# the lines of the list files.
476 LIST_FIELDSEP = u'|' # in case | makes trouble, one can use \t tab char
# Write the keys of `what` to the utf-8 file `fname`, one line per key,
# ordered by page name, then file name, then record type.
# NOTE(review): the `if` header that pairs with the `elif` below and any
# statements after the final write (e.g. closing `f`) are not visible.
478 def save_list(self, fname, what):
479 what_sorted = what.keys()
480 # make sure we have 3-tuples:
# 2-tuple keys are padded with None so every key unpacks as (rtype, pn, fn)
481 what_sorted = [(k + (None, ))[:3] for k in what_sorted]
482 # we only have python 2.3, thus no cmp keyword for the sort() call,
483 # thus we need to do it the more complicated way:
# rotate the fields so a plain sort() orders by pn, fn, rtype, then rotate back
484 what_sorted = [(pn, fn, rtype) for rtype, pn, fn in what_sorted] # shuffle
485 what_sorted.sort() # sort
486 what_sorted = [(rtype, pn, fn) for pn, fn, rtype in what_sorted] # shuffle
487 f = codecs.open(fname, 'w', 'utf-8')
488 for rtype, pn, fn in what_sorted:
# the name is written twice per line (pn, pn / fn, fn) - presumably so the user
# can edit the last field into the new name; confirm against load_list()
490 line = (rtype, pn, pn)
491 elif rtype == 'FILE':
492 line = (rtype, pn, fn, fn)
493 line = self.LIST_FIELDSEP.join(line)
494 f.write(line + u'\n')
# Read a list file (utf-8) and fill the dict `what` from its lines.
# NOTE(review): the loop over the file's lines, any stripping of the line end
# and the `if` header that pairs with the `elif` below are not visible.
497 def load_list(self, fname, what):
498 f = codecs.open(fname, 'r', 'utf-8')
503 t = line.split(self.LIST_FIELDSEP)
# pad with None so lines with fewer than four fields still unpack
504 rtype, p1, p2, p3 = (t + [None]*3)[:4]
# visible key shapes: (rtype, p1) -> p2, and for u'FILE' (rtype, p1, p2) -> p3
506 what[(str(rtype), p1)] = p2
507 elif rtype == u'FILE':
508 what[(str(rtype), p1, p2)] = p3
512 """ Second, read the (user edited) rename list and do the renamings everywhere. """
514 #self.load_list(self.complete_fname, self.complete)
515 self.load_list(self.rename_fname2, self.renames)
519 # create Page objects in memory
520 pages_dir = opj(self.sdata, 'pages')
521 pagelist = listdir(pages_dir)
522 for qpagename in pagelist:
523 p = Page(self.request, pages_dir, qpagename)
524 self.pages[p.name] = p
526 # create User objects in memory
527 users_dir = opj(self.sdata, 'user')
528 user_re = re.compile(r'^\d+\.\d+(\.\d+)?$')
529 userlist = listdir(users_dir)
530 userlist = [f for f in userlist if user_re.match(f)]
531 for userid in userlist:
532 u = User(self.request, users_dir, userid)
533 self.users[u.uid] = u
535 # create log objects in memory
536 self.editlog = EditLog(self.request, opj(self.sdata, 'edit-log'))
537 self.eventlog = EventLog(self.request, opj(self.sdata, 'event-log'))
# Copy pages, users, the edit-log and the event-log into the destination data
# directory, passing the rename mapping to each copy() call.
# NOTE(review): source lines between the `def` and the first visible statement,
# and between the sections below, are not part of the visible text.
539 def write_dest(self):
# pages go to <ddata>/pages; */MoinEditorBackup pages are left out
542 pages_dir = opj(self.ddata, 'pages')
543 for pn, page in self.pages.items():
544 if pn.endswith('/MoinEditorBackup'):
545 continue # we don't care for old editor backups
546 page.copy(pages_dir, self.renames)
# users go to <ddata>/user
549 users_dir = opj(self.ddata, 'user')
550 for user in self.users.values():
551 user.copy(users_dir, self.renames)
# the global logs keep their file names in the destination directory
554 self.editlog.copy(opj(self.ddata, 'edit-log'), self.renames)
555 self.eventlog.copy(opj(self.ddata, 'event-log'), self.renames)
559 os.makedirs(self.ddata)
562 os.makedirs(opj(self.ddata, 'pages'))
563 os.makedirs(opj(self.ddata, 'user'))
564 copy_dir(opj(self.sdata, 'plugin'), opj(self.ddata, 'plugin'))
565 copy_file(opj(self.sdata, 'intermap.txt'), opj(self.ddata, 'intermap.txt'))