changeset 5148:a0ca2bd83cd3

migration scripts: make finding damaged edit-log entries easier
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Tue, 15 Sep 2009 13:50:03 +0200
parents 2d080ca7f8bd
children eae57b3d9e0e
files MoinMoin/script/migration/_conv160.py MoinMoin/script/migration/_conv160a.py
diffstat 2 files changed, 16 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/script/migration/_conv160.py	Fri Sep 11 00:45:05 2009 +0200
+++ b/MoinMoin/script/migration/_conv160.py	Tue Sep 15 13:50:03 2009 +0200
@@ -131,15 +131,22 @@
         """ read complete edit-log from disk """
         data = {}
         try:
+            lineno = 0
             f = file(self.fname, 'r')
             for line in f:
+                lineno += 1
                 line = line.replace('\r', '').replace('\n', '')
                 if not line.strip(): # skip empty lines
                     continue
                 fields = line.split('\t') + [''] * 9
                 timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields[:9]
                 timestamp = int(timestamp)
-                rev = int(rev)
+                try:
+                    rev = int(rev)
+                except ValueError, err:
+                    print "Error: %r has a damaged timestamp in log line %d [%s] - skipping this entry" % (
+                        self.fname, lineno, str(err))
+                    continue # ignore this line, do not terminate - to find all those errors in one go
                 pagename = wikiutil.unquoteWikiname(pagename)
                 data[(timestamp, rev, pagename)] = (timestamp, rev, action, pagename, ip, hostname, userid, extra, comment)
             f.close()
--- a/MoinMoin/script/migration/_conv160a.py	Fri Sep 11 00:45:05 2009 +0200
+++ b/MoinMoin/script/migration/_conv160a.py	Tue Sep 15 13:50:03 2009 +0200
@@ -131,15 +131,22 @@
         """ read complete edit-log from disk """
         data = {}
         try:
+            lineno = 0
             f = file(self.fname, 'r')
             for line in f:
+                lineno += 1
                 line = line.replace('\r', '').replace('\n', '')
                 if not line.strip(): # skip empty lines
                     continue
                 fields = line.split('\t') + [''] * 9
                 timestamp, rev, action, pagename, ip, hostname, userid, extra, comment = fields[:9]
                 timestamp = int(timestamp)
-                rev = int(rev)
+                try:
+                    rev = int(rev)
+                except ValueError, err:
+                    print "Error: %r has a damaged timestamp in log line %d [%s] - skipping this entry" % (
+                        self.fname, lineno, str(err))
+                    continue # ignore this line, do not terminate - to find all those errors in one go
                 pagename = wikiutil.unquoteWikiname(pagename)
                 data[(timestamp, rev, pagename)] = (timestamp, rev, action, pagename, ip, hostname, userid, extra, comment)
             f.close()