changeset 1670:f7e7a0919c08

markup converter: more complete now, but still buggy
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 18 Nov 2006 02:03:55 +0100
parents 1f6d701942f5
children fa334986a93d
files MoinMoin/script/migration/wikimarkup_converter.py
diffstat 1 files changed, 21 insertions(+), 96 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/script/migration/wikimarkup_converter.py	Tue Nov 07 23:46:38 2006 +0100
+++ b/MoinMoin/script/migration/wikimarkup_converter.py	Sat Nov 18 02:03:55 2006 +0100
@@ -3,7 +3,7 @@
 import sys
 sys.path.insert(0, '../../..')
 
-import re
+import re, codecs
 from MoinMoin import config, wikiutil
 
 class Parser:
@@ -262,29 +262,27 @@
             # can be used to escape real, non-empty bang paths
             word = ''
             self.in_pre = 'no_parser'
-            return "{{{"
+            return "{{{#!"
         elif s_word.startswith('#!'):
             # First try to find a parser for this
             parser_name = s_word[2:].split()[0]  # XXX loses args
-            self.setParser(parser_name)
-
-        if self.parser:
             self.parser_name = parser_name
             self.in_pre = 'found_parser'
             self.parser_lines = [word]
-            return ''
+            return "{{{%s" % s_word
         elif s_word:
             self.in_pre = 'no_parser'
             return "{{{%s" % s_word
         else:
             self.in_pre = 'search_parser'
-            return ''
+            return '{{{'
 
     def _heading_repl(self, word):
         h = word.strip() # XXX loses indentation
         level = 1
         while h[level:level+1] == '=':
             level += 1
+        title_text = h[level:-level].strip()
         depth = min(5, level)
         h = "=" * depth
         return "%s %s %s" % (h, title_text, h)
@@ -339,7 +337,7 @@
 
     def _interwiki_repl(self, word):
         """Handle InterWiki links."""
-        wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
+        #wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
         return word
 
     def _url_repl(self, word):
@@ -353,7 +351,6 @@
             return self.attachment(word)
 
         if wikiutil.isPicture(word):
-            word = wikiutil.mapURL(self.request, word)
             # Get image name http://here.com/dir/image.gif -> image
             name = word.split('/')[-1]
             name = ''.join(name.split('.')[:-1])
@@ -446,15 +443,9 @@
 
     def _indent_repl(self, match):
         """Handle pure indentation (no - * 1. markup)."""
-        result = []
         if not (self.in_li or self.in_dd):
-            self._close_item(result)
             self.in_li = 1
-            css_class = None
-            if self.line_was_empty and not self.first_list_item:
-                css_class = 'gap'
-            result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
-        return ''.join(result)
+        return match
 
     def _li_none_repl(self, match):
         """Handle type=none (" .") lists."""
@@ -472,16 +463,9 @@
 
     def _dl_repl(self, match):
         """Handle definition lists."""
-        result = []
-        self._close_item(result)
         self.in_dd = 1
-        result.extend([
-            self.formatter.definition_term(1),
-            self.formatter.text(match[1:-3].lstrip(' ')),
-            self.formatter.definition_term(0),
-            self.formatter.definition_desc(1),
-        ])
-        return ''.join(result)
+        # match[1:-3].lstrip(' ')
+        return match
 
 
     def _indent_level(self):
@@ -624,47 +608,13 @@
         return attr, msg
 
     def _tableZ_repl(self, word):
-        """Handle table row end."""
-        if self.in_table:
-            result = ''
-            result += self.formatter.table_cell(0) + self.formatter.table_row(0)
-            return result
-        else:
-            return self.formatter.text(word)
+        return word
 
     def _table_repl(self, word):
-        """Handle table cell separator."""
-        if self.in_table:
-            result = []
-            # check for attributes
-            attrs, attrerr = self._getTableAttrs(word)
-
-            # start the table row?
-            if self.table_rowstart:
-                self.table_rowstart = 0
-                result.append(self.formatter.table_row(1, attrs))
-            else:
-                # Close table cell, first closing open p
-                result.append(self.formatter.table_cell(0))
-
-            # check for adjacent cell markers
-            if word.count("|") > 2:
-                if not attrs.has_key('align') and \
-                   not (attrs.has_key('style') and 'text-align' in attrs['style'].lower()):
-                    # add center alignment if we don't have some alignment already
-                    attrs['align'] = '"center"'
-                if not attrs.has_key('colspan'):
-                    attrs['colspan'] = '"%d"' % (word.count("|")/2)
-
-            # return the complete cell markup
-            result.append(self.formatter.table_cell(1, attrs) + attrerr)
-            return ''.join(result)
-        else:
-            return self.formatter.text(word)
+        return word
 
 
     def _close_item(self, result):
-        #result.append("<!-- close item begin -->\n")
         if self.in_table:
             self.in_table = 0
         if self.in_li:
@@ -751,7 +701,7 @@
                 for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
                            "#pragma", "#form", "#acl", "#language"):
                     if line.lower().startswith(pi):
-                        self.request.write(line)
+                        self.request.write(line + '\n')
                         found = True
                         break
                 if not found:
@@ -773,7 +723,6 @@
                         self.parser_name = parser_name
                         continue
                     else:
-                        self.request.write(self.formatter.preformatted(1))
                         self.in_pre = 'no_parser'
                 if self.in_pre == 'found_parser':
                     # processing mode
@@ -785,9 +734,7 @@
                     if line[:endpos]:
                         self.parser_lines.append(line[:endpos])
 
-                    # Close p before calling parser
-                    # TODO: do we really need this?
-                    res = self.formatter.parser(self.parser_name, self.parser_lines)
+                    res = '\n'.join(self.parser_lines) # don't call the parser; emit the collected lines as-is
                     self.request.write(res)
                     del self.parser_lines
                     self.in_pre = None
@@ -860,47 +807,25 @@
             formatted_line = self.scan(scanning_re, line)
             self.request.write(formatted_line)
             self.request.write("\n")
-            if self.in_pre == 'no_parser':
-                self.request.write(self.formatter.linebreak())
 
         # Close code displays, paragraphs, tables and open lists
         self.request.write(self._undent())
-        if self.in_pre: self.request.write(self.formatter.preformatted(0))
         if self.in_table: self.request.write(self.formatter.table(0))
 
 
 
 def convert(intext, pagemap, filemap):
-    request = sys.stdout
+    import StringIO
+    request = StringIO.StringIO()
     p = Parser("TestPage", intext, request)
     p.convert()
+    return request.getvalue()
 
 if __name__ == '__main__':
-    intext = """
-A link using _ instead of blank: ["Other_Page"]
-
-##A normal CamelCase link and a interwiki MoinMoin:WhatEver link.
-
-attachment:file%20with%20blanks.txt
-
- . asdadasd
- . asadads
-  . asdads
-  . asdads
- . asdads
-
-Testing:
- * asda
-  * asdads
-
-Another:
- 1. asdasd
- 1. asdad
-  1. asada
-
-
-"""
+    intext = codecs.open("in.txt", "r", "utf-8").read()
     pagemap = {'Other_Page': 'Other Page', }
     filemap = {'file with blanks.txt': True}
-    print convert(intext, pagemap, filemap)
-
+    data = convert(intext, pagemap, filemap)
+    f = codecs.open("out.txt", "w", "utf-8")
+    f.write(data)
+    f.close()