diff MoinMoin/script/migration/_conv160_wiki.py @ 2599:13f0331f3a42

1.6 converter: add content conversion (unfinished), cleanup
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 04 Aug 2007 16:54:35 +0200
parents
children b601db2e4d34
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/script/migration/_conv160_wiki.py	Sat Aug 04 16:54:35 2007 +0200
@@ -0,0 +1,324 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - convert content in wiki markup
+
+    Assuming we have this "renames" map:
+    -------------------------------------------------------
+    'PAGE', 'some_page'        -> 'some page'
+    'FILE', 'with%20blank.txt' -> 'with blank.txt'
+
+    Markup transformations needed:
+    -------------------------------------------------------
+    ["some_page"]           -> ["some page"] # renamed
+    [:some_page:some text]  -> ["some page" some text] # NEW: free link with link text
+    [:page:text]            -> ["page" text] # NEW: free link with link text
+                               (with a page not being renamed)
+
+    attachment:with%20blank.txt -> attachment:"with blank.txt"
+    attachment:some_page/with%20blank.txt -> attachment:"some page/with blank.txt"
+    The attachment processing should also urllib.unquote the filename (or at
+    least replace %20 by space) and put it into "quotes" if it contains spaces.
+
+    @copyright: 2007 MoinMoin:JohannesBerg,
+                2007 MoinMoin:ThomasWaldmann
+    @license: GNU GPL, see COPYING for details.
+"""
+
+import re, codecs
+from MoinMoin import i18n
+i18n.wikiLanguages = lambda : []
+from MoinMoin import config, wikiutil
+from MoinMoin.parser.text_moin_wiki import Parser
+
+class Converter(Parser):
+    """ Convert wiki markup line by line, applying renames to link targets.
+
+    The scanning rules (formatting_rules, pre_formatting_rules, word_rule,
+    attachment_schemas) come from Parser, whose __init__ is not called.
+    The _*_repl methods used by replace() return wiki markup text here,
+    most of them just return the matched text unchanged.
+    """
+    def __init__(self, request, raw, renames):
+        """
+        @param request: object with a write() method receiving the output
+        @param raw: wiki markup text to convert
+        @param renames: dict mapping (item_type, old_name) to new_name,
+                        with item_type being 'PAGE' or 'FILE'
+        """
+        self.request = request
+        self.raw = raw
+        self.renames = renames
+        self.in_pre = False
+        self._ = None
+
+    def _replace(self, item_type, item_name):
+        """ Return the new name of an item if it is in the renames dict.
+
+        @param item_type: 'PAGE' or 'FILE'
+        @param item_name: current name of the item
+        @return: new name, or item_name itself if the item is not renamed
+        """
+        return self.renames.get((item_type, item_name), item_name)
+
+    def return_word(self, word):
+        """ Keep markup that needs no conversion as it is. """
+        return word
+    _remark_repl = return_word
+    _table_repl = return_word
+    _tableZ_repl = return_word
+    _emph_repl = return_word
+    _emph_ibb_repl = return_word
+    _emph_ibi_repl = return_word
+    _emph_ib_or_bi_repl = return_word
+    _u_repl = return_word
+    _strike_repl = return_word
+    _sup_repl = return_word
+    _sub_repl = return_word
+    _small_repl = return_word
+    _big_repl = return_word
+    _tt_repl = return_word
+    _tt_bt_repl = return_word
+    _notword_repl = return_word
+    _rule_repl = return_word
+    _smiley_repl = return_word
+    _smileyA_repl = return_word
+    _ent_repl = return_word
+    _ent_numeric_repl = return_word
+    _ent_symbolic_repl = return_word
+    _heading_repl = return_word
+    _email_repl = return_word
+    _macro_repl = return_word
+    _interwiki_repl = return_word
+    _word_repl = return_word
+    _indent_repl = return_word
+    _li_none_repl = return_word
+    _li_repl = return_word
+    _ol_repl = return_word
+    _dl_repl = return_word
+    _comment_repl = return_word
+
+    # PRE SECTION HANDLING ---------------------------------------------------
+
+    def _pre_repl(self, word):
+        """ Keep track of entering / leaving a {{{ pre }}} section.
+
+        The matched text is always returned exactly as it was found.
+        """
+        marker = word.strip()
+        if marker == '{{{' and not self.in_pre:
+            self.in_pre = True
+        elif marker == '}}}' and self.in_pre:
+            self.in_pre = False
+        return word
+
+    def _parser_repl(self, word):
+        """ Start of a parser section: switch to pre mode, keep the text. """
+        self.in_pre = True
+        return word
+
+    # LINKS ------------------------------------------------------------------
+
+    def _replace_target(self, target):
+        """ Apply page renames to a link target, keeping a #anchor if present.
+
+        @param target: page name, optionally followed by '#' and an anchor
+        @return: target with the page name replaced if the page is renamed
+        """
+        if '#' not in target:
+            return self._replace('PAGE', target)
+        pagename, anchor = target.split('#', 1)
+        return '%s#%s' % (self._replace('PAGE', pagename), anchor)
+
+    def interwiki(self, target_and_text, **kw):
+        """ Handle interwiki links; they are left as they are.
+
+        Nothing is split or resolved: wikiutil.resolve_wiki wants a request,
+        but convert_wiki uses a StringIO (presumably not enough for it).
+
+        TODO: maybe support [wiki:Page http://wherever/image.png] ?
+
+        @param target_and_text: link markup, e.g. wiki:Self:LocalPage text
+        @param kw: accepted for compatibility (e.g. pretty_url), not used
+        @return: target_and_text, unchanged
+        """
+        return target_and_text
+
+    def attachment(self, target_and_text, **kw):
+        """ This gets called on attachment URLs; they are left as they are.
+
+        TODO: unquote / quote / rename the file name as described in the
+        module docstring (not implemented yet).
+
+        @param target_and_text: attachment markup, e.g. attachment:file.txt
+        @param kw: accepted for compatibility (e.g. pretty_url), not used
+        @return: target_and_text, unchanged
+        """
+        return target_and_text
+
+    def _url_repl(self, word):
+        """ Handle literal URLs including inline images.
+
+        URLs with an attachment scheme are given to attachment(), all other
+        URLs (wiki: links, pictures, ...) are kept as they are.
+        """
+        scheme = word.split(":", 1)[0]
+        if scheme != "wiki" and scheme in self.attachment_schemas:
+            return self.attachment(word)
+        return word
+
+    def _wikiname_bracket_repl(self, text):
+        """ Handle special-char wikinames with link text, like:
+            ["Jim O'Brian" Jim's home page] or ['Hello "world"!' a page with doublequotes]
+
+        A quoted target is looked up in the renames. An unquoted target is
+        not looked up, only its underscores are replaced by blanks. Markup
+        that can't be split into target and link text is kept as it is.
+        """
+        word = text[1:-1] # strip brackets
+        if not word:
+            return text
+        quote = word[0]
+        if quote in wikiutil.QUOTE_CHARS:
+            # split on closing quote
+            parts = word[1:].split(quote, 1)
+            if len(parts) < 2:
+                return text # no closing quote
+            target = self._replace_target(parts[0])
+            return '[%s%s%s%s]' % (quote, target, quote, parts[1])
+        # not quoted: split on whitespace
+        parts = word.split(None, 1)
+        if len(parts) < 2:
+            return text # no link text
+        target = parts[0].replace("_", " ")
+        return '[%s%s%s]' % (target, word[len(target)], parts[1])
+
+    def _url_bracket_repl(self, word):
+        """ Handle bracketed URLs.
+
+        Only local extended links, [:page name:link text], get their target
+        renamed; any other bracketed URL is kept as it is.
+        NOTE(review): the module docstring asks for ["page name" link text]
+        as result, this still emits the [:page name:link text] form.
+        """
+        inner = word[1:-1] # strip brackets
+        if not inner.startswith(':'):
+            return '[%s]' % inner
+        parts = inner[1:].split(':', 1)
+        parts[0] = self._replace_target(parts[0])
+        return '[:%s]' % ':'.join(parts)
+
+    # SCANNING ---------------------------------------------------------------
+
+    def scan(self, scan_re, line):
+        """ Scans one line
+
+        Append text before match, invoke replace() with match, and add text after match.
+
+        @param scan_re: compiled regex with named groups for the markup types
+        @param line: one line of wiki markup (without line end)
+        @return: converted line
+        """
+        result = []
+        lastpos = 0
+        for match in scan_re.finditer(line):
+            # Add text before the match (if any), then the converted match
+            if lastpos < match.start():
+                result.append(line[lastpos:match.start()])
+            result.append(self.replace(match))
+            lastpos = match.end()
+        # Add remainder of the line
+        result.append(line[lastpos:])
+        return u''.join(result)
+
+    def replace(self, match):
+        """ Replace match using type name
+
+        @param match: match object of a scanning regex
+        @return: result of the _<type>_repl method of a matching named group
+        @raise Exception: if no named group (hmarker is skipped) has matched
+        """
+        for _type, hit in match.groupdict().items():
+            if hit is not None and _type != "hmarker":
+                # Get replace method and replace hit
+                replace = getattr(self, '_' + _type + '_repl')
+                return replace(hit)
+        # We should never get here
+        import pprint
+        raise Exception("Can't handle match %r\n%s\n%s" % (
+            match,
+            pprint.pformat(match.groupdict()),
+            pprint.pformat(match.groups()),
+        ))
+
+    def convert(self):
+        """ For each line, scan through looking for magic
+            strings, outputting verbatim any intervening text.
+
+        The result is written to self.request, every line terminated by CR LF.
+        A last line of self.raw without line end is converted, too.
+        """
+        # prepare regex patterns
+        rules = self.formatting_rules.replace('\n', '|')
+        # the !NotWikiWord rule is always used (no cfg.bang_meta check here)
+        rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
+            'word_rule': self.word_rule,
+            'rules': rules,
+        }
+        pre_rules = self.pre_formatting_rules.replace('\n', '|')
+        scan_re = re.compile(rules, re.UNICODE)
+        pre_scan_re = re.compile(pre_rules, re.UNICODE)
+        eol_re = re.compile(r'\r?\n', re.UNICODE)
+
+        self.lines = eol_re.split(self.raw)
+        # text ending with a line end gives an empty last item: remove it
+        # (a last line without line end is kept and gets converted)
+        if not self.lines[-1]:
+            del self.lines[-1]
+        self.in_processing_instructions = 1
+        pi_prefixes = ("##", "#format", "#refresh", "#redirect", "#deprecated",
+                       "#pragma", "#form", "#acl", "#language")
+
+        # Main loop
+        for line in self.lines:
+            # processing instructions at the top are copied without parsing
+            if self.in_processing_instructions:
+                lowered = line.lower()
+                for pi in pi_prefixes:
+                    if lowered.startswith(pi):
+                        self.request.write(line + '\r\n')
+                        break
+                else:
+                    self.in_processing_instructions = 0
+                if self.in_processing_instructions:
+                    continue # do not parse this line
+            if self.in_pre:
+                # still looking for processing instructions
+                if self.in_pre == 'search_parser':
+                    self.in_pre = True
+                    if line.strip().startswith("#!"):
+                        self.request.write(line + '\r\n')
+                        continue
+            elif not line.strip():
+                # Paragraph break on empty lines
+                self.request.write(line + '\r\n')
+                continue
+
+            # Scan line, format and write
+            scanning_re = self.in_pre and pre_scan_re or scan_re
+            self.request.write(self.scan(scanning_re, line) + '\r\n')
+
+def convert_wiki(intext, renames):
+    """ Convert content written in wiki markup
+
+    A CR LF missing at the end of intext is only added for the conversion.
+    """
+    import StringIO
+    out = StringIO.StringIO()
+    had_eol = intext.endswith('\r\n')
+    if not had_eol:
+        intext += '\r\n'
+    Converter(out, intext, renames).convert()
+    if had_eol:
+        return out.getvalue()
+    return out.getvalue()[:-2]
+