MoinMoin/script/migration/_conv160a_wiki.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Wed, 11 Feb 2009 02:34:33 +0100
changeset 4569 3caaa8c74c41
parent 4490 b120d9978144
child 4636 83483f4e26cb
permissions -rw-r--r--
wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
     1 # -*- coding: iso-8859-1 -*-
     2 """
     3     MoinMoin - convert content in 1.6.0alpha (rev 1844: 58ebb64243cc) wiki markup to 1.6.0 style
     4                by using a modified 1.6.0alpha parser as translator.
     5 
     6     PLEASE NOTE: most moin users will never need to execute this code,
     7                  because it is just for users of 1.6.0alpha version,
     8                  that used modified link markup, but was never released.
     9                  The 1.5.x/1.6.x releases use a different link markup than 1.6.0a.
    10 
    11     @copyright: 2007 MoinMoin:JohannesBerg,
    12                 2007-2009 MoinMoin:ThomasWaldmann
    13     @license: GNU GPL, see COPYING for details.
    14 """
    15 
    16 import re
    17 
    18 from MoinMoin import i18n
    19 i18n.wikiLanguages = lambda: {}
    20 
    21 from MoinMoin import config, macro, wikiutil
    22 from MoinMoin.action import AttachFile
    23 from MoinMoin.Page import Page
    24 from MoinMoin.support.python_compatibility import rsplit
    25 
    26 import wikiutil160a
    27 from text_moin160a_wiki import Parser
    28 
    29 QUOTE_CHARS = u"'\""
    30 
    31 def convert_wiki(request, pagename, intext, renames):
    32     """ Convert content written in wiki markup """
    33     noeol = False
    34     if not intext.endswith('\r\n'):
    35         intext += '\r\n'
    36         noeol = True
    37     c = Converter(request, pagename, intext, renames)
    38     result = request.redirectedOutput(c.convert, request)
    39     if noeol and result.endswith('\r\n'):
    40         result = result[:-2]
    41     return result
    42 
    43 
    44 STONEAGE_IMAGELINK = False # True for ImageLink(target,image), False for ImageLink(image,target)
    45 
    46 # copied from moin 1.6.0 macro/ImageLink.py (to be safe in case we remove ImageLink some day)
    47 # ... and slightly modified/refactored for our needs here.
    48 # hint: using parse_quoted_separated from wikiutil does NOT work here, because we do not have
    49 #       quoted urls when they contain a '=' char in the 1.5 data input.
    50 def explore_args(args):
    51     """ explore args for positional and keyword parameters """
    52     if args:
    53         args = args.split(',')
    54         args = [arg.strip() for arg in args]
    55     else:
    56         args = []
    57 
    58     kw_count = 0
    59     kw = {} # keyword args
    60     pp = [] # positional parameters
    61 
    62     kwAllowed = ('width', 'height', 'alt')
    63 
    64     for arg in args:
    65         if '=' in arg:
    66             key, value = arg.split('=', 1)
    67             key_lowerstr = str(key.lower())
    68             # avoid that urls with "=" are interpreted as keyword
    69             if key_lowerstr in kwAllowed:
    70                 kw_count += 1
    71                 kw[key_lowerstr] = value
    72             elif not kw_count and '://' in arg:
    73                 # assuming that this is the image
    74                 pp.append(arg)
    75         else:
    76             pp.append(arg)
    77 
    78     if STONEAGE_IMAGELINK and len(pp) >= 2:
    79         pp[0], pp[1] = pp[1], pp[0]
    80 
    81     return pp, kw
    82 
    83 
    84 class Converter(Parser):
    85     def __init__(self, request, pagename, raw, renames):
    86         self.pagename = pagename
    87         self.raw = raw
    88         self.renames = renames
    89         self.request = request
    90         self._ = None
    91         self.in_pre = 0
    92 
    93         self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(['ImageLink', ] + macro.getNames(self.request.cfg))}
    94 
    95     # no change
    96     def return_word(self, word):
    97         return word
    98     _emph_repl = return_word
    99     _emph_ibb_repl = return_word
   100     _emph_ibi_repl = return_word
   101     _emph_ib_or_bi_repl = return_word
   102     _u_repl = return_word
   103     _strike_repl = return_word
   104     _sup_repl = return_word
   105     _sub_repl = return_word
   106     _small_repl = return_word
   107     _big_repl = return_word
   108     _tt_repl = return_word
   109     _tt_bt_repl = return_word
   110     _remark_repl = return_word
   111     _table_repl = return_word
   112     _tableZ_repl = return_word
   113     _rule_repl = return_word
   114     _smiley_repl = return_word
   115     _smileyA_repl = return_word
   116     _ent_repl = return_word
   117     _ent_numeric_repl = return_word
   118     _ent_symbolic_repl = return_word
   119     _heading_repl = return_word
   120     _email_repl = return_word
   121     _notword_repl = return_word
   122     _indent_repl = return_word
   123     _li_none_repl = return_word
   124     _li_repl = return_word
   125     _ol_repl = return_word
   126     _dl_repl = return_word
   127     _comment_repl = return_word
   128 
   129     # translate pagenames using pagename translation map
   130 
   131     def _replace(self, key):
   132         """ replace a item_name if it is in the renames dict
   133             key is either a 2-tuple ('PAGE', pagename)
   134             or a 3-tuple ('FILE', pagename, filename)
   135         """
   136         current_page = self.pagename
   137         item_type, page_name, file_name = (key + (None, ))[:3]
   138         abs_page_name = wikiutil.AbsPageName(current_page, page_name)
   139         if item_type == 'PAGE':
   140             key = (item_type, abs_page_name)
   141             new_name = self.renames.get(key)
   142             if new_name is None:
   143                 # we don't have an entry in rename map - apply the same magic
   144                 # to the page name as 1.5 did (" " -> "_") and try again:
   145                 abs_magic_name = abs_page_name.replace(u' ', u'_')
   146                 key = (item_type, abs_magic_name)
   147                 new_name = self.renames.get(key)
   148                 if new_name is None:
   149                     # we didn't find it under the magic name either -
   150                     # that means we do not rename it!
   151                     new_name = page_name
   152             if new_name != page_name and abs_page_name != page_name:
   153                 # we have to fix the (absolute) new_name to be a relative name (as it was before)
   154                 new_name = wikiutil.RelPageName(current_page, new_name)
   155         elif item_type == 'FILE':
   156             key = (item_type, abs_page_name, file_name)
   157             new_name = self.renames.get(key)
   158             if new_name is None:
   159                 # we don't have an entry in rename map - apply the same magic
   160                 # to the page name as 1.5 did (" " -> "_") and try again:
   161                 abs_magic_name = abs_page_name.replace(u' ', u'_')
   162                 key = (item_type, abs_magic_name, file_name)
   163                 new_name = self.renames.get(key)
   164                 if new_name is None:
   165                     # we didn't find it under the magic name either -
   166                     # that means we do not rename it!
   167                     new_name = file_name
   168         return new_name
   169 
   170     def _replace_target(self, target):
   171         target_and_anchor = rsplit(target, '#', 1)
   172         if len(target_and_anchor) > 1:
   173             target, anchor = target_and_anchor
   174             target = self._replace(('PAGE', target))
   175             return '%s#%s' % (target, anchor)
   176         else:
   177             target = self._replace(('PAGE', target))
   178             return target
   179 
   180     # markup conversion
   181 
   182     def _macro_repl(self, word):
   183         # we use [[...]] for links now, macros will be <<...>>
   184         macro_rule = ur"""
   185             \[\[
   186             (?P<macro_name>\w+)
   187             (\((?P<macro_args>.*?)\))?
   188             \]\]
   189         """
   190         word = unicode(word) # XXX why is word not unicode before???
   191         m = re.match(macro_rule, word, re.X|re.U)
   192         macro_name = m.group('macro_name')
   193         macro_args = m.group('macro_args')
   194         if macro_name == 'ImageLink':
   195             fixed, kw = explore_args(macro_args)
   196             #print "macro_args=%r" % macro_args
   197             #print "fixed=%r, kw=%r" % (fixed, kw)
   198             image, target = (fixed + ['', ''])[:2]
   199             if image is None:
   200                 image = ''
   201             if target is None:
   202                 target = ''
   203             if '://' not in image:
   204                 # if it is not a URL, it is meant as attachment
   205                 image = u'attachment:%s' % image
   206             if not target:
   207                 target = image
   208             elif target.startswith('inline:'):
   209                 target = 'attachment:' + target[7:] # we don't support inline:
   210             elif target.startswith('wiki:'):
   211                 target = target[5:] # drop wiki:
   212             image_attrs = []
   213             alt = kw.get('alt') or ''
   214             width = kw.get('width')
   215             if width is not None:
   216                 image_attrs.append(u"width=%s" % width)
   217             height = kw.get('height')
   218             if height is not None:
   219                 image_attrs.append(u"height=%s" % height)
   220             image_attrs = u", ".join(image_attrs)
   221             if image_attrs:
   222                 image_attrs = u'|' + image_attrs
   223             if alt or image_attrs:
   224                 alt = u'|' + alt
   225             result = u'[[%s|{{%s%s%s}}]]' % (target, image, alt, image_attrs)
   226         else:
   227             if macro_args:
   228                 macro_args = u"(%s)" % macro_args
   229             else:
   230                 macro_args = u''
   231             result = u"<<%s%s>>" % (macro_name, macro_args)
   232         # XXX later check whether some to be renamed pagename is used as macro param
   233         return result
   234 
   235     def _word_repl(self, word, text=None):
   236         """Handle WikiNames."""
   237         if not text:
   238             if wikiutil.isStrictWikiname(word):
   239                 return word
   240             else:
   241                 return '[[%s]]' % word
   242         else: # internal use:
   243             return '[[%s|%s]]' % (word, text)
   244 
   245     def _wikiname_bracket_repl(self, text):
   246         """Handle special-char wikinames with link text, like:
   247            ["Jim O'Brian" Jim's home page] or ['Hello "world"!' a page with doublequotes]
   248         """
   249         word = text[1:-1] # strip brackets
   250         first_char = word[0]
   251         if first_char in QUOTE_CHARS:
   252             # split on closing quote
   253             target, linktext = word[1:].split(first_char, 1)
   254         else: # not quoted
   255             # split on whitespace
   256             target, linktext = word.split(None, 1)
   257         if target:
   258             target = self._replace(('PAGE', target))
   259             linktext = linktext.strip()
   260             if linktext and linktext != target:
   261                 return '[[%s|%s]]' % (target, linktext)
   262             else:
   263                 return '[[%s]]' % target
   264         else:
   265             return text
   266 
   267     def _interwiki_repl(self, word):
   268         """Handle InterWiki links."""
   269         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
   270         if wikitag_bad:
   271             return word
   272         else:
   273             return self.interwiki("wiki:" + word)
   274 
   275     def interwiki(self, target_and_text, **kw):
   276         scheme, rest = target_and_text.split(':', 1)
   277         wikiname, pagename, text = wikiutil160a.split_wiki(rest)
   278 
   279         #if (pagename.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text]
   280         #    Page(self.request, pagename).exists()): # fancy link to local page [wiki:LocalPage text]
   281         #    # XXX OtherWiki:FooPage markup -> checks for local FooPage -sense???
   282         #    pagename = wikiutil.url_unquote(pagename)
   283         #    pagename = self._replace_target(pagename)
   284         #    return '[[%s%s]]' % (pagename, text)
   285 
   286         if wikiname in ('Self', self.request.cfg.interwikiname, ''): # [wiki:Self:LocalPage text] or [:LocalPage:text]
   287             orig_pagename = pagename
   288             pagename = wikiutil.url_unquote(pagename)
   289             pagename = self._replace_target(pagename)
   290             camelcase = wikiutil.isStrictWikiname(pagename)
   291             if camelcase and (not text or text == orig_pagename):
   292                 return pagename # optimize special case
   293             else:
   294                 if text:
   295                     text = '|' + text
   296                 return '[[%s%s]]' % (pagename, text)
   297 
   298         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, wikiname+':')
   299         if wikitag_bad: # likely we got some /InterWiki as wikitail, we don't want that!
   300             pagename = wikiutil.url_unquote(pagename)
   301             pagename = self._replace_target(pagename)
   302             wikitail = pagename
   303         else: # good
   304             wikitail = wikiutil.url_unquote(pagename)
   305 
   306         # link to self?
   307         if wikiutil.isPicture(wikitail):
   308             return '{{%s:%s%s}}' % (wikitag, wikitail, text)
   309         else:
   310             if ' ' not in wikitail and not text:
   311                 return '%s:%s' % (wikitag, wikitail)
   312             else:
   313                 if text:
   314                     text = '|' + text
   315                 return '[[%s:%s%s]]' % (wikitag, wikitail, text)
   316 
   317     def attachment(self, target_and_text, **kw):
   318         """ This gets called on attachment URLs """
   319         _ = self._
   320         scheme, fname, text = wikiutil160a.split_wiki(target_and_text)
   321 
   322         pagename, fname = AttachFile.absoluteName(fname, self.pagename)
   323         from_this_page = pagename == self.pagename
   324         fname = self._replace(('FILE', pagename, fname))
   325         #fname = wikiutil.url_unquote(fname)
   326         #fname = self._replace(('FILE', pagename, fname))
   327         pagename = self._replace(('PAGE', pagename))
   328         if from_this_page:
   329             name = fname
   330         else:
   331             name = "%s/%s" % (pagename, fname)
   332 
   333         fn_txt = name
   334         if text:
   335             fn_txt += '|' + text
   336 
   337         if scheme == 'drawing':
   338             return "{{drawing:%s}}" % fn_txt
   339 
   340         # check for image, and possibly return IMG tag (images are always inlined)
   341         if not kw.get('pretty_url', 0) and wikiutil.isPicture(fname):
   342             return "{{attachment:%s}}" % fn_txt
   343 
   344         # inline the attachment
   345         if scheme == 'inline':
   346             return '{{attachment:%s}}' % fn_txt
   347 
   348         return '[[attachment:%s]]' % fn_txt
   349 
   350     def _url_repl(self, word):
   351         """Handle literal URLs including inline images."""
   352         scheme = word.split(":", 1)[0]
   353 
   354         if scheme == 'wiki':
   355             return self.interwiki(word)
   356         if scheme in self.attachment_schemas:
   357             return '%s' % self.attachment(word)
   358 
   359         if wikiutil.isPicture(word): # magic will go away in 1.6!
   360             return '{{%s}}' % word # new markup for inline images
   361         else:
   362             return word
   363 
   364 
   365     def _url_bracket_repl(self, word):
   366         """Handle bracketed URLs."""
   367         word = word[1:-1] # strip brackets
   368 
   369         # Local extended link? [:page name:link text] XXX DEPRECATED
   370         if word[0] == ':':
   371             words = word[1:].split(':', 1)
   372             link, text = (words + ['', ''])[:2]
   373             if link.strip() == text.strip():
   374                 text = ''
   375             link = self._replace_target(link)
   376             if text:
   377                 text = '|' + text
   378             return '[[%s%s]]' % (link, text)
   379 
   380         scheme_and_rest = word.split(":", 1)
   381         if len(scheme_and_rest) == 1: # no scheme
   382             # Traditional split on space
   383             words = word.split(None, 1)
   384             if words[0].startswith('#'): # anchor link
   385                 link, text = (words + ['', ''])[:2]
   386                 if link.strip() == text.strip():
   387                     text = ''
   388                 if text:
   389                     text = '|' + text
   390                 return '[[%s%s]]' % (link, text)
   391         else:
   392             scheme = scheme_and_rest[0]
   393             if scheme == "wiki":
   394                 return self.interwiki(word, pretty_url=1)
   395             if scheme in self.attachment_schemas:
   396                 m = self.attachment(word)
   397                 if scheme == 'attachment':
   398                     # with url_bracket markup, 1.6.0a parser does not embed pictures, but link!
   399                     return '[[%s]]' % m[2:-2]
   400                 else:
   401                     # drawing and inline
   402                     return m
   403 
   404             words = word.split(None, 1)
   405             if len(words) == 1:
   406                 words = words * 2
   407 
   408         target, text = words
   409         if wikiutil.isPicture(text) and re.match(self.url_rule, text):
   410             return '[[%s|{{%s}}]]' % (target, text)
   411         else:
   412             if target == text:
   413                 return '[[%s]]' % target
   414             else:
   415                 return '[[%s|%s]]' % (target, text)
   416 
   417     def _pre_repl(self, word):
   418         w = word.strip()
   419         if w == '{{{' and not self.in_pre:
   420             self.in_pre = True
   421         elif w == '}}}' and self.in_pre:
   422             self.in_pre = False
   423         return word
   424 
   425     def _processor_repl(self, word):
   426         self.in_pre = True
   427         return word
   428 
   429     def scan(self, scan_re, line):
   430         """ Scans one line - append text before match, invoke replace() with match, and add text after match.  """
   431         result = []
   432         lastpos = 0
   433 
   434         for match in scan_re.finditer(line):
   435             # Add text before the match
   436             if lastpos < match.start():
   437                 result.append(line[lastpos:match.start()])
   438             # Replace match with markup
   439             result.append(self.replace(match))
   440             lastpos = match.end()
   441 
   442         # Add remainder of the line
   443         result.append(line[lastpos:])
   444         return u''.join(result)
   445 
   446 
   447     def replace(self, match):
   448         """ Replace match using type name """
   449         result = []
   450         for _type, hit in match.groupdict().items():
   451             if hit is not None and not _type in ["hmarker", ]:
   452                 # Get replace method and replace hit
   453                 replace = getattr(self, '_' + _type + '_repl')
   454                 # print _type, hit
   455                 result.append(replace(hit))
   456                 return ''.join(result)
   457         else:
   458             # We should never get here
   459             import pprint
   460             raise Exception("Can't handle match %r\n%s\n%s" % (
   461                 match,
   462                 pprint.pformat(match.groupdict()),
   463                 pprint.pformat(match.groups()),
   464             ))
   465 
   466         return ""
   467 
   468     def convert(self, request):
   469         """ For each line, scan through looking for magic
   470             strings, outputting verbatim any intervening text.
   471         """
   472         self.request = request
   473         # prepare regex patterns
   474         rules = self.formatting_rules.replace('\n', '|')
   475         if self.request.cfg.bang_meta:
   476             rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
   477                 'word_rule': self.word_rule,
   478                 'rules': rules,
   479             }
   480         pre_rules = r'''(?P<pre>\}\}\})'''
   481         pre_scan_re = re.compile(pre_rules, re.UNICODE)
   482         scan_re = re.compile(rules, re.UNICODE)
   483         eol_re = re.compile(r'\r?\n', re.UNICODE)
   484 
   485         rawtext = self.raw
   486 
   487         # remove last item because it's guaranteed to be empty
   488         self.lines = eol_re.split(rawtext)[:-1]
   489         self.in_processing_instructions = True
   490 
   491         # Main loop
   492         for line in self.lines:
   493             # ignore processing instructions
   494             if self.in_processing_instructions:
   495                 found = False
   496                 for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
   497                            "#pragma", "#form", "#acl", "#language"):
   498                     if line.lower().startswith(pi):
   499                         self.request.write(line + '\r\n')
   500                         found = True
   501                         break
   502                 if not found:
   503                     self.in_processing_instructions = False
   504                 else:
   505                     continue # do not parse this line
   506             if not line.strip():
   507                 self.request.write(line + '\r\n')
   508             else:
   509                 # Scan line, format and write
   510                 scanning_re = self.in_pre and pre_scan_re or scan_re
   511                 formatted_line = self.scan(scanning_re, line)
   512                 self.request.write(formatted_line + '\r\n')
   513