MoinMoin/script/migration/_conv160a_wiki.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Wed, 11 Feb 2009 02:34:33 +0100
changeset 4569 3caaa8c74c41
parent 4490 b120d9978144
child 4636 83483f4e26cb
permissions -rw-r--r--
wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
tw@3925
     1
# -*- coding: iso-8859-1 -*-
tw@3925
     2
"""
tw@3925
     3
    MoinMoin - convert content in 1.6.0alpha (rev 1844: 58ebb64243cc) wiki markup to 1.6.0 style
tw@3925
     4
               by using a modified 1.6.0alpha parser as translator.
tw@3925
     5
tw@4488
     6
    PLEASE NOTE: most moin users will never need to execute this code,
tw@4488
     7
                 because it is just for users of 1.6.0alpha version,
tw@4488
     8
                 which used a modified link markup and was never released.
tw@4488
     9
                 The 1.5.x/1.6.x releases use a different link markup than 1.6.0a.
tw@3925
    10
tw@3925
    11
    @copyright: 2007 MoinMoin:JohannesBerg,
tw@4488
    12
                2007-2009 MoinMoin:ThomasWaldmann
tw@3925
    13
    @license: GNU GPL, see COPYING for details.
tw@3925
    14
"""
tw@3925
    15
tw@3925
    16
import re
tw@3925
    17
tw@3925
    18
from MoinMoin import i18n
tw@3925
    19
i18n.wikiLanguages = lambda: {}
tw@3925
    20
tw@3925
    21
from MoinMoin import config, macro, wikiutil
tw@3925
    22
from MoinMoin.action import AttachFile
tw@3925
    23
from MoinMoin.Page import Page
tw@3925
    24
from MoinMoin.support.python_compatibility import rsplit
tw@3925
    25
tw@3925
    26
import wikiutil160a
tw@3925
    27
from text_moin160a_wiki import Parser
tw@3925
    28
tw@3925
    29
# Characters recognized as the opening (and matching closing) quote of a
# quoted page name, see Converter._wikiname_bracket_repl.
QUOTE_CHARS = u"'\""
tw@3925
    30
tw@3925
    31
def convert_wiki(request, pagename, intext, renames):
    """ Convert content written in wiki markup.

    Converter.convert() splits the text at line ends and discards whatever
    follows the last one, so a missing final CRLF is appended before the
    conversion and taken away again from the result afterwards.

    @param request: request object, used to capture the converter output
    @param pagename: name of the page the content belongs to
    @param intext: the content to convert
    @param renames: rename map handed to the Converter
    @return: the converted content
    """
    eol = '\r\n'
    had_eol = intext.endswith(eol)
    if not had_eol:
        intext = intext + eol
    converter = Converter(request, pagename, intext, renames)
    outtext = request.redirectedOutput(converter.convert, request)
    if not had_eol and outtext.endswith(eol):
        # give back the text without the line end we added ourselves
        outtext = outtext[:-len(eol)]
    return outtext
tw@3925
    42
tw@3925
    43
tw@3925
    44
STONEAGE_IMAGELINK = False # True for ImageLink(target,image), False for ImageLink(image,target)

# copied from moin 1.6.0 macro/ImageLink.py (to be safe in case we remove ImageLink some day)
# ... and slightly modified/refactored for our needs here.
# hint: using parse_quoted_separated from wikiutil does NOT work here, because we do not have
#       quoted urls when they contain a '=' char in the 1.5 data input.
def explore_args(args):
    """ Explore args for positional and keyword parameters.

    The argument string is split at commas and every piece is stripped.
    A piece of the form key=value is a keyword parameter if key
    (case-insensitively) is one of width, height or alt. A piece that
    contains a "=" but no allowed key is kept as positional parameter only
    if it contains "://" (i.e. looks like a URL) and no keyword parameter
    was seen before it - otherwise it is dropped. Everything else is a
    positional parameter.

    @param args: the raw macro argument string (may be None or empty)
    @return: tuple (list of positional parameters, dict of keyword parameters);
             if STONEAGE_IMAGELINK is set, the first two positional
             parameters are swapped
    """
    if args:
        args = [arg.strip() for arg in args.split(',')]
    else:
        args = []

    kw_allowed = ('width', 'height', 'alt')
    kw = {} # keyword args
    pp = [] # positional parameters

    for arg in args:
        if '=' not in arg:
            pp.append(arg)
            continue
        key, value = arg.split('=', 1)
        key_lower = key.lower()
        # avoid that urls with "=" are interpreted as keyword
        if key_lower in kw_allowed:
            # Convert to str only now: an allowed key is pure ASCII, while
            # str() on arbitrary (non-ASCII) unicode text in front of the
            # "=" would raise UnicodeEncodeError on Python 2.
            kw[str(key_lower)] = value
        elif not kw and '://' in arg:
            # assuming that this is the image
            pp.append(arg)
        # else: neither an allowed keyword nor a URL - the piece is dropped

    if STONEAGE_IMAGELINK and len(pp) >= 2:
        pp[0], pp[1] = pp[1], pp[0]

    return pp, kw
tw@3925
    82
tw@3925
    83
tw@3925
    84
class Converter(Parser):
tw@3925
    85
    def __init__(self, request, pagename, raw, renames):
        """ Set up the converter for one page.

        @param request: request object (its cfg is used to look up macro names)
        @param pagename: name of the page the content belongs to
        @param raw: the page content to convert
        @param renames: rename map, see _replace
        """
        self.request = request
        self.pagename = pagename
        self.raw = raw
        self.renames = renames
        self._ = None
        self.in_pre = 0

        # fill the macro names (plus ImageLink, which we convert ourselves)
        # into the formatting rules template
        macro_names = ['ImageLink', ] + macro.getNames(self.request.cfg)
        self.formatting_rules = self.formatting_rules % {
            'macronames': u'|'.join(macro_names),
        }
tw@3925
    94
tw@3925
    95
    # no change
tw@3925
    96
    def return_word(self, word):
tw@3925
    97
        return word
tw@3925
    98
    _emph_repl = return_word
tw@3925
    99
    _emph_ibb_repl = return_word
tw@3925
   100
    _emph_ibi_repl = return_word
tw@3925
   101
    _emph_ib_or_bi_repl = return_word
tw@3925
   102
    _u_repl = return_word
tw@3925
   103
    _strike_repl = return_word
tw@3925
   104
    _sup_repl = return_word
tw@3925
   105
    _sub_repl = return_word
tw@3925
   106
    _small_repl = return_word
tw@3925
   107
    _big_repl = return_word
tw@3925
   108
    _tt_repl = return_word
tw@3925
   109
    _tt_bt_repl = return_word
tw@3925
   110
    _remark_repl = return_word
tw@3925
   111
    _table_repl = return_word
tw@3925
   112
    _tableZ_repl = return_word
tw@3925
   113
    _rule_repl = return_word
tw@3925
   114
    _smiley_repl = return_word
tw@3925
   115
    _smileyA_repl = return_word
tw@3925
   116
    _ent_repl = return_word
tw@3925
   117
    _ent_numeric_repl = return_word
tw@3925
   118
    _ent_symbolic_repl = return_word
tw@3925
   119
    _heading_repl = return_word
tw@3925
   120
    _email_repl = return_word
tw@3925
   121
    _notword_repl = return_word
tw@3925
   122
    _indent_repl = return_word
tw@3925
   123
    _li_none_repl = return_word
tw@3925
   124
    _li_repl = return_word
tw@3925
   125
    _ol_repl = return_word
tw@3925
   126
    _dl_repl = return_word
tw@3925
   127
    _comment_repl = return_word
tw@3925
   128
tw@3925
   129
    # translate pagenames using pagename translation map
tw@3925
   130
tw@3925
   131
    def _replace(self, key):
        """ Replace an item name if it is in the renames dict.

        key is either a 2-tuple ('PAGE', pagename)
        or a 3-tuple ('FILE', pagename, filename).

        The page name is made absolute (relative to the current page) and
        looked up in the rename map - first as it is, then with the same
        magic applied as 1.5 did (" " -> "_"). Without a map entry, the
        name is not renamed.

        @return: for 'PAGE' the new page name (relative again, if the given
                 name was relative and got renamed), for 'FILE' the new
                 file name
        """
        current_page = self.pagename
        item_type, page_name, file_name = (key + (None, ))[:3]
        abs_page_name = wikiutil.AbsPageName(current_page, page_name)
        abs_magic_name = abs_page_name.replace(u' ', u'_')
        if item_type == 'PAGE':
            new_name = self.renames.get((item_type, abs_page_name))
            if new_name is None:
                # no entry under the plain name, try again with the magic name
                new_name = self.renames.get((item_type, abs_magic_name))
            if new_name is None:
                # not found under the magic name either - we do not rename it!
                new_name = page_name
            if not (new_name == page_name or abs_page_name == page_name):
                # we have to fix the (absolute) new_name to be a relative name (as it was before)
                new_name = wikiutil.RelPageName(current_page, new_name)
        elif item_type == 'FILE':
            new_name = self.renames.get((item_type, abs_page_name, file_name))
            if new_name is None:
                # no entry under the plain name, try again with the magic name
                new_name = self.renames.get((item_type, abs_magic_name, file_name))
            if new_name is None:
                # not found under the magic name either - we do not rename it!
                new_name = file_name
        return new_name
tw@3925
   169
tw@3925
   170
    def _replace_target(self, target):
        """ Apply the page rename map to a link target.

        If the target has a "#anchor" part (split at the last "#"), only
        the part in front of it is looked up and the anchor is appended
        again unchanged.
        """
        pieces = rsplit(target, '#', 1)
        if len(pieces) <= 1:
            return self._replace(('PAGE', target))
        page, anchor = pieces
        page = self._replace(('PAGE', page))
        return '%s#%s' % (page, anchor)
tw@3925
   179
tw@3925
   180
    # markup conversion
tw@3925
   181
tw@3925
   182
    def _macro_repl(self, word):
tw@3925
   183
        # we use [[...]] for links now, macros will be <<...>>
tw@3925
   184
        macro_rule = ur"""
tw@3925
   185
            \[\[
tw@3925
   186
            (?P<macro_name>\w+)
tw@3925
   187
            (\((?P<macro_args>.*?)\))?
tw@3925
   188
            \]\]
tw@3925
   189
        """
tw@3925
   190
        word = unicode(word) # XXX why is word not unicode before???
tw@3925
   191
        m = re.match(macro_rule, word, re.X|re.U)
tw@3925
   192
        macro_name = m.group('macro_name')
tw@3925
   193
        macro_args = m.group('macro_args')
tw@3925
   194
        if macro_name == 'ImageLink':
tw@3925
   195
            fixed, kw = explore_args(macro_args)
tw@3925
   196
            #print "macro_args=%r" % macro_args
tw@3925
   197
            #print "fixed=%r, kw=%r" % (fixed, kw)
tw@3925
   198
            image, target = (fixed + ['', ''])[:2]
tw@3925
   199
            if image is None:
tw@3925
   200
                image = ''
tw@3925
   201
            if target is None:
tw@3925
   202
                target = ''
tw@3925
   203
            if '://' not in image:
tw@3925
   204
                # if it is not a URL, it is meant as attachment
tw@3925
   205
                image = u'attachment:%s' % image
tw@3925
   206
            if not target:
tw@3925
   207
                target = image
tw@3925
   208
            elif target.startswith('inline:'):
tw@3925
   209
                target = 'attachment:' + target[7:] # we don't support inline:
tw@3925
   210
            elif target.startswith('wiki:'):
tw@3925
   211
                target = target[5:] # drop wiki:
tw@3925
   212
            image_attrs = []
tw@3925
   213
            alt = kw.get('alt') or ''
tw@3925
   214
            width = kw.get('width')
tw@3925
   215
            if width is not None:
tw@3925
   216
                image_attrs.append(u"width=%s" % width)
tw@3925
   217
            height = kw.get('height')
tw@3925
   218
            if height is not None:
tw@3925
   219
                image_attrs.append(u"height=%s" % height)
tw@3925
   220
            image_attrs = u", ".join(image_attrs)
tw@3925
   221
            if image_attrs:
tw@3925
   222
                image_attrs = u'|' + image_attrs
tw@3925
   223
            if alt or image_attrs:
tw@3925
   224
                alt = u'|' + alt
tw@3925
   225
            result = u'[[%s|{{%s%s%s}}]]' % (target, image, alt, image_attrs)
tw@3925
   226
        else:
tw@3925
   227
            if macro_args:
tw@3925
   228
                macro_args = u"(%s)" % macro_args
tw@3925
   229
            else:
tw@3925
   230
                macro_args = u''
tw@3925
   231
            result = u"<<%s%s>>" % (macro_name, macro_args)
tw@3925
   232
        # XXX later check whether some to be renamed pagename is used as macro param
tw@3925
   233
        return result
tw@3925
   234
tw@3925
   235
    def _word_repl(self, word, text=None):
tw@3925
   236
        """Handle WikiNames."""
tw@3925
   237
        if not text:
tw@3925
   238
            if wikiutil.isStrictWikiname(word):
tw@3925
   239
                return word
tw@3925
   240
            else:
tw@3925
   241
                return '[[%s]]' % word
tw@3925
   242
        else: # internal use:
tw@3925
   243
            return '[[%s|%s]]' % (word, text)
tw@3925
   244
tw@3925
   245
    def _wikiname_bracket_repl(self, text):
        """Handle special-char wikinames with link text, like:
           ["Jim O'Brian" Jim's home page] or ['Hello "world"!' a page with doublequotes]

           The page name gets translated using the rename map; the link text
           is only kept if it differs from the (translated) page name.
        """
        inner = text[1:-1] # strip brackets
        quote = inner[0]
        if quote in QUOTE_CHARS:
            # quoted: the page name ends at the closing quote
            target, linktext = inner[1:].split(quote, 1)
        else:
            # not quoted: the page name ends at the first whitespace
            target, linktext = inner.split(None, 1)
        if not target:
            return text
        target = self._replace(('PAGE', target))
        linktext = linktext.strip()
        if not linktext or linktext == target:
            return '[[%s]]' % target
        return '[[%s|%s]]' % (target, linktext)
tw@3925
   266
tw@3925
   267
    def _interwiki_repl(self, word):
        """Handle InterWiki links.

        If the wiki name can't be resolved, the word is no interwiki link
        and stays as it is.
        """
        _tag, _url, _tail, is_bad = wikiutil.resolve_wiki(self.request, word)
        if not is_bad:
            return self.interwiki("wiki:" + word)
        return word
tw@3925
   274
tw@3925
   275
    def interwiki(self, target_and_text, **kw):
        """ Convert a wiki:... link target (with optional link text).

        Links to the own wiki ('Self', the configured interwikiname or an
        empty wiki name) become local links, with the page name translated
        using the rename map. Other interwiki targets become embed markup
        {{Wiki:picture|text}} if they are a picture, a bare Wiki:Page if
        there is neither a blank in the page name nor a link text, and
        [[Wiki:Page|text]] markup otherwise.

        @param target_and_text: "wiki:" + interwiki target + optional text,
                                presumably split by wikiutil160a.split_wiki
                                into (wikiname, pagename, text) with text
                                not having a separator - see the '|' added
                                below
        @param kw: accepted for call compatibility, not used here
        @return: the converted markup
        """
        scheme, rest = target_and_text.split(':', 1)
        wikiname, pagename, text = wikiutil160a.split_wiki(rest)

        if wikiname in ('Self', self.request.cfg.interwikiname, ''): # [wiki:Self:LocalPage text] or [:LocalPage:text]
            orig_pagename = pagename
            pagename = wikiutil.url_unquote(pagename)
            pagename = self._replace_target(pagename)
            camelcase = wikiutil.isStrictWikiname(pagename)
            if camelcase and (not text or text == orig_pagename):
                return pagename # optimize special case
            if text:
                text = '|' + text
            return '[[%s%s]]' % (pagename, text)

        wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, wikiname+':')
        if wikitag_bad: # likely we got some /InterWiki as wikitail, we don't want that!
            wikitail = self._replace_target(wikiutil.url_unquote(pagename))
        else: # good
            wikitail = wikiutil.url_unquote(pagename)

        # The text needs a '|' in front of it in the embed markup as well as
        # in the link markup. Without it, the text of a picture target got
        # glued directly onto the picture name: {{Wiki:pic.pngtext}}.
        if text:
            text = '|' + text

        if wikiutil.isPicture(wikitail):
            return '{{%s:%s%s}}' % (wikitag, wikitail, text)
        if ' ' not in wikitail and not text:
            return '%s:%s' % (wikitag, wikitail)
        return '[[%s:%s%s]]' % (wikitag, wikitail, text)
tw@3925
   316
tw@4488
   317
    def attachment(self, target_and_text, **kw):
        """ This gets called on attachment URLs.

        Page and file name get translated using the rename map. The result
        is {{drawing:...}} for drawings, {{attachment:...}} for pictures
        (unless pretty_url is given) and for the inline: scheme, and
        [[attachment:...]] for everything else.

        @param target_and_text: scheme:target plus optional text
        @keyword pretty_url: if true, a picture is not embedded just
                             because it is a picture
        """
        _ = self._
        scheme, fname, text = wikiutil160a.split_wiki(target_and_text)

        pagename, fname = AttachFile.absoluteName(fname, self.pagename)
        from_this_page = pagename == self.pagename
        fname = self._replace(('FILE', pagename, fname))
        pagename = self._replace(('PAGE', pagename))

        # attachments of other pages need the page name in front
        if not from_this_page:
            name = "%s/%s" % (pagename, fname)
        else:
            name = fname

        fn_txt = name
        if text:
            fn_txt += '|' + text

        if scheme == 'drawing':
            return "{{drawing:%s}}" % fn_txt

        # images are always inlined, same for the inline: scheme
        is_image = not kw.get('pretty_url', 0) and wikiutil.isPicture(fname)
        if is_image or scheme == 'inline':
            return "{{attachment:%s}}" % fn_txt

        return '[[attachment:%s]]' % fn_txt
tw@3925
   349
tw@3925
   350
    def _url_repl(self, word):
        """Handle literal URLs including inline images.

        wiki: and attachment-like URLs are handed over to their own
        converters, a URL of a picture gets embed markup, any other URL
        stays as it is.
        """
        scheme = word.split(":", 1)[0]

        if scheme == 'wiki':
            return self.interwiki(word)
        if scheme in self.attachment_schemas:
            return '%s' % self.attachment(word)

        if not wikiutil.isPicture(word):
            return word
        # magic will go away in 1.6! - use new markup for inline images
        return '{{%s}}' % word
tw@3925
   363
tw@3925
   364
tw@3925
   365
    def _url_bracket_repl(self, word):
tw@3925
   366
        """Handle bracketed URLs."""
tw@3925
   367
        word = word[1:-1] # strip brackets
tw@3925
   368
tw@3925
   369
        # Local extended link? [:page name:link text] XXX DEPRECATED
tw@3925
   370
        if word[0] == ':':
tw@3925
   371
            words = word[1:].split(':', 1)
tw@4488
   372
            link, text = (words + ['', ''])[:2]
tw@4488
   373
            if link.strip() == text.strip():
tw@4488
   374
                text = ''
tw@4488
   375
            link = self._replace_target(link)
tw@4488
   376
            if text:
tw@4488
   377
                text = '|' + text
tw@4488
   378
            return '[[%s%s]]' % (link, text)
tw@3925
   379
tw@3925
   380
        scheme_and_rest = word.split(":", 1)
tw@3925
   381
        if len(scheme_and_rest) == 1: # no scheme
tw@3925
   382
            # Traditional split on space
tw@3925
   383
            words = word.split(None, 1)
tw@3925
   384
            if words[0].startswith('#'): # anchor link
tw@4488
   385
                link, text = (words + ['', ''])[:2]
tw@4488
   386
                if link.strip() == text.strip():
tw@4488
   387
                    text = ''
tw@4488
   388
                if text:
tw@4488
   389
                    text = '|' + text
tw@4488
   390
                return '[[%s%s]]' % (link, text)
tw@3925
   391
        else:
tw@4488
   392
            scheme = scheme_and_rest[0]
tw@3925
   393
            if scheme == "wiki":
tw@3925
   394
                return self.interwiki(word, pretty_url=1)
tw@3925
   395
            if scheme in self.attachment_schemas:
tw@4488
   396
                m = self.attachment(word)
tw@4490
   397
                if scheme == 'attachment':
tw@4490
   398
                    # with url_bracket markup, 1.6.0a parser does not embed pictures, but link!
tw@4490
   399
                    return '[[%s]]' % m[2:-2]
tw@4490
   400
                else:
tw@4490
   401
                    # drawing and inline
tw@4490
   402
                    return m
tw@3925
   403
tw@3925
   404
            words = word.split(None, 1)
tw@3925
   405
            if len(words) == 1:
tw@3925
   406
                words = words * 2
tw@3925
   407
tw@3925
   408
        target, text = words
tw@3925
   409
        if wikiutil.isPicture(text) and re.match(self.url_rule, text):
tw@3925
   410
            return '[[%s|{{%s}}]]' % (target, text)
tw@3925
   411
        else:
tw@3925
   412
            if target == text:
tw@3925
   413
                return '[[%s]]' % target
tw@3925
   414
            else:
tw@3925
   415
                return '[[%s|%s]]' % (target, text)
tw@3925
   416
tw@3925
   417
    def _pre_repl(self, word):
tw@3925
   418
        w = word.strip()
tw@3925
   419
        if w == '{{{' and not self.in_pre:
tw@3925
   420
            self.in_pre = True
tw@3925
   421
        elif w == '}}}' and self.in_pre:
tw@3925
   422
            self.in_pre = False
tw@3925
   423
        return word
tw@3925
   424
tw@3925
   425
    def _processor_repl(self, word):
tw@3925
   426
        self.in_pre = True
tw@3925
   427
        return word
tw@3925
   428
tw@3925
   429
    def scan(self, scan_re, line):
tw@3925
   430
        """ Scans one line - append text before match, invoke replace() with match, and add text after match.  """
tw@3925
   431
        result = []
tw@3925
   432
        lastpos = 0
tw@3925
   433
tw@3925
   434
        for match in scan_re.finditer(line):
tw@3925
   435
            # Add text before the match
tw@3925
   436
            if lastpos < match.start():
tw@3925
   437
                result.append(line[lastpos:match.start()])
tw@3925
   438
            # Replace match with markup
tw@3925
   439
            result.append(self.replace(match))
tw@3925
   440
            lastpos = match.end()
tw@3925
   441
tw@3925
   442
        # Add remainder of the line
tw@3925
   443
        result.append(line[lastpos:])
tw@3925
   444
        return u''.join(result)
tw@3925
   445
tw@3925
   446
tw@3925
   447
    def replace(self, match):
tw@3925
   448
        """ Replace match using type name """
tw@3925
   449
        result = []
tw@3925
   450
        for _type, hit in match.groupdict().items():
tw@3925
   451
            if hit is not None and not _type in ["hmarker", ]:
tw@3925
   452
                # Get replace method and replace hit
tw@3925
   453
                replace = getattr(self, '_' + _type + '_repl')
tw@3925
   454
                # print _type, hit
tw@3925
   455
                result.append(replace(hit))
tw@3925
   456
                return ''.join(result)
tw@3925
   457
        else:
tw@3925
   458
            # We should never get here
tw@3925
   459
            import pprint
tw@3925
   460
            raise Exception("Can't handle match %r\n%s\n%s" % (
tw@3925
   461
                match,
tw@3925
   462
                pprint.pformat(match.groupdict()),
tw@3925
   463
                pprint.pformat(match.groups()),
tw@3925
   464
            ))
tw@3925
   465
tw@3925
   466
        return ""
tw@3925
   467
tw@3925
   468
    def convert(self, request):
tw@3925
   469
        """ For each line, scan through looking for magic
tw@3925
   470
            strings, outputting verbatim any intervening text.
tw@3925
   471
        """
tw@3925
   472
        self.request = request
tw@3925
   473
        # prepare regex patterns
tw@3925
   474
        rules = self.formatting_rules.replace('\n', '|')
tw@3925
   475
        if self.request.cfg.bang_meta:
tw@3925
   476
            rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
tw@3925
   477
                'word_rule': self.word_rule,
tw@3925
   478
                'rules': rules,
tw@3925
   479
            }
tw@3925
   480
        pre_rules = r'''(?P<pre>\}\}\})'''
tw@3925
   481
        pre_scan_re = re.compile(pre_rules, re.UNICODE)
tw@3925
   482
        scan_re = re.compile(rules, re.UNICODE)
tw@3925
   483
        eol_re = re.compile(r'\r?\n', re.UNICODE)
tw@3925
   484
tw@3925
   485
        rawtext = self.raw
tw@3925
   486
tw@3925
   487
        # remove last item because it's guaranteed to be empty
tw@3925
   488
        self.lines = eol_re.split(rawtext)[:-1]
tw@3925
   489
        self.in_processing_instructions = True
tw@3925
   490
tw@3925
   491
        # Main loop
tw@3925
   492
        for line in self.lines:
tw@3925
   493
            # ignore processing instructions
tw@3925
   494
            if self.in_processing_instructions:
tw@3925
   495
                found = False
tw@3925
   496
                for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
tw@3925
   497
                           "#pragma", "#form", "#acl", "#language"):
tw@3925
   498
                    if line.lower().startswith(pi):
tw@3925
   499
                        self.request.write(line + '\r\n')
tw@3925
   500
                        found = True
tw@3925
   501
                        break
tw@3925
   502
                if not found:
tw@3925
   503
                    self.in_processing_instructions = False
tw@3925
   504
                else:
tw@3925
   505
                    continue # do not parse this line
tw@3925
   506
            if not line.strip():
tw@3925
   507
                self.request.write(line + '\r\n')
tw@3925
   508
            else:
tw@3925
   509
                # Scan line, format and write
tw@3925
   510
                scanning_re = self.in_pre and pre_scan_re or scan_re
tw@3925
   511
                formatted_line = self.scan(scanning_re, line)
tw@3925
   512
                self.request.write(formatted_line + '\r\n')
tw@3925
   513