MoinMoin/script/migration/_conv160_wiki.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Wed, 11 Feb 2009 02:34:33 +0100
changeset 4569 3caaa8c74c41
parent 3119 2a380f99afa8
child 5067 10965bc1ee3c
permissions -rw-r--r--
wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
tw@2599
     1
# -*- coding: iso-8859-1 -*-
tw@2599
     2
"""
tw@2730
     3
    MoinMoin - convert content in 1.5.8 wiki markup to 1.6.0 style
tw@2730
     4
               by using a modified 1.5.8 parser as translator.
tw@2599
     5
tw@2599
     6
    Assuming we have this "renames" map:
tw@2599
     7
    -------------------------------------------------------
tw@2599
     8
    'PAGE', 'some_page'        -> 'some page'
tw@2599
     9
    'FILE', 'with%20blank.txt' -> 'with blank.txt'
tw@2599
    10
tw@2599
    11
    Markup transformations needed:
tw@2599
    12
    -------------------------------------------------------
tw@2726
    13
    ["some_page"]           -> [[some page]] # renamed
tw@2729
    14
    [:some_page:some text]  -> [[some page|some text]]
tw@2729
    15
    [:page:text]            -> [[page|text]]
tw@2599
    16
                               (with a page not being renamed)
tw@2599
    17
tw@2726
    18
    attachment:with%20blank.txt -> [[attachment:with blank.txt]]
tw@2726
    19
    attachment:some_page/with%20blank.txt -> [[attachment:some page/with blank.txt]]
tw@2599
    20
    The attachment processing should also urllib.unquote the filename (or at
tw@2599
    21
    least replace %20 by space) and put it into "quotes" if it contains spaces.
tw@2599
    22
tw@2599
    23
    @copyright: 2007 MoinMoin:JohannesBerg,
tw@2599
    24
                2007 MoinMoin:ThomasWaldmann
tw@2599
    25
    @license: GNU GPL, see COPYING for details.
tw@2599
    26
"""
tw@2599
    27
tw@2730
    28
import re
tw@2730
    29
tw@2599
    30
from MoinMoin import i18n
tw@2730
    31
# Replace i18n.wikiLanguages with a stub that always returns an empty dict.
# This runs before the MoinMoin imports below.
# NOTE(review): presumably this keeps the converter independent of installed
# language/translation data - confirm before changing the import order.
i18n.wikiLanguages = lambda: {}
tw@2730
    32
tw@2730
    33
from MoinMoin import config, wikiutil, macro
tw@2602
    34
from MoinMoin.action import AttachFile
tw@2730
    35
from MoinMoin.Page import Page
tw@2761
    36
from MoinMoin.support.python_compatibility import rsplit
tw@2730
    37
tw@2730
    38
from text_moin158_wiki import Parser
tw@2730
    39
tw@2730
    40
def convert_wiki(request, pagename, intext, renames):
    """ Convert page content written in 1.5.8 wiki markup to 1.6.0 markup.

        @param request: request object; its redirectedOutput() is used to
                        capture what the converter writes
        @param pagename: name of the page the content belongs to
        @param intext: page content to convert
        @param renames: rename map, handed to the Converter unchanged
        @return: converted content; a trailing CR/LF that was only added
                 for the conversion is removed again
    """
    eol = '\r\n'
    # the converter splits on line ends and drops the last (empty) item,
    # so the text has to end with a line end while it is being converted
    added_eol = not intext.endswith(eol)
    if added_eol:
        intext = intext + eol

    converter = Converter(request, pagename, intext, renames)
    converted = request.redirectedOutput(converter.convert, request)

    if added_eol and converted.endswith(eol):
        converted = converted[:-2]
    return converted
tw@2730
    51
tw@2599
    52
tw@3119
    53
STONEAGE_IMAGELINK = False # True for ImageLink(target,image), False for ImageLink(image,target)

# copied from moin 1.6.0 macro/ImageLink.py (to be safe in case we remove ImageLink some day)
# ... and slightly modified/refactored for our needs here.
# hint: using parse_quoted_separated from wikiutil does NOT work here, because we do not have
#       quoted urls when they contain a '=' char in the 1.5 data input.
def explore_args(args):
    """ Split an ImageLink argument string into positional and keyword parameters.

        The string is split at commas and every item is stripped. An item
        containing '=' is a keyword parameter only if its (lowercased) key is
        one of width/height/alt. Otherwise it is kept as positional parameter
        if it contains '://' and no keyword was seen yet (so urls with a '='
        in them survive); any other item containing '=' is dropped.

        @param args: raw macro argument string (may be None or empty)
        @return: tuple (pp, kw): list of positional parameters and dict of
                 keyword parameters (keys are lowercase str). The first two
                 positional parameters are swapped if STONEAGE_IMAGELINK is
                 set.
    """
    if args:
        args = [arg.strip() for arg in args.split(',')]
    else:
        args = []

    kw = {} # keyword args
    pp = [] # positional parameters

    kwAllowed = ('width', 'height', 'alt')

    for arg in args:
        if '=' not in arg:
            pp.append(arg)
            continue
        key, value = arg.split('=', 1)
        key_lower = key.lower()
        # avoid that urls with "=" are interpreted as keyword
        if key_lower in kwAllowed:
            # str() only after the whitelist check: calling it on an
            # arbitrary (maybe non-ascii) unicode key would raise
            # UnicodeEncodeError and abort the conversion
            kw[str(key_lower)] = value
        elif not kw and '://' in arg:
            # assuming that this is the image
            pp.append(arg)

    if STONEAGE_IMAGELINK and len(pp) >= 2:
        pp[0], pp[1] = pp[1], pp[0]

    return pp, kw
tw@3119
    91
tw@3119
    92
tw@2599
    93
class Converter(Parser):
tw@2730
    94
    def __init__(self, request, pagename, raw, renames):
tw@2602
    95
        self.pagename = pagename
tw@2599
    96
        self.raw = raw
tw@2599
    97
        self.renames = renames
tw@2730
    98
        self.request = request
tw@2599
    99
        self._ = None
tw@2730
   100
        self.in_pre = 0
tw@2730
   101
tw@3119
   102
        self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(['ImageLink', ] + macro.getNames(self.request.cfg))}
tw@2730
   103
tw@2730
   104
    # no change
tw@2730
   105
    def return_word(self, word):
tw@2730
   106
        return word
tw@2730
   107
    _emph_repl = return_word
tw@2730
   108
    _emph_ibb_repl = return_word
tw@2730
   109
    _emph_ibi_repl = return_word
tw@2730
   110
    _emph_ib_or_bi_repl = return_word
tw@2730
   111
    _u_repl = return_word
tw@2730
   112
    _strike_repl = return_word
tw@2730
   113
    _sup_repl = return_word
tw@2730
   114
    _sub_repl = return_word
tw@2730
   115
    _small_repl = return_word
tw@2730
   116
    _big_repl = return_word
tw@2730
   117
    _tt_repl = return_word
tw@2730
   118
    _tt_bt_repl = return_word
tw@2730
   119
    _remark_repl = return_word
tw@2730
   120
    _table_repl = return_word
tw@2730
   121
    _tableZ_repl = return_word
tw@2730
   122
    _rule_repl = return_word
tw@2730
   123
    _smiley_repl = return_word
tw@2730
   124
    _smileyA_repl = return_word
tw@2730
   125
    _ent_repl = return_word
tw@2730
   126
    _ent_numeric_repl = return_word
tw@2730
   127
    _ent_symbolic_repl = return_word
tw@2730
   128
    _heading_repl = return_word
tw@2730
   129
    _email_repl = return_word
tw@2730
   130
    _notword_repl = return_word
tw@2730
   131
    _indent_repl = return_word
tw@2730
   132
    _li_none_repl = return_word
tw@2730
   133
    _li_repl = return_word
tw@2730
   134
    _ol_repl = return_word
tw@2730
   135
    _dl_repl = return_word
tw@2730
   136
    _comment_repl = return_word
tw@2730
   137
tw@2730
   138
    # translate pagenames using pagename translation map
tw@2599
   139
tw@2602
   140
    def _replace(self, key):
tw@2611
   141
        """ replace a item_name if it is in the renames dict
tw@2611
   142
            key is either a 2-tuple ('PAGE', pagename)
tw@2611
   143
            or a 3-tuple ('FILE', pagename, filename)
tw@2611
   144
        """
tw@2611
   145
        current_page = self.pagename
tw@2611
   146
        item_type, page_name, file_name = (key + (None, ))[:3]
tw@2706
   147
        abs_page_name = wikiutil.AbsPageName(current_page, page_name)
tw@2611
   148
        if item_type == 'PAGE':
tw@2611
   149
            key = (item_type, abs_page_name)
tw@2752
   150
            new_name = self.renames.get(key)
tw@2752
   151
            if new_name is None:
tw@2752
   152
                # we don't have an entry in rename map - apply the same magic
tw@2752
   153
                # to the page name as 1.5 did (" " -> "_") and try again:
tw@2752
   154
                abs_magic_name = abs_page_name.replace(u' ', u'_')
tw@2752
   155
                key = (item_type, abs_magic_name)
tw@2752
   156
                new_name = self.renames.get(key)
tw@2752
   157
                if new_name is None:
tw@2752
   158
                    # we didn't find it under the magic name either -
tw@2752
   159
                    # that means we do not rename it!
tw@2752
   160
                    new_name = page_name
tw@2752
   161
            if new_name != page_name and abs_page_name != page_name:
tw@2752
   162
                # we have to fix the (absolute) new_name to be a relative name (as it was before)
tw@2752
   163
                new_name = wikiutil.RelPageName(current_page, new_name)
tw@2611
   164
        elif item_type == 'FILE':
tw@2611
   165
            key = (item_type, abs_page_name, file_name)
tw@2752
   166
            new_name = self.renames.get(key)
tw@2752
   167
            if new_name is None:
tw@2752
   168
                # we don't have an entry in rename map - apply the same magic
tw@2752
   169
                # to the page name as 1.5 did (" " -> "_") and try again:
tw@2752
   170
                abs_magic_name = abs_page_name.replace(u' ', u'_')
tw@2752
   171
                key = (item_type, abs_magic_name, file_name)
tw@2752
   172
                new_name = self.renames.get(key)
tw@2752
   173
                if new_name is None:
tw@2752
   174
                    # we didn't find it under the magic name either -
tw@2752
   175
                    # that means we do not rename it!
tw@2752
   176
                    new_name = file_name
tw@2611
   177
        return new_name
tw@2599
   178
tw@2599
   179
    def _replace_target(self, target):
tw@2761
   180
        target_and_anchor = rsplit(target, '#', 1)
tw@2599
   181
        if len(target_and_anchor) > 1:
tw@2599
   182
            target, anchor = target_and_anchor
tw@2602
   183
            target = self._replace(('PAGE', target))
tw@2599
   184
            return '%s#%s' % (target, anchor)
tw@2599
   185
        else:
tw@2602
   186
            target = self._replace(('PAGE', target))
tw@2599
   187
            return target
tw@2599
   188
tw@2730
   189
    # markup conversion
tw@2730
   190
tw@2730
   191
    def _macro_repl(self, word):
tw@2730
   192
        # we use [[...]] for links now, macros will be <<...>>
tw@2814
   193
        macro_rule = ur"""
tw@2814
   194
            \[\[
tw@2814
   195
            (?P<macro_name>\w+)
tw@2814
   196
            (\((?P<macro_args>.*?)\))?
tw@2814
   197
            \]\]
tw@2814
   198
        """
tw@2814
   199
        word = unicode(word) # XXX why is word not unicode before???
tw@2814
   200
        m = re.match(macro_rule, word, re.X|re.U)
tw@2814
   201
        macro_name = m.group('macro_name')
tw@2814
   202
        macro_args = m.group('macro_args')
tw@2814
   203
        if macro_name == 'ImageLink':
tw@3119
   204
            fixed, kw = explore_args(macro_args)
tw@3018
   205
            #print "macro_args=%r" % macro_args
tw@3119
   206
            #print "fixed=%r, kw=%r" % (fixed, kw)
tw@2814
   207
            image, target = (fixed + ['', ''])[:2]
tw@3018
   208
            if image is None:
tw@3018
   209
                image = ''
tw@3018
   210
            if target is None:
tw@3018
   211
                target = ''
tw@2814
   212
            if '://' not in image:
tw@2814
   213
                # if it is not a URL, it is meant as attachment
tw@2814
   214
                image = u'attachment:%s' % image
tw@2814
   215
            if not target:
tw@2814
   216
                target = image
tw@2833
   217
            elif target.startswith('inline:'):
tw@2833
   218
                target = 'attachment:' + target[7:] # we don't support inline:
tw@3119
   219
            elif target.startswith('wiki:'):
tw@3119
   220
                target = target[5:] # drop wiki:
tw@2814
   221
            image_attrs = []
tw@2814
   222
            alt = kw.get('alt') or ''
tw@2814
   223
            width = kw.get('width')
tw@2814
   224
            if width is not None:
tw@2814
   225
                image_attrs.append(u"width=%s" % width)
tw@2814
   226
            height = kw.get('height')
tw@2814
   227
            if height is not None:
tw@2814
   228
                image_attrs.append(u"height=%s" % height)
tw@2814
   229
            image_attrs = u", ".join(image_attrs)
tw@2814
   230
            if image_attrs:
tw@2814
   231
                image_attrs = u'|' + image_attrs
tw@2814
   232
            if alt or image_attrs:
tw@2814
   233
                alt = u'|' + alt
tw@2814
   234
            result = u'[[%s|{{%s%s%s}}]]' % (target, image, alt, image_attrs)
tw@2814
   235
        else:
tw@2814
   236
            if macro_args:
tw@2814
   237
                macro_args = u"(%s)" % macro_args
tw@2814
   238
            else:
tw@2814
   239
                macro_args = u''
tw@2814
   240
            result = u"<<%s%s>>" % (macro_name, macro_args)
tw@2730
   241
        # XXX later check whether some to be renamed pagename is used as macro param
tw@2814
   242
        return result
tw@2730
   243
tw@2730
   244
    def _word_repl(self, word, text=None):
tw@2730
   245
        """Handle WikiNames."""
tw@2730
   246
        if not text:
tw@2730
   247
            return word
tw@2730
   248
        else: # internal use:
tw@2730
   249
            return '[[%s|%s]]' % (word, text)
tw@2730
   250
tw@2730
   251
    def _wikiname_bracket_repl(self, word):
tw@2730
   252
        """Handle special-char wikinames."""
tw@2730
   253
        pagename = word[2:-2]
tw@2730
   254
        if pagename:
tw@2603
   255
            pagename = self._replace(('PAGE', pagename))
tw@2730
   256
            return '[[%s]]' % pagename
tw@2602
   257
        else:
tw@2730
   258
            return word
tw@2599
   259
tw@2603
   260
    def _interwiki_repl(self, word):
tw@2603
   261
        """Handle InterWiki links."""
tw@2604
   262
        wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
tw@2604
   263
        if wikitag_bad:
tw@2604
   264
            return word
tw@2604
   265
        else:
tw@2730
   266
            wikiname, pagename = word.split(':', 1)
tw@2730
   267
            pagename = wikiutil.url_unquote(pagename) # maybe someone has used %20 for blanks in pagename
tw@2730
   268
            camelcase = wikiutil.isStrictWikiname(pagename)
tw@2730
   269
            if wikiname in ('Self', self.request.cfg.interwikiname):
tw@2730
   270
                pagename = self._replace(('PAGE', pagename))
tw@2730
   271
                if camelcase:
tw@2729
   272
                    return '%s' % pagename # optimize special case
tw@2729
   273
                else:
tw@2729
   274
                    return '[[%s]]' % pagename # optimize special case
tw@2608
   275
            else:
tw@2730
   276
                if ' ' in pagename: # we could get a ' '  by urlunquoting
tw@2730
   277
                    return '[[%s:%s]]' % (wikiname, pagename)
tw@2729
   278
                else:
tw@2730
   279
                    return '%s:%s' % (wikiname, pagename)
tw@2730
   280
tw@2730
   281
    def interwiki(self, url_and_text):
tw@2730
   282
        if len(url_and_text) == 1:
tw@2730
   283
            url = url_and_text[0]
tw@2730
   284
            text = ''
tw@2730
   285
        else:
tw@2730
   286
            url, text = url_and_text
tw@2730
   287
            text = '|' + text
tw@2730
   288
tw@2730
   289
        # keep track of whether this is a self-reference, so links
tw@2730
   290
        # are always shown even the page doesn't exist.
tw@2730
   291
        scheme, url = url.split(':', 1)
tw@2730
   292
        wikiname, pagename = wikiutil.split_wiki(url)
tw@2730
   293
        if (url.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text]
tw@2730
   294
            Page(self.request, url).exists()): # fancy link to local page [wiki:LocalPage text]
tw@2742
   295
            pagename = wikiutil.url_unquote(url)
tw@2742
   296
            pagename = self._replace_target(pagename)
tw@2742
   297
            return '[[%s%s]]' % (pagename, text)
tw@2742
   298
        if wikiname in ('Self', self.request.cfg.interwikiname, ''): # [wiki:Self:LocalPage text] or [:LocalPage:text]
tw@2730
   299
            pagename = wikiutil.url_unquote(pagename)
tw@2730
   300
            pagename = self._replace_target(pagename)
tw@2730
   301
            return '[[%s%s]]' % (pagename, text)
tw@2730
   302
tw@2730
   303
        wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, url)
tw@3119
   304
        if wikitag_bad: # likely we got some /InterWiki as wikitail, we don't want that!
tw@3119
   305
            pagename = wikiutil.url_unquote(pagename)
tw@3119
   306
            pagename = self._replace_target(pagename)
tw@3119
   307
            wikitail = pagename
tw@3119
   308
        else: # good
tw@3119
   309
            wikitail = wikiutil.url_unquote(wikitail)
tw@2730
   310
tw@2730
   311
        # link to self?
tw@2759
   312
        if wikiutil.isPicture(wikitail):
tw@2759
   313
            return '{{%s:%s%s}}' % (wikitag, wikitail, text)
tw@2730
   314
        else:
tw@2759
   315
            if ' ' not in wikitail and not text:
tw@2759
   316
                return '%s:%s' % (wikitag, wikitail)
tw@2730
   317
            else:
tw@2730
   318
                return '[[%s:%s%s]]' % (wikitag, wikitail, text)
tw@2730
   319
tw@2730
   320
    def attachment(self, url_and_text):
tw@2730
   321
        """ This gets called on attachment URLs. """
tw@2730
   322
        if len(url_and_text) == 1:
tw@2730
   323
            url = url_and_text[0]
tw@2730
   324
            text = ''
tw@2730
   325
        else:
tw@2730
   326
            url, text = url_and_text
tw@2730
   327
            text = '|' + text
tw@2730
   328
tw@2730
   329
        scheme, fname = url.split(":", 1)
tw@2730
   330
        #scheme, fname, text = wikiutil.split_wiki(target_and_text)
tw@2730
   331
tw@2730
   332
        pagename, fname = AttachFile.absoluteName(fname, self.pagename)
tw@2730
   333
        from_this_page = pagename == self.pagename
tw@2730
   334
        fname = self._replace(('FILE', pagename, fname))
tw@4569
   335
        fname = wikiutil.url_unquote(fname)
tw@2730
   336
        fname = self._replace(('FILE', pagename, fname))
tw@2730
   337
        pagename = self._replace(('PAGE', pagename))
tw@2730
   338
        if from_this_page:
tw@2730
   339
            name = fname
tw@2730
   340
        else:
tw@2730
   341
            name = "%s/%s" % (pagename, fname)
tw@2730
   342
tw@2730
   343
        if scheme == 'drawing':
tw@2730
   344
            return "{{drawing:%s%s}}" % (name, text)
tw@2730
   345
tw@2730
   346
        # check for image URL, and possibly return IMG tag
tw@2730
   347
        # (images are always inlined, just like for other URLs)
tw@2730
   348
        if wikiutil.isPicture(name):
tw@2730
   349
            return "{{attachment:%s%s}}" % (name, text)
tw@2730
   350
tw@2730
   351
        # inline the attachment
tw@2730
   352
        if scheme == 'inline':
tw@2730
   353
            return '{{attachment:%s%s}}' % (name, text)
tw@2730
   354
        else: # 'attachment'
tw@2730
   355
            return '[[attachment:%s%s]]' % (name, text)
tw@2603
   356
tw@2599
   357
    def _url_repl(self, word):
tw@2599
   358
        """Handle literal URLs including inline images."""
tw@2599
   359
        scheme = word.split(":", 1)[0]
tw@2599
   360
tw@2730
   361
        if scheme == 'wiki':
tw@2730
   362
            return self.interwiki([word])
tw@2730
   363
        if scheme in self.attachment_schemas:
tw@2730
   364
            return '%s' % self.attachment([word])
tw@2599
   365
tw@2730
   366
        if wikiutil.isPicture(word): # magic will go away in 1.6!
tw@2740
   367
            return '{{%s}}' % word # new markup for inline images
tw@2599
   368
        else:
tw@2730
   369
            return word
tw@2599
   370
tw@2599
   371
    def _url_bracket_repl(self, word):
tw@2599
   372
        """Handle bracketed URLs."""
tw@2599
   373
        word = word[1:-1] # strip brackets
tw@2599
   374
tw@2730
   375
        # Local extended link?
tw@2599
   376
        if word[0] == ':':
tw@2599
   377
            words = word[1:].split(':', 1)
tw@2612
   378
            link, text = (words + ['', ''])[:2]
tw@2612
   379
            if link.strip() == text.strip():
tw@2612
   380
                text = ''
tw@2612
   381
            link = self._replace_target(link)
tw@2612
   382
            if text:
tw@2729
   383
                text = '|' + text
tw@2729
   384
            return '[[%s%s]]' % (link, text)
tw@2599
   385
tw@2730
   386
        # Traditional split on space
tw@2730
   387
        words = word.split(None, 1)
tw@2730
   388
        if words[0][0] == '#':
tw@2730
   389
            # anchor link
tw@2730
   390
            link, text = (words + ['', ''])[:2]
tw@2730
   391
            if link.strip() == text.strip():
tw@2730
   392
                text = ''
tw@2730
   393
            #link = self._replace_target(link)
tw@2730
   394
            if text:
tw@2730
   395
                text = '|' + text
tw@2730
   396
            return '[[%s%s]]' % (link, text)
tw@2599
   397
tw@2730
   398
        scheme = words[0].split(":", 1)[0]
tw@2730
   399
        if scheme == "wiki":
tw@2730
   400
            return self.interwiki(words)
tw@2730
   401
            #scheme, wikiname, pagename, text = self.interwiki(word)
tw@2730
   402
            #print "%r %r %r %r" % (scheme, wikiname, pagename, text)
tw@2730
   403
            #if wikiname in ('Self', self.request.cfg.interwikiname, ''):
tw@2730
   404
            #    if text:
tw@2730
   405
            #        text = '|' + text
tw@2730
   406
            #    return '[[%s%s]]' % (pagename, text)
tw@2730
   407
            #else:
tw@2730
   408
            #    if text:
tw@2730
   409
            #        text = '|' + text
tw@2730
   410
            #    return "[[%s:%s%s]]" % (wikiname, pagename, text)
tw@2730
   411
        if scheme in self.attachment_schemas:
tw@2740
   412
            m = self.attachment(words)
tw@2740
   413
            if m.startswith('{{') and m.endswith('}}'):
tw@2740
   414
                # with url_bracket markup, 1.5.8 parser does not embed, but link!
tw@2740
   415
                m = '[[%s]]' % m[2:-2]
tw@2740
   416
            return m
tw@2730
   417
tw@2730
   418
        target, desc = (words + ['', ''])[:2]
tw@2730
   419
        if wikiutil.isPicture(desc) and re.match(self.url_rule, desc):
tw@2730
   420
            #return '[[%s|{{%s|%s}}]]' % (words[0], words[1], words[0])
tw@2730
   421
            return '[[%s|{{%s}}]]' % (target, desc)
tw@2603
   422
        else:
tw@2730
   423
            if desc:
tw@2730
   424
                desc = '|' + desc
tw@2730
   425
            return '[[%s%s]]' % (target, desc)
tw@2599
   426
tw@2730
   427
    def _pre_repl(self, word):
tw@2730
   428
        w = word.strip()
tw@2730
   429
        if w == '{{{' and not self.in_pre:
tw@2730
   430
            self.in_pre = True
tw@2730
   431
        elif w == '}}}' and self.in_pre:
tw@2730
   432
            self.in_pre = False
tw@2730
   433
        return word
tw@2730
   434
tw@2730
   435
    def _processor_repl(self, word):
tw@2730
   436
        self.in_pre = True
tw@2730
   437
        return word
tw@2730
   438
tw@2599
   439
    def scan(self, scan_re, line):
tw@2730
   440
        """ Scans one line - append text before match, invoke replace() with match, and add text after match.  """
tw@2599
   441
        result = []
tw@2599
   442
        lastpos = 0
tw@2599
   443
tw@2599
   444
        for match in scan_re.finditer(line):
tw@2599
   445
            # Add text before the match
tw@2599
   446
            if lastpos < match.start():
tw@2599
   447
                result.append(line[lastpos:match.start()])
tw@2599
   448
            # Replace match with markup
tw@2599
   449
            result.append(self.replace(match))
tw@2599
   450
            lastpos = match.end()
tw@2599
   451
tw@2599
   452
        # Add remainder of the line
tw@2599
   453
        result.append(line[lastpos:])
tw@2599
   454
        return u''.join(result)
tw@2599
   455
tw@2730
   456
tw@2599
   457
    def replace(self, match):
tw@2599
   458
        """ Replace match using type name """
tw@2599
   459
        result = []
tw@2599
   460
        for _type, hit in match.groupdict().items():
tw@2599
   461
            if hit is not None and not _type in ["hmarker", ]:
tw@2599
   462
                # Get replace method and replace hit
tw@2599
   463
                replace = getattr(self, '_' + _type + '_repl')
tw@2730
   464
                # print _type, hit
tw@2599
   465
                result.append(replace(hit))
tw@2599
   466
                return ''.join(result)
tw@2599
   467
        else:
tw@2599
   468
            # We should never get here
tw@2599
   469
            import pprint
tw@2599
   470
            raise Exception("Can't handle match %r\n%s\n%s" % (
tw@2599
   471
                match,
tw@2599
   472
                pprint.pformat(match.groupdict()),
tw@2599
   473
                pprint.pformat(match.groups()),
tw@2599
   474
            ))
tw@2599
   475
tw@2599
   476
        return ""
tw@2599
   477
tw@2604
   478
    def convert(self, request):
tw@2599
   479
        """ For each line, scan through looking for magic
tw@2599
   480
            strings, outputting verbatim any intervening text.
tw@2599
   481
        """
tw@2604
   482
        self.request = request
tw@2599
   483
        # prepare regex patterns
tw@2599
   484
        rules = self.formatting_rules.replace('\n', '|')
tw@2730
   485
        if self.request.cfg.bang_meta:
tw@2599
   486
            rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
tw@2599
   487
                'word_rule': self.word_rule,
tw@2599
   488
                'rules': rules,
tw@2599
   489
            }
tw@2749
   490
        pre_rules = r'''(?P<pre>\}\}\})'''
tw@2749
   491
        pre_scan_re = re.compile(pre_rules, re.UNICODE)
tw@2599
   492
        scan_re = re.compile(rules, re.UNICODE)
tw@2599
   493
        eol_re = re.compile(r'\r?\n', re.UNICODE)
tw@2599
   494
tw@2599
   495
        rawtext = self.raw
tw@2599
   496
tw@2599
   497
        # remove last item because it's guaranteed to be empty
tw@2599
   498
        self.lines = eol_re.split(rawtext)[:-1]
tw@2730
   499
        self.in_processing_instructions = True
tw@2599
   500
tw@2599
   501
        # Main loop
tw@2599
   502
        for line in self.lines:
tw@2599
   503
            # ignore processing instructions
tw@2599
   504
            if self.in_processing_instructions:
tw@2599
   505
                found = False
tw@2599
   506
                for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
tw@2599
   507
                           "#pragma", "#form", "#acl", "#language"):
tw@2599
   508
                    if line.lower().startswith(pi):
tw@2599
   509
                        self.request.write(line + '\r\n')
tw@2599
   510
                        found = True
tw@2599
   511
                        break
tw@2599
   512
                if not found:
tw@2730
   513
                    self.in_processing_instructions = False
tw@2599
   514
                else:
tw@2599
   515
                    continue # do not parse this line
tw@2730
   516
            if not line.strip():
tw@2730
   517
                self.request.write(line + '\r\n')
tw@2599
   518
            else:
tw@2730
   519
                # Scan line, format and write
tw@2730
   520
                scanning_re = self.in_pre and pre_scan_re or scan_re
tw@2730
   521
                formatted_line = self.scan(scanning_re, line)
tw@2730
   522
                self.request.write(formatted_line + '\r\n')
tw@2599
   523