MoinMoin/parser/text_moin_wiki.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Wed, 11 Feb 2009 02:34:33 +0100
changeset 4569 3caaa8c74c41
parent 4498 910474dded06
child 5101 d8ccac2f24c5
permissions -rw-r--r--
wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
tw-public@0
     1
# -*- coding: iso-8859-1 -*-
tw-public@0
     2
"""
tw-public@0
     3
    MoinMoin - MoinMoin Wiki Markup Parser
tw-public@0
     4
tw@1918
     5
    @copyright: 2000-2002 Juergen Hermann <jh@web.de>,
tw@3127
     6
                2006-2008 MoinMoin:ThomasWaldmann,
rb@2016
     7
                2007 by MoinMoin:ReimarBauer
tw-public@0
     8
    @license: GNU GPL, see COPYING for details.
tw-public@0
     9
"""
tw-public@0
    10
tw@1667
    11
import re
tw@3107
    12
tw@3110
    13
from MoinMoin import log
tw@3110
    14
logging = log.getLogger(__name__)
tw@3052
    15
tw@635
    16
from MoinMoin import config, wikiutil, macro
tw@2755
    17
from MoinMoin.Page import Page
tw@4493
    18
from MoinMoin.support.python_compatibility import set
johannes@2567
    19
tw@2286
    20
Dependencies = ['user'] # {{{#!wiki comment ... }}} has different output depending on the user's profile settings
tw-public@0
    21
tw@3052
    22
johannes@3784
    23
# Identity stand-in for a translation function: returns its argument unchanged.
# It is applied to the Parser.quickhelp text below.
# NOTE(review): presumably this only marks the string for translation
# extraction, while real translation goes through request.getText - confirm.
_ = lambda x: x
johannes@3784
    24
tw-public@0
    25
class Parser:
tw-public@0
    26
    """
tw@2777
    27
        Parse wiki format markup (and call the formatter to generate output).
tw-public@0
    28
tw-public@0
    29
        All formatting commands can be parsed one line at a time, though
tw-public@0
    30
        some state is carried over between lines.
tw-public@0
    31
tw@2777
    32
        Methods named like _*_repl() are responsible to handle the named regex patterns.
tw-public@0
    33
    """
tw-public@0
    34
tw-public@0
    35
    # allow caching
tw-public@0
    36
    caching = 1
tw@2196
    37
    Dependencies = Dependencies
johannes@3784
    38
    quickhelp = _(u"""\
johannes@3784
    39
 Emphasis:: <<Verbatim('')>>''italics''<<Verbatim('')>>; <<Verbatim(''')>>'''bold'''<<Verbatim(''')>>; <<Verbatim(''''')>>'''''bold italics'''''<<Verbatim(''''')>>; <<Verbatim('')>>''mixed ''<<Verbatim(''')>>'''''bold'''<<Verbatim(''')>> and italics''<<Verbatim('')>>; <<Verbatim(----)>> horizontal rule.
johannes@3784
    40
 Headings:: = Title 1 =; == Title 2 ==; === Title 3 ===; ==== Title 4 ====; ===== Title 5 =====.
johannes@3784
    41
 Lists:: space and one of: * bullets; 1., a., A., i., I. numbered items; 1.#n start numbering at n; space alone indents.
johannes@3784
    42
 Links:: <<Verbatim(JoinCapitalizedWords)>>; <<Verbatim([[target|linktext]])>>.
johannes@3784
    43
 Tables:: || cell text |||| cell text spanning 2 columns ||;    no trailing white space allowed after tables or titles.
johannes@3784
    44
tw@4473
    45
(!) For more help, see HelpOnEditing or HelpOnMoinWikiSyntax.
johannes@3784
    46
""")
tw-public@0
    47
tw-public@0
    48
    # some common strings
tw@2719
    49
    CHILD_PREFIX = wikiutil.CHILD_PREFIX
tw@2719
    50
    CHILD_PREFIX_LEN = wikiutil.CHILD_PREFIX_LEN
tw-public@0
    51
    PARENT_PREFIX = wikiutil.PARENT_PREFIX
tw@2719
    52
    PARENT_PREFIX_LEN = wikiutil.PARENT_PREFIX_LEN
tw@2719
    53
tw-public@0
    54
    punct_pattern = re.escape(u'''"\'}]|:,.)?!''')
tw@3052
    55
    url_scheme = u'|'.join(config.url_schemas)
tw-public@0
    56
tw-public@0
    57
    # some common rules
tw@2719
    58
    url_rule = ur'''
tw@3052
    59
        (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
tw@2719
    60
        (?P<url_target>  # capture whole url there
tw@2719
    61
         (?P<url_scheme>%(url_scheme)s)  # some scheme
tw@2719
    62
         \:
tw@2719
    63
         \S+?  # anything non-whitespace
tw@2719
    64
        )
tw@3052
    65
        (?:$|(?=\s|[%(punct)s]+(\s|$)))  # require either end of line or some whitespace or some punctuation+blank/eol afterwards
tw@2719
    66
    ''' % {
tw@2719
    67
        'url_scheme': url_scheme,
tw@2719
    68
        'punct': punct_pattern,
tw@2719
    69
    }
tw@2719
    70
tw@3052
    71
    # this is for a free (non-bracketed) interwiki link - to avoid false positives,
tw@3052
    72
    # we are rather restrictive here (same as in moin 1.5: require that the
tw@3052
    73
    # interwiki_wiki name starts with an uppercase letter A-Z. Later, the code
tw@3052
    74
    # also checks whether the wiki name is in the interwiki map (if not, it renders
tw@3052
    75
    # normal text, no link):
tw@3052
    76
    interwiki_rule = ur'''
tw@3052
    77
        (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
tw@3052
    78
        (?P<interwiki_wiki>[A-Z][a-zA-Z]+)  # interwiki wiki name
tw@3052
    79
        \:
tw@3052
    80
        (?P<interwiki_page>  # interwiki page name
tw@3052
    81
         (?=[^ ]*[%(u)s%(l)s0..9][^ ]*\ )  # make sure there is something non-blank with at least one alphanum letter following
tw@3052
    82
         [^\s%(punct)s]+  # we take all until we hit some blank or punctuation char ...
tw@3052
    83
        )
tw@3052
    84
    ''' % {
tw@3052
    85
        'u': config.chars_upper,
tw@3052
    86
        'l': config.chars_lower,
tw@3052
    87
        'punct': punct_pattern,
tw@3052
    88
    }
tw@3052
    89
tw@3390
    90
    # BE CAREFUL: if you make changes to word_rule, consider making them to word_rule_js as well (see below)
tw@2719
    91
    word_rule = ur'''
tw@2719
    92
        (?:
tw@2744
    93
         (?<![%(u)s%(l)s/])  # require anything not upper/lower/slash before
tw@2719
    94
         |
tw@2719
    95
         ^  # ... or beginning of line
tw@2719
    96
        )
tw@2719
    97
        (?P<word_bang>\!)?  # configurable: avoid getting CamelCase rendered as link
tw@2719
    98
        (?P<word_name>
tw@2744
    99
         (?:
tw@2778
   100
          (%(parent)s)*  # there might be either ../ parent prefix(es)
tw@2744
   101
          |
tw@2744
   102
          ((?<!%(child)s)%(child)s)?  # or maybe a single / child prefix (but not if we already had it before)
tw@2744
   103
         )
tw@2719
   104
         (
tw@2744
   105
          ((?<!%(child)s)%(child)s)?  # there might be / child prefix (but not if we already had it before)
tw@2719
   106
          (?:[%(u)s][%(l)s]+){2,}  # at least 2 upper>lower transitions make CamelCase
tw@2719
   107
         )+  # we can have MainPage/SubPage/SubSubPage ...
tw@2778
   108
         (?:
tw@2778
   109
          \#  # anchor separator          TODO check if this does not make trouble at places where word_rule is used
tw@2778
   110
          (?P<word_anchor>\S+)  # some anchor name
tw@2778
   111
         )?
tw@2719
   112
        )
tw@2771
   113
        (?:
tw@2771
   114
         (?![%(u)s%(l)s/])  # require anything not upper/lower/slash following
tw@2771
   115
         |
tw@2771
   116
         $  # ... or end of line
tw@2771
   117
        )
tw@2719
   118
    ''' % {
tw-public@0
   119
        'u': config.chars_upper,
tw-public@0
   120
        'l': config.chars_lower,
tw@2719
   121
        'child': re.escape(CHILD_PREFIX),
tw@2719
   122
        'parent': re.escape(PARENT_PREFIX),
tw-public@0
   123
    }
tw@3390
   124
    # simplified word_rule for FCKeditor's "unlink" plugin (puts a ! in front of a WikiName if WikiName matches word_rule_js),
tw@3390
   125
    # because JavaScript can not use group names and verbose regular expressions!
tw@3390
   126
    word_rule_js = (
tw@3390
   127
        ur'''(?:(?<![%(u)s%(l)s/])|^)'''
tw@3390
   128
        ur'''(?:'''
tw@3390
   129
         ur'''(?:(%(parent)s)*|((?<!%(child)s)%(child)s)?)'''
tw@3390
   130
         ur'''(((?<!%(child)s)%(child)s)?(?:[%(u)s][%(l)s]+){2,})+'''
tw@3390
   131
         ur'''(?:\#(?:\S+))?'''
tw@3390
   132
        ur''')'''
tw@3390
   133
        ur'''(?:(?![%(u)s%(l)s/])|$)'''
tw@3390
   134
    ) % {
tw@3390
   135
        'u': config.chars_upper,
tw@3390
   136
        'l': config.chars_lower,
tw@3390
   137
        'child': re.escape(CHILD_PREFIX),
tw@3390
   138
        'parent': re.escape(PARENT_PREFIX),
tw@3390
   139
    }
tw-public@0
   140
tw@2723
   141
    # link targets:
tw@2719
   142
    extern_rule = r'(?P<extern_addr>(?P<extern_scheme>%s)\:.*)' % url_scheme
tw@2722
   143
    attach_rule = r'(?P<attach_scheme>attachment|drawing)\:(?P<attach_addr>.*)'
tw@2719
   144
    page_rule = r'(?P<page_name>.*)'
tw@2719
   145
tw@2723
   146
    link_target_rules = r'|'.join([
tw@2719
   147
        extern_rule,
tw@2719
   148
        attach_rule,
tw@2719
   149
        page_rule,
tw@2719
   150
    ])
tw@2723
   151
    link_target_re = re.compile(link_target_rules, re.VERBOSE|re.UNICODE)
tw@2719
   152
tw@2809
   153
    link_rule = r"""
tw@2809
   154
        (?P<link>
johannes@2925
   155
            \[\[  # link target
johannes@2925
   156
            \s*  # strip space
tw@3052
   157
            (?P<link_target>[^|]+?)
johannes@2925
   158
            \s*  # strip space
johannes@2925
   159
            (
johannes@2925
   160
                \|  # link description
johannes@2925
   161
                \s*  # strip space
johannes@2925
   162
                (?P<link_desc>
johannes@2925
   163
                    (?:  # 1. we have either a transclusion here (usually a image)
johannes@2925
   164
                        \{\{
tw@3052
   165
                        \s*[^|]+?\s*  # usually image target (strip space)
tw@3052
   166
                        (\|\s*[^|]*?\s*  # usually image alt text (optional, strip space)
tw@3052
   167
                            (\|\s*[^|]*?\s*  # transclusion parameters (usually key="value" format, optional, strip space)
johannes@2925
   168
                            )?
johannes@2925
   169
                        )?
johannes@2925
   170
                        \}\}
johannes@2925
   171
                    )
johannes@2925
   172
                    |
johannes@2925
   173
                    (?:  # 2. or we have simple text here.
tw@3052
   174
                        [^|]+?
johannes@2925
   175
                    )
johannes@2925
   176
                )?
johannes@2925
   177
                \s*  # strip space
johannes@2925
   178
                (
johannes@2925
   179
                    \|  # link parameters
johannes@2925
   180
                    \s*  # strip space
tw@3052
   181
                    (?P<link_params>[^|]+?)?
johannes@2925
   182
                    \s*  # strip space
tw@2811
   183
                )?
tw@2811
   184
            )?
tw@2809
   185
            \]\]
tw@2809
   186
        )
tw@2809
   187
    """
tw@2809
   188
tw@2723
   189
    transclude_rule = r"""
tw@2723
   190
        (?P<transclude>
tw@2723
   191
            \{\{
tw@3052
   192
            \s*(?P<transclude_target>[^|]+?)\s*  # usually image target (strip space)
tw@3052
   193
            (\|\s*(?P<transclude_desc>[^|]+?)?\s*  # usually image alt text (optional, strip space)
tw@3052
   194
                (\|\s*(?P<transclude_params>[^|]+?)?\s*  # transclusion parameters (usually key="value" format, optional, strip space)
tw@2811
   195
                )?
tw@2811
   196
            )?
tw@2723
   197
            \}\}
tw@2723
   198
        )
tw@2723
   199
    """
tw@2723
   200
    text_rule = r"""
tw@2723
   201
        (?P<simple_text>
tw@3052
   202
            [^|]+  # some text (not empty, does not contain separator)
tw@2723
   203
        )
tw@2723
   204
    """
tw@2723
   205
    # link descriptions:
tw@2723
   206
    link_desc_rules = r'|'.join([
tw@2723
   207
            transclude_rule,
tw@2723
   208
            text_rule,
tw@2723
   209
    ])
tw@2723
   210
    link_desc_re = re.compile(link_desc_rules, re.VERBOSE|re.UNICODE)
tw@2723
   211
tw@2723
   212
    # transclude descriptions:
tw@2723
   213
    transclude_desc_rules = r'|'.join([
tw@2723
   214
            text_rule,
tw@2723
   215
    ])
tw@2723
   216
    transclude_desc_re = re.compile(transclude_desc_rules, re.VERBOSE|re.UNICODE)
tw@2723
   217
tw@2723
   218
    # lists:
tw@2719
   219
    ol_rule = ur"""
tw@2719
   220
        ^\s+  # indentation
tw@2719
   221
        (?:[0-9]+|[aAiI])\. # arabic, alpha, roman counting
tw@2719
   222
        (?:\#\d+)?  # optional start number
tw@2719
   223
        \s  # require one blank afterwards
tw@2719
   224
    """
tw@2719
   225
    ol_re = re.compile(ol_rule, re.VERBOSE|re.UNICODE)
tw@2719
   226
tw@2719
   227
    dl_rule = ur"""
tw@2719
   228
        ^\s+  # indentation
tw@2719
   229
        .*?::  # definition term::
tw@2719
   230
        \s  # require on blank afterwards
tw@2719
   231
    """
tw@2719
   232
    dl_re = re.compile(dl_rule, re.VERBOSE|re.UNICODE)
tw-public@0
   233
tw@2779
   234
    # others
tw@2779
   235
    indent_re = re.compile(ur"^\s*", re.UNICODE)
tw@2779
   236
    eol_re = re.compile(r'\r?\n', re.UNICODE)
tw@2779
   237
tw@3052
   238
    # this is used inside parser/pre sections (we just want to know when it's over):
tw@3052
   239
    parser_unique = u''
tw@3052
   240
    parser_scan_rule = ur"""
tw@3052
   241
(?P<parser_end>
tw@3052
   242
    %s\}\}\}  # in parser/pre, we only look for the end of the parser/pre
tw@2719
   243
)
tw@2719
   244
"""
tw@3052
   245
tw@1402
   246
tw@2719
   247
    # the big, fat, less ugly one ;)
tw@2719
   248
    # please be very careful: blanks and # must be escaped with \ !
tw@2719
   249
    scan_rules = ur"""
tw@2719
   250
(?P<emph_ibb>
tw@2719
   251
    '''''(?=[^']+''')  # italic on, bold on, ..., bold off
tw@2719
   252
)|(?P<emph_ibi>
tw@2719
   253
    '''''(?=[^']+'')  # italic on, bold on, ..., italic off
tw@2719
   254
)|(?P<emph_ib_or_bi>
tw@2719
   255
    '{5}(?=[^'])  # italic and bold or bold and italic
tw@2719
   256
)|(?P<emph>
tw@2719
   257
    '{2,3}  # italic or bold
tw@2719
   258
)|(?P<u>
tw@2719
   259
    __ # underline
tw@2719
   260
)|(?P<small>
tw@2719
   261
    (
tw@2719
   262
     (?P<small_on>\~-\ ?)  # small on (we eat a trailing blank if it is there)
tw@2719
   263
    |
tw@2719
   264
     (?P<small_off>-\~)  # small off
tw@2719
   265
    )
tw@2719
   266
)|(?P<big>
tw@2719
   267
    (
tw@2719
   268
     (?P<big_on>\~\+\ ?)  # big on (eat trailing blank)
tw@2719
   269
    |
tw@2719
   270
     (?P<big_off>\+\~)  # big off
tw@2719
   271
    )
tw@2719
   272
)|(?P<strike>
tw@2719
   273
    (
tw@2719
   274
     (?P<strike_on>--\()  # strike-through on
tw@2719
   275
    |
tw@2719
   276
     (?P<strike_off>\)--)  # strike-through off
tw@2719
   277
    )
tw@2719
   278
)|(?P<remark>
tw@2719
   279
    (
tw@3052
   280
     (^|(?<=\s))  # we require either beginning of line or some whitespace before a remark begin
tw@3052
   281
     (?P<remark_on>/\*\s)  # inline remark on (require and eat whitespace after it)
tw@3052
   282
    )
tw@2719
   283
    |
tw@3052
   284
    (
tw@3052
   285
     (?P<remark_off>\s\*/)  # off (require and eat whitespace before it)
tw@3052
   286
     (?=\s)  # we require some whitespace after a remark end
tw@2719
   287
    )
tw@2719
   288
)|(?P<sup>
tw@2719
   289
    \^  # superscript on
tw@2719
   290
    (?P<sup_text>.*?)  # capture the text
tw@2719
   291
    \^  # off
tw@2719
   292
)|(?P<sub>
tw@2719
   293
    ,,  # subscript on
tw@3694
   294
    (?P<sub_text>.*?)  # capture the text
tw@2719
   295
    ,,  # off
tw@2719
   296
)|(?P<tt>
tw@2719
   297
    \{\{\{  # teletype on
tw@2719
   298
    (?P<tt_text>.*?)  # capture the text
tw@2719
   299
    \}\}\}  # off
tw@2719
   300
)|(?P<tt_bt>
tw@2719
   301
    `  # teletype (using a backtick) on
tw@2719
   302
    (?P<tt_bt_text>.*?)  # capture the text
tw@2719
   303
    `  # off
tw@2719
   304
)|(?P<interwiki>
tw@3052
   305
    %(interwiki_rule)s  # OtherWiki:PageName
tw@2719
   306
)|(?P<word>  # must come AFTER interwiki rule!
tw@2719
   307
    %(word_rule)s  # CamelCase wiki words
tw@2723
   308
)|
tw@2809
   309
%(link_rule)s
tw@2809
   310
|
tw@2723
   311
%(transclude_rule)s
tw@2723
   312
|(?P<url>
tw@2719
   313
    %(url_rule)s
tw@2719
   314
)|(?P<email>
tw@2719
   315
    [-\w._+]+  # name
tw@2719
   316
    \@  # at
tw@2719
   317
    [\w-]+(\.[\w-]+)+  # server/domain
tw@2719
   318
)|(?P<smiley>
tw@2719
   319
    (^|(?<=\s))  # we require either beginning of line or some space before a smiley
tw@2719
   320
    (%(smiley)s)  # one of the smileys
tw@2719
   321
    (?=\s)  # we require some space after the smiley
tw@2719
   322
)|(?P<macro>
tw@2719
   323
    <<
tw@2779
   324
    (?P<macro_name>\w+)  # name of the macro
tw@2719
   325
    (?:\((?P<macro_args>.*?)\))?  # optionally macro arguments
tw@2719
   326
    >>
tw@2719
   327
)|(?P<heading>
tw@2719
   328
    ^(?P<hmarker>=+)\s+  # some === at beginning of line, eat trailing blanks
tw@2811
   329
    (?P<heading_text>.*?)  # capture heading text
tw@2719
   330
    \s+(?P=hmarker)\s$  # some === at end of line (matching amount as we have seen), eat blanks
tw@2719
   331
)|(?P<parser>
tw@3052
   332
    \{\{\{  # parser on
tw@3052
   333
    (?P<parser_unique>(\{*|\w*))  # either some more {{{{ or some chars to solve the nesting problem
tw@3052
   334
    (?P<parser_line>
tw@3052
   335
     (
tw@3052
   336
      \#!  # hash bang
tw@3052
   337
      (?P<parser_name>\w*)  # we have a parser name (can be empty) directly following the {{{
tw@3052
   338
      (
tw@3052
   339
       \s+  # some space ...
tw@3052
   340
       (?P<parser_args>.+?)  # followed by parser args
tw@3052
   341
      )?  # parser args are optional
tw@3052
   342
      \s*  # followed by whitespace (eat it) until EOL
tw@3052
   343
     )
tw@2719
   344
    |
tw@3052
   345
     (?P<parser_nothing>\s*)  # no parser name, only whitespace up to EOL (eat it)
tw@3052
   346
    )$
tw@3052
   347
    # "parser off" detection is done with parser_scan_rule!
tw@2719
   348
)|(?P<comment>
tw@2719
   349
    ^\#\#.*$  # src code comment, rest of line
tw@2719
   350
)|(?P<ol>
tw@2719
   351
    %(ol_rule)s  # ordered list
tw@2719
   352
)|(?P<dl>
tw@2719
   353
    %(dl_rule)s  # definition list
tw@2719
   354
)|(?P<li>
tw@2719
   355
    ^\s+\*\s*  # unordered list
tw@2719
   356
)|(?P<li_none>
tw@2719
   357
    ^\s+\.\s*  # unordered list, no bullets
tw@2719
   358
)|(?P<indent>
tw@2719
   359
    ^\s+  # indented by some spaces
tw@2719
   360
)|(?P<tableZ>
tw@2719
   361
    \|\|\ $  # the right end of a table row
tw@2719
   362
)|(?P<table>
johannes@2925
   363
    (?:\|\|)+(?:<(?!<)[^>]*?>)?(?!\|?\s$) # a table
tw@2719
   364
)|(?P<rule>
tw@2719
   365
    -{4,}  # hor. rule, min. 4 -
tw@2719
   366
)|(?P<entity>
tw@2719
   367
    &(
tw@2719
   368
      ([a-zA-Z]+)  # symbolic entity, like &uuml;
tw@2719
   369
      |
tw@2719
   370
      (\#(\d{1,5}|x[0-9a-fA-F]+))  # numeric entities, like &#42; or &#x42;
tw@2719
   371
     );
tw@2719
   372
)|(?P<sgml_entity>  # must come AFTER entity rule!
tw@2719
   373
    [<>&]  # needs special treatment for html/xml
tw@2719
   374
)"""  % {
tw@2719
   375
        'url_scheme': url_scheme,
tw@2719
   376
        'url_rule': url_rule,
tw-public@0
   377
        'punct': punct_pattern,
tw-public@0
   378
        'ol_rule': ol_rule,
tw-public@0
   379
        'dl_rule': dl_rule,
tw@3052
   380
        'interwiki_rule': interwiki_rule,
tw-public@0
   381
        'word_rule': word_rule,
tw@2809
   382
        'link_rule': link_rule,
tw@2723
   383
        'transclude_rule': transclude_rule,
tw@2757
   384
        'u': config.chars_upper,
tw@2757
   385
        'l': config.chars_lower,
tw@1866
   386
        'smiley': u'|'.join([re.escape(s) for s in config.smileys])}
tw@2779
   387
    scan_re = re.compile(scan_rules, re.UNICODE|re.VERBOSE)
tw-public@0
   388
tw@2286
   389
    # Don't start a new paragraph before any of these;
    # kept as a dict that maps every listed name to 1.
    no_new_p_before = dict.fromkeys((
        "heading rule table tableZ tr td "
        "ul ol dl dt dd li li_none indent "
        "macro parser").split(), 1)
tw-public@0
   395
tw-public@0
   396
    def __init__(self, raw, request, **kw):
        """
        Set up the parser state for one piece of wiki markup.

        @param raw: the markup text, stored as self.raw
        @param request: the current request; its form, getText and cfg
                        attributes are kept on the parser
        @keyword line_anchors: stored as self.line_anchors (default: True)
        @keyword start_line: stored as self.start_line (default: 0)
        @keyword format_args: class(es) for a div wrapping the formatter
                              output, either a single class like "comment"
                              or multiple like "comment/red/dotted"
                              (default: '')
        """
        # input and request context
        self.raw = raw
        self.request = request
        self.form = request.form # Macro object uses this
        self._ = request.getText
        self.cfg = request.cfg
        self.macro = None

        # keyword options
        self.line_anchors = kw.get('line_anchors', True)
        self.start_line = kw.get('start_line', 0)

        # currently the only (optional) parser argument: slash-separated
        # class names become space-separated classes of the wrapping div
        format_args = kw.get('format_args', '')
        self.wrapping_div_class = format_args.strip().replace('/', ' ')

        # inline markup state
        self.is_em = self.is_b = 0 # both must be int
        self.is_u = self.is_strike = False
        self.is_big = self.is_small = False
        self.is_remark = False

        # block level state
        self.lineno = 0
        self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl>
        self.in_li = 0 # between <li> and </li>
        self.in_dd = 0 # between <dd> and </dd>
        self.in_table = 0
        self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph

        # "pre" section state, one of:
        #   None            - not in any kind of pre section (was: 0)
        #   'search_parser' - no parser yet, still searching for it (was: 1)
        #   'found_parser'  - a valid parser was found (was: 2)
        self.in_pre = None

        # nesting level (in chars) and types of the currently open lists
        self.list_indents = []
        self.list_types = []
tw@1356
   436
tw-public@0
   437
    def _close_item(self, result):
        """
        Close an open table, list item and/or definition description.

        The closing formatter output is appended to `result`, and the
        in_table / in_li / in_dd flags that were set are reset to 0.
        """
        if self.in_table:
            result.append(self.formatter.table(0))
            self.in_table = 0

        # <li> and <dd> are closed the same way: reset the flag, close a
        # still open paragraph, then close the item itself
        for flag, closer in (('in_li', 'listitem'), ('in_dd', 'definition_desc')):
            if not getattr(self, flag):
                continue
            setattr(self, flag, 0)
            formatter = self.formatter
            if formatter.in_p:
                result.append(formatter.paragraph(0))
            result.append(getattr(formatter, closer)(0))
tw-public@0
   453
tw@2719
   454
    def _u_repl(self, word, groups):
        """Toggle the underline state and emit the matching formatter output."""
        underlined = not self.is_u
        self.is_u = underlined
        return self.formatter.underline(underlined)
tw-public@0
   458
tw@2719
   459
    def _remark_repl(self, word, groups):
        """
        Open or close an inline remark (a span with css class 'comment').

        A 'remark_on' while a remark is already open, or a 'remark_off'
        while none is open, is emitted as plain text instead.
        """
        opening = groups.get('remark_on')
        closing = groups.get('remark_off')
        if (opening and self.is_remark) or (closing and not self.is_remark):
            return self.formatter.text(word)
        self.is_remark = not self.is_remark
        return self.formatter.span(self.is_remark, css_class='comment')
    _remark_on_repl = _remark_repl
    _remark_off_repl = _remark_repl
tw@2719
   471
tw@2719
   472
    def _strike_repl(self, word, groups):
        """
        Open or close strike-through.

        A 'strike_on' while strike-through is already active, or a
        'strike_off' while it is not, is emitted as plain text instead.
        """
        opening = groups.get('strike_on')
        closing = groups.get('strike_off')
        if (opening and self.is_strike) or (closing and not self.is_strike):
            return self.formatter.text(word)
        self.is_strike = not self.is_strike
        return self.formatter.strike(self.is_strike)
    _strike_on_repl = _strike_repl
    _strike_off_repl = _strike_repl
tw-public@0
   484
tw@2719
   485
    def _small_repl(self, word, groups):
        """
        Open or close small text.

        A 'small_on' while small is already active, or a 'small_off'
        while it is not, is emitted as plain text instead.
        """
        opening = groups.get('small_on')
        closing = groups.get('small_off')
        if (opening and self.is_small) or (closing and not self.is_small):
            return self.formatter.text(word)
        self.is_small = not self.is_small
        return self.formatter.small(self.is_small)
    _small_on_repl = _small_repl
    _small_off_repl = _small_repl
tw-public@0
   497
tw@2719
   498
    def _big_repl(self, word, groups):
        """
        Open or close big text.

        A 'big_on' while big is already active, or a 'big_off' while it
        is not, is emitted as plain text instead.
        """
        opening = groups.get('big_on')
        closing = groups.get('big_off')
        if (opening and self.is_big) or (closing and not self.is_big):
            return self.formatter.text(word)
        self.is_big = not self.is_big
        return self.formatter.big(self.is_big)
    _big_on_repl = _big_repl
    _big_off_repl = _big_repl
tw-public@0
   510
tw@2719
   511
    def _emph_repl(self, word, groups):
        """
        Toggle bold (three quotes) or italics (any other marker length).

        When the toggled style is switched on while the other one is
        already active, its state is recorded as 2 instead of True.
        """
        if len(word) != 3:
            italic = not self.is_em
            if italic and self.is_b:
                italic = 2
            self.is_em = italic
            return self.formatter.emphasis(italic)

        bold = not self.is_b
        if bold and self.is_em:
            bold = 2
        self.is_b = bold
        return self.formatter.strong(bold)
tw-public@0
   523
tw@2719
   524
    def _emph_ibb_repl(self, word, groups):
        """
        Handle five quotes that are followed by a bold-off marker.

        Both states are toggled; if both end up switched on, bold is
        recorded as 2. Emphasis output comes first, then strong.
        """
        bold = not self.is_b
        italic = not self.is_em
        if italic and bold:
            bold = 2
        self.is_b = bold
        self.is_em = italic
        formatter = self.formatter
        return formatter.emphasis(italic) + formatter.strong(bold)
tw-public@0
   531
tw@2719
   532
    def _emph_ibi_repl(self, word, groups):
        """
        Handle five quotes that are followed by an italic-off marker.

        Both states are toggled; if both end up switched on, italic is
        recorded as 2. Strong output comes first, then emphasis.
        """
        bold = not self.is_b
        italic = not self.is_em
        if italic and bold:
            italic = 2
        self.is_b = bold
        self.is_em = italic
        formatter = self.formatter
        return formatter.strong(bold) + formatter.emphasis(italic)
tw-public@0
   539
tw@2719
   540
    def _emph_ib_or_bi_repl(self, word, groups):
        """
        Handle exactly five quotes: toggle bold and italics together.

        The output order depends on the states before toggling: if
        is_b > is_em > 0, strong is emitted before emphasis, otherwise
        emphasis is emitted before strong.
        """
        strong_first = 0 < self.is_em < self.is_b
        self.is_b = not self.is_b
        self.is_em = not self.is_em
        formatter = self.formatter
        if strong_first:
            return formatter.strong(self.is_b) + formatter.emphasis(self.is_em)
        return formatter.emphasis(self.is_em) + formatter.strong(self.is_b)
tw-public@0
   549
tw@2719
   550
    def _sup_repl(self, word, groups):
        """Render the captured 'sup_text' group as superscript."""
        content = groups.get('sup_text', '')
        formatter = self.formatter
        opened = formatter.sup(1)
        body = formatter.text(content)
        return opened + body + formatter.sup(0)
    _sup_text_repl = _sup_repl
tw-public@0
   557
tw@2719
   558
    def _sub_repl(self, word, groups):
        """Render the captured 'sub_text' group as subscript."""
        content = groups.get('sub_text', '')
        formatter = self.formatter
        opened = formatter.sub(1)
        body = formatter.text(content)
        return opened + body + formatter.sub(0)
    _sub_text_repl = _sub_repl
tw-public@0
   565
tw@2719
   566
    def _tt_repl(self, word, groups):
        """Render the captured 'tt_text' group as inline code."""
        content = groups.get('tt_text', '')
        formatter = self.formatter
        opened = formatter.code(1)
        body = formatter.text(content)
        return opened + body + formatter.code(0)
    _tt_text_repl = _tt_repl
tw-public@0
   573
tw@2719
   574
    def _tt_bt_repl(self, word, groups):
        """Render the captured 'tt_bt_text' group as backticked inline code."""
        content = groups.get('tt_bt_text', '')
        formatter = self.formatter
        opened = formatter.code(1, css="backtick")
        body = formatter.text(content)
        return opened + body + formatter.code(0)
    _tt_bt_text_repl = _tt_bt_repl
tw@2719
   581
tw@2719
   582
    def _rule_repl(self, word, groups):
        """
        Render a sequence of dashes as a horizontal rule.

        The output of _undent() and _closeP() is put in front of the rule.
        Up to 4 dashes give the default rule; more dashes give a sized one.
        """
        prefix = self._undent() + self._closeP()
        dashes = len(word)
        if dashes > 4:
            # variable rule size 1 - 6 (anything above 10 dashes counts as 10),
            # the actual size is defined in css
            return prefix + self.formatter.rule(min(dashes, 10) - 4)
        return prefix + self.formatter.rule()
tw-public@0
   592
tw@2719
   593
    def _interwiki_repl(self, word, groups):
        """
        Render a free InterWiki link (OtherWiki:PageName).

        If wikiutil.resolve_interwiki() flags the wiki tag as bad, the whole
        'interwiki' match is emitted as plain text. Otherwise an interwiki
        link is emitted, with the page name (without anchor) as link text.
        """
        wiki_name = groups.get('interwiki_wiki')
        page_name = groups.get('interwiki_page')

        resolved = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
        wikitag_bad = resolved[3]
        if wikitag_bad:
            return self.formatter.text(groups.get('interwiki'))

        page_name, anchor = wikiutil.split_anchor(page_name)
        formatter = self.formatter
        link_open = formatter.interwikilink(1, wiki_name, page_name, anchor=anchor)
        label = formatter.text(page_name)
        return link_open + label + formatter.interwikilink(0, wiki_name, page_name)
    _interwiki_wiki_repl = _interwiki_repl
    _interwiki_page_repl = _interwiki_repl
tw@2719
   609
tw@2719
   610
    def _word_repl(self, word, groups):
        """
        Render a WikiName (CamelCase word).

        With a leading bang and cfg.bang_meta set, the word is emitted via
        formatter.nowikiword(). A name that resolves to the current page is
        emitted as plain text; anything else becomes a page link.
        """
        bang = ''
        if groups.get('word_bang'):
            if self.cfg.bang_meta:
                # handle !NotWikiNames
                return self.formatter.nowikiword(word)
            bang = self.formatter.text('!')

        page_here = self.formatter.page.page_name
        target = wikiutil.AbsPageName(page_here, groups.get('word_name'))
        if target == page_here:
            # a simple, self-referencing link is emitted as plain text
            return self.formatter.text(word)

        target, anchor = wikiutil.split_anchor(target)
        formatter = self.formatter
        link_open = formatter.pagelink(1, target, anchor=anchor)
        label = formatter.text(word)
        return bang + link_open + label + formatter.pagelink(0, target)
    _word_bang_repl = _word_repl
    _word_name_repl = _word_repl
    _word_anchor_repl = _word_repl
tw-public@0
   635
tw@2721
   636
    def _url_repl(self, word, groups):
tw@2719
   637
        """Handle literal URLs."""
tw@2719
   638
        scheme = groups.get('url_scheme', 'http')
tw@2719
   639
        target = groups.get('url_target', '')
tw@2719
   640
        return (self.formatter.url(1, target, css=scheme) +
tw@2719
   641
                self.formatter.text(target) +
tw@2719
   642
                self.formatter.url(0))
tw@2721
   643
    _url_target_repl = _url_repl
tw@2721
   644
    _url_scheme_repl = _url_repl
MoinMoinBugs/BracketURLwithAnchorParsedAndLinkedWrong">Florian@147
   645
tw@2809
   646
    def _transclude_description(self, desc, default_text=''):
tw@2809
   647
        """ parse a string <desc> valid as transclude description (text, ...)
tw@3255
   648
            and return the description.
tw@3255
   649
tw@3255
   650
            We do NOT use wikiutil.escape here because it is html specific (the
tw@3255
   651
            html formatter, if used, does this for all html attributes).
tw@3255
   652
tw@3255
   653
            We do NOT call formatter.text here because it sometimes is just used
tw@3255
   654
            for some alt and/or title attribute, but not emitted as text.
tw@2809
   655
tw@2809
   656
            @param desc: the transclude description to parse
tw@2809
   657
            @param default_text: use this text if parsing desc returns nothing.
tw@2809
   658
        """
tw@2809
   659
        m = self.transclude_desc_re.match(desc)
tw@2809
   660
        if m:
tw@2809
   661
            if m.group('simple_text'):
tw@2809
   662
                desc = m.group('simple_text')
tw@2809
   663
        else:
tw@2809
   664
            desc = default_text
tw@2809
   665
        return desc
tw@2809
   666
tw@3052
   667
    def _get_params(self, params, tag_attrs=None, acceptable_attrs=None, query_args=None):
tw@2812
   668
        """ parse the parameters of link/transclusion markup,
tw@2812
   669
            defaults can be a dict with some default key/values
tw@2812
   670
            that will be in the result as given, unless overriden
tw@2812
   671
            by the params.
tw@2812
   672
        """
tw@3052
   673
        if tag_attrs is None:
tw@3052
   674
            tag_attrs = {}
tw@3052
   675
        if query_args is None:
tw@3052
   676
            query_args = {}
tw@2812
   677
        if params:
tw@2812
   678
            fixed, kw, trailing = wikiutil.parse_quoted_separated(params)
tw@2812
   679
            # we ignore fixed and trailing args and only use kw args:
tw@3052
   680
            if acceptable_attrs is None:
tw@3052
   681
                acceptable_attrs = []
tw@2812
   682
            for key, val in kw.items():
tw@3257
   683
                # wikiutil.escape for key/val must be done by (html) formatter!
tw@3052
   684
                if key in acceptable_attrs:
johannes@3336
   685
                    # tag attributes must be string type
johannes@3336
   686
                    tag_attrs[str(key)] = val
tw@3052
   687
                elif key.startswith('&'):
tw@3052
   688
                    key = key[1:]
tw@3052
   689
                    query_args[key] = val
tw@3052
   690
        return tag_attrs, query_args
tw@2812
   691
tw@2721
   692
    def _transclude_repl(self, word, groups):
        """Handle {{target|desc|params}} transclusion, usually embedding images.

        Depending on what self.link_target_re recognizes in the target:
         - external address: emitted as an (external) image
         - attachment: inlined (text), shown as image (browser supported
           image types), embedded as object (other existing attachments) or
           rendered as attachment link (attachment does not exist)
         - drawing: emitted via formatter.attachment_drawing
         - page name / interwiki page: experimental client side transclusion
         - anything else: the markup is emitted as text

        @param word: the complete matched markup
        @param groups: named groups of the match; 'transclude_target',
                       'transclude_desc' and 'transclude_params' are read
        @return: formatter output; word + '???' if the target is not handled
        """
        target = groups.get('transclude_target', '')
        target = wikiutil.url_unquote(target)
        desc = groups.get('transclude_desc', '') or ''
        params = groups.get('transclude_params', u'') or u''
        acceptable_attrs_img = ['class', 'title', 'longdesc', 'width', 'height', 'align', ] # no style because of JS
        acceptable_attrs_object = ['class', 'title', 'width', 'height', # no style because of JS
                                   'type', 'standby', ] # we maybe need a hack for <PARAM> here
        m = self.link_target_re.match(target)
        if m:
            if m.group('extern_addr'):
                # currently only supports ext. image inclusion
                target = m.group('extern_addr')
                desc = self._transclude_description(desc, target)
                tag_attrs, query_args = self._get_params(params,
                                                         tag_attrs={'class': 'external_image',
                                                                    'alt': desc,
                                                                    'title': desc, },
                                                         acceptable_attrs=acceptable_attrs_img)
                # An image is emitted here (not a generic transclusion) because
                # FF2 has a bug with target mimetype detection: it looks at the
                # url path and expects to find some "filename extension" there
                # (like .png) and this (not the response http headers) will set
                # the default content-type of the object. This will often work
                # for statically served files, but fails for MoinMoin
                # attachments (they don't have the filename.ext in the path,
                # but in the query string). FF3 seems to have this bug fixed,
                # opera 9.2 also works.
                return self.formatter.image(src=target, **tag_attrs)

            elif m.group('attach_scheme'):
                scheme = m.group('attach_scheme')
                url = wikiutil.url_unquote(m.group('attach_addr'))
                if scheme == 'attachment':
                    mt = wikiutil.MimeType(filename=url)
                    if mt.major == 'text':
                        desc = self._transclude_description(desc, url)
                        return self.formatter.attachment_inlined(url, desc)
                    # distinguishes whether the browser needs a plugin in place
                    elif mt.major == 'image' and mt.minor in config.browser_supported_images:
                        desc = self._transclude_description(desc, url)
                        tag_attrs, query_args = self._get_params(params,
                                                                 tag_attrs={'alt': desc,
                                                                            'title': desc, },
                                                                 acceptable_attrs=acceptable_attrs_img)
                        return self.formatter.attachment_image(url, **tag_attrs)
                    else:
                        from MoinMoin.action import AttachFile
                        pagename = self.formatter.page.page_name
                        if AttachFile.exists(self.request, pagename, url):
                            href = AttachFile.getAttachUrl(pagename, url, self.request, escaped=0)
                            # The mimetype is only the default for 'type': it
                            # is an acceptable user parameter, and passing it as
                            # a separate keyword as well would raise TypeError
                            # (duplicate keyword argument) when the user gives it.
                            tag_attrs, query_args = self._get_params(params,
                                                                     tag_attrs={'title': desc,
                                                                                'type': mt.spoil(), },
                                                                     acceptable_attrs=acceptable_attrs_object)
                            return (self.formatter.transclusion(1, data=href, **tag_attrs) +
                                    self.formatter.text(self._transclude_description(desc, url)) +
                                    self.formatter.transclusion(0))
                        else:
                            return (self.formatter.attachment_link(1, url) +
                                    self.formatter.text(self._transclude_description(desc, url)) +
                                    self.formatter.attachment_link(0))

                elif scheme == 'drawing':
                    desc = self._transclude_description(desc, url)
                    if desc:
                        tag_attrs = {'alt': desc, 'title': desc, }
                    else:
                        tag_attrs = {}
                    tag_attrs, query_args = self._get_params(params,
                                                             tag_attrs=tag_attrs,
                                                             acceptable_attrs=acceptable_attrs_img)
                    return self.formatter.attachment_drawing(url, desc, **tag_attrs)

            elif m.group('page_name'):
                # experimental client side transclusion
                page_name_all = m.group('page_name')
                if ':' in page_name_all:
                    wiki_name, page_name = page_name_all.split(':', 1)
                    wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
                else:
                    err = True
                tag_attrs, query_args = self._get_params(params,
                                                         tag_attrs={'type': 'text/html',
                                                                    'width': '100%', },
                                                         acceptable_attrs=acceptable_attrs_object)
                if 'action' not in query_args:
                    query_args['action'] = 'content' # XXX moin specific (for interwiki targets)
                if err: # not a interwiki link / not in interwiki map
                    url = Page(self.request, page_name_all).url(self.request, querystr=query_args)
                    fallback_desc = page_name_all
                else: # looks like a valid interwiki link
                    url = wikiutil.join_wiki(wikiurl, wikitail)
                    url += '?%s' % wikiutil.makeQueryString(query_args)
                    fallback_desc = page_name
                return (self.formatter.transclusion(1, data=url, **tag_attrs) +
                        self.formatter.text(self._transclude_description(desc, fallback_desc)) +
                        self.formatter.transclusion(0))

            else:
                desc = self._transclude_description(desc, target)
                return self.formatter.text('{{%s|%s|%s}}' % (target, desc, params))
        # no match, or an attach scheme that is neither attachment nor drawing
        return word +'???'
    _transclude_target_repl = _transclude_repl
    _transclude_desc_repl = _transclude_repl
    _transclude_params_repl = _transclude_repl
tw@2701
   819
tw@2735
   820
    def _link_description(self, desc, target='', default_text=''):
tw@2735
   821
        """ parse a string <desc> valid as link description (text, transclusion, ...)
tw@2735
   822
            and return formatted content.
tw@2735
   823
tw@2735
   824
            @param desc: the link description to parse
tw@2735
   825
            @param default_text: use this text (formatted as text) if parsing
tw@2735
   826
                                 desc returns nothing.
tw@2735
   827
            @param target: target of the link (as readable markup) - used for
tw@2735
   828
                           transcluded image's description
tw@2735
   829
        """
tw@2735
   830
        m = self.link_desc_re.match(desc)
tw@2735
   831
        if m:
tw@2735
   832
            if m.group('simple_text'):
tw@2735
   833
                desc = m.group('simple_text')
tw@2735
   834
                desc = self.formatter.text(desc)
tw@2735
   835
            elif m.group('transclude'):
tw@2735
   836
                groupdict = m.groupdict()
tw@2809
   837
                if groupdict.get('transclude_desc') is None:
tw@2735
   838
                    # if transcluded obj (image) has no description, use target for it
tw@2809
   839
                    groupdict['transclude_desc'] = target
tw@2735
   840
                desc = m.group('transclude')
tw@2735
   841
                desc = self._transclude_repl(desc, groupdict)
tw-public@0
   842
        else:
tw@2735
   843
            desc = default_text
tw@2735
   844
            if desc:
tw@2735
   845
                desc = self.formatter.text(desc)
tw@2735
   846
        return desc
tw-public@0
   847
tw@2721
   848
    def _link_repl(self, word, groups):
tw@2719
   849
        """Handle [[target|text]] links."""
tw@2719
   850
        target = groups.get('link_target', '')
tw@2811
   851
        desc = groups.get('link_desc', '') or ''
tw@2812
   852
        params = groups.get('link_params', u'') or u''
tw@3052
   853
        acceptable_attrs = ['class', 'title', 'target', 'accesskey', ] # no style because of JS
tw@2730
   854
        mt = self.link_target_re.match(target)
tw@2730
   855
        if mt:
tw@2730
   856
            if mt.group('page_name'):
tw@2778
   857
                page_name_and_anchor = mt.group('page_name')
tw@3052
   858
                if ':' in page_name_and_anchor:
tw@3052
   859
                    wiki_name, page_name = page_name_and_anchor.split(':', 1)
tw@3052
   860
                    wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
tw@3052
   861
                else:
tw@3052
   862
                    err = True
tw@3052
   863
                if err: # not a interwiki link / not in interwiki map
tw@4493
   864
                    page_name, anchor = wikiutil.split_anchor(page_name_and_anchor)
tw@3052
   865
                    current_page = self.formatter.page.page_name
tw@3052
   866
                    if not page_name:
tw@3052
   867
                        page_name = current_page
tw@3052
   868
                    # handle relative links
tw@3052
   869
                    abs_page_name = wikiutil.AbsPageName(current_page, page_name)
tw@3052
   870
                    tag_attrs, query_args = self._get_params(params,
tw@3052
   871
                                                             tag_attrs={},
tw@3052
   872
                                                             acceptable_attrs=acceptable_attrs)
tw@3052
   873
                    return (self.formatter.pagelink(1, abs_page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
tw@3052
   874
                            self._link_description(desc, target, page_name_and_anchor) +
tw@3052
   875
                            self.formatter.pagelink(0, abs_page_name))
tw@3052
   876
                else: # interwiki link
tw@4493
   877
                    page_name, anchor = wikiutil.split_anchor(page_name)
tw@3052
   878
                    tag_attrs, query_args = self._get_params(params,
tw@3052
   879
                                                             tag_attrs={},
tw@3052
   880
                                                             acceptable_attrs=acceptable_attrs)
tw@4493
   881
                    return (self.formatter.interwikilink(1, wiki_name, page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
tw@3052
   882
                            self._link_description(desc, target, page_name) +
tw@3052
   883
                            self.formatter.interwikilink(0, wiki_name, page_name))
tw-public@0
   884
tw@2730
   885
            elif mt.group('extern_addr'):
tw@2730
   886
                scheme = mt.group('extern_scheme')
tw@2730
   887
                target = mt.group('extern_addr')
tw@3052
   888
                tag_attrs, query_args = self._get_params(params,
tw@3052
   889
                                                         tag_attrs={'class': scheme, },
tw@3052
   890
                                                         acceptable_attrs=acceptable_attrs)
tw@3052
   891
                return (self.formatter.url(1, target, **tag_attrs) +
tw@2735
   892
                        self._link_description(desc, target, target) +
tw@2719
   893
                        self.formatter.url(0))
tw@828
   894
tw@2730
   895
            elif mt.group('attach_scheme'):
tw@2730
   896
                scheme = mt.group('attach_scheme')
tw@4569
   897
                url = wikiutil.url_unquote(mt.group('attach_addr'))
tw@3052
   898
                tag_attrs, query_args = self._get_params(params,
tw@3052
   899
                                                         tag_attrs={'title': desc, },
tw@3052
   900
                                                         acceptable_attrs=acceptable_attrs)
tw@2722
   901
                if scheme == 'attachment':
tw@3052
   902
                    return (self.formatter.attachment_link(1, url, querystr=query_args, **tag_attrs) +
tw@2735
   903
                            self._link_description(desc, target, url) +
tw@2735
   904
                            self.formatter.attachment_link(0))
tw@2719
   905
                elif scheme == 'drawing':
tw@3052
   906
                    return self.formatter.attachment_drawing(url, desc, alt=desc, **tag_attrs)
tw@2719
   907
            else:
tw@2735
   908
                if desc:
tw@2735
   909
                    desc = '|' + desc
tw@2735
   910
                return self.formatter.text('[[%s%s]]' % (target, desc))
tw@2721
   911
    _link_target_repl = _link_repl
tw@2809
   912
    _link_desc_repl = _link_repl
tw@2811
   913
    _link_params_repl = _link_repl
tw-public@0
   914
tw@2719
   915
    def _email_repl(self, word, groups):
tw-public@0
   916
        """Handle email addresses (without a leading mailto:)."""
tw@2719
   917
        return (self.formatter.url(1, "mailto:%s" % word, css='mailto') +
tw-public@0
   918
                self.formatter.text(word) +
tw-public@0
   919
                self.formatter.url(0))
tw-public@0
   920
tw@2719
   921
    def _sgml_entity_repl(self, word, groups):
tw-public@0
   922
        """Handle SGML entities."""
tw-public@0
   923
        return self.formatter.text(word)
tw-public@0
   924
tw@2719
   925
    def _entity_repl(self, word, groups):
tw@2719
   926
        """Handle numeric (decimal and hexadecimal) and symbolic SGML entities."""
tw-public@0
   927
        return self.formatter.rawHTML(word)
tw-public@0
   928
tw@2719
   929
    def _indent_repl(self, match, groups):
tw@407
   930
        """Handle pure indentation (no - * 1. markup)."""
tw@407
   931
        result = []
tw@521
   932
        if not (self.in_li or self.in_dd):
tw@407
   933
            self._close_item(result)
tw@407
   934
            self.in_li = 1
tw@407
   935
            css_class = None
tw@407
   936
            if self.line_was_empty and not self.first_list_item:
tw@407
   937
                css_class = 'gap'
tw@407
   938
            result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
tw@407
   939
        return ''.join(result)
tw@407
   940
tw@2719
   941
    def _li_none_repl(self, match, groups):
tw@407
   942
        """Handle type=none (" .") lists."""
tw@407
   943
        result = []
tw@407
   944
        self._close_item(result)
tw@407
   945
        self.in_li = 1
tw@407
   946
        css_class = None
tw@407
   947
        if self.line_was_empty and not self.first_list_item:
tw@407
   948
            css_class = 'gap'
tw@407
   949
        result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
tw@407
   950
        return ''.join(result)
tw-public@0
   951
tw@2719
   952
    def _li_repl(self, match, groups):
tw@407
   953
        """Handle bullet (" *") lists."""
tw-public@0
   954
        result = []
tw-public@0
   955
        self._close_item(result)
tw-public@0
   956
        self.in_li = 1
tw@407
   957
        css_class = None
tw-public@0
   958
        if self.line_was_empty and not self.first_list_item:
tw-public@0
   959
            css_class = 'gap'
tw@407
   960
        result.append(self.formatter.listitem(1, css_class=css_class))
tw-public@0
   961
        return ''.join(result)
tw-public@0
   962
tw@2719
   963
    def _ol_repl(self, match, groups):
tw-public@0
   964
        """Handle numbered lists."""
tw@2719
   965
        return self._li_repl(match, groups)
tw-public@0
   966
tw@2719
   967
    def _dl_repl(self, match, groups):
tw-public@0
   968
        """Handle definition lists."""
tw-public@0
   969
        result = []
tw-public@0
   970
        self._close_item(result)
tw-public@0
   971
        self.in_dd = 1
tw-public@0
   972
        result.extend([
tw-public@0
   973
            self.formatter.definition_term(1),
tw-public@0
   974
            self.formatter.text(match[1:-3].lstrip(' ')),
tw-public@0
   975
            self.formatter.definition_term(0),
tw-public@0
   976
            self.formatter.definition_desc(1),
tw-public@0
   977
        ])
tw-public@0
   978
        return ''.join(result)
tw-public@0
   979
tw-public@0
   980
    def _indent_level(self):
tw-public@0
   981
        """Return current char-wise indent level."""
tw-public@0
   982
        return len(self.list_indents) and self.list_indents[-1]
tw-public@0
   983
tw-public@0
   984
    def _indent_to(self, new_level, list_type, numtype, numstart):
tw-public@0
   985
        """Close and open lists."""
tw@1360
   986
        openlist = []   # don't make one out of these two statements!
tw@1360
   987
        closelist = []
tw-public@0
   988
tw-public@0
   989
        if self._indent_level() != new_level and self.in_table:
tw@1360
   990
            closelist.append(self.formatter.table(0))
tw-public@0
   991
            self.in_table = 0
tw@1356
   992
tw@407
   993
        while self._indent_level() > new_level:
tw@1360
   994
            self._close_item(closelist)
tw-public@0
   995
            if self.list_types[-1] == 'ol':
tw-public@0
   996
                tag = self.formatter.number_list(0)
tw-public@0
   997
            elif self.list_types[-1] == 'dl':
tw-public@0
   998
                tag = self.formatter.definition_list(0)
tw-public@0
   999
            else:
tw-public@0
  1000
                tag = self.formatter.bullet_list(0)
tw@1360
  1001
            closelist.append(tag)
tw-public@0
  1002
tw@521
  1003
            del self.list_indents[-1]
tw@521
  1004
            del self.list_types[-1]
tw@1356
  1005
tw-public@0
  1006
            if self.list_types: # we are still in a list
tw-public@0
  1007
                if self.list_types[-1] == 'dl':
tw-public@0
  1008
                    self.in_dd = 1
tw-public@0
  1009
                else:
tw-public@0
  1010
                    self.in_li = 1
tw@1356
  1011
tw-public@0
  1012
        # Open new list, if necessary
tw-public@0
  1013
        if self._indent_level() < new_level:
tw-public@0
  1014
            self.list_indents.append(new_level)
tw-public@0
  1015
            self.list_types.append(list_type)
tw-public@0
  1016
tw-public@0
  1017
            if self.formatter.in_p:
tw@1360
  1018
                closelist.append(self.formatter.paragraph(0))
tw@1356
  1019
tw-public@0
  1020
            if list_type == 'ol':
tw-public@0
  1021
                tag = self.formatter.number_list(1, numtype, numstart)
tw-public@0
  1022
            elif list_type == 'dl':
tw-public@0
  1023
                tag = self.formatter.definition_list(1)
tw-public@0
  1024
            else:
tw-public@0
  1025
                tag = self.formatter.bullet_list(1)
tw@1360
  1026
            openlist.append(tag)
tw@1356
  1027
tw-public@0
  1028
            self.first_list_item = 1
tw-public@0
  1029
            self.in_li = 0
tw-public@0
  1030
            self.in_dd = 0
tw@1356
  1031
tw-public@0
  1032
        # If list level changes, close an open table
tw@1360
  1033
        if self.in_table and (openlist or closelist):
tw@1360
  1034
            closelist[0:0] = [self.formatter.table(0)]
tw-public@0
  1035
            self.in_table = 0
tw@1356
  1036
tw@553
  1037
        self.in_list = self.list_types != []
tw@1360
  1038
        return ''.join(closelist) + ''.join(openlist)
tw-public@0
  1039
tw-public@0
  1040
    def _undent(self):
tw-public@0
  1041
        """Close all open lists."""
tw-public@0
  1042
        result = []
tw-public@0
  1043
        #result.append("<!-- _undent start -->\n")
tw-public@0
  1044
        self._close_item(result)
tw@407
  1045
        for type in self.list_types[::-1]:
tw-public@0
  1046
            if type == 'ol':
tw-public@0
  1047
                result.append(self.formatter.number_list(0))
tw-public@0
  1048
            elif type == 'dl':
tw-public@0
  1049
                result.append(self.formatter.definition_list(0))
tw-public@0
  1050
            else:
tw-public@0
  1051
                result.append(self.formatter.bullet_list(0))
tw-public@0
  1052
        #result.append("<!-- _undent end -->\n")
tw-public@0
  1053
        self.list_indents = []
tw-public@0
  1054
        self.list_types = []
tw-public@0
  1055
        return ''.join(result)
tw-public@0
  1056
tw-public@0
  1057
    def _getTableAttrs(self, attrdef):
johannes@2925
  1058
        attr_rule = r'^(\|\|)*<(?!<)(?P<attrs>[^>]*?)>'
johannes@2925
  1059
        m = re.match(attr_rule, attrdef, re.U)
johannes@2925
  1060
        if not m:
tw-public@0
  1061
            return {}, ''
johannes@2925
  1062
        attrdef = m.group('attrs')
tw-public@0
  1063
tw-public@0
  1064
        # extension for special table markup
tw-public@0
  1065
        def table_extension(key, parser, attrs, wiki_parser=self):
tw@517
  1066
            """ returns: tuple (found_flag, msg)
tw@517
  1067
                found_flag: whether we found something and were able to process it here
tw@517
  1068
                  true for special stuff like 100% or - or #AABBCC
tw@517
  1069
                  false for style xxx="yyy" attributes
tw@517
  1070
                msg: "" or an error msg
tw@517
  1071
            """
tw-public@0
  1072
            _ = wiki_parser._
tw@517
  1073
            found = False
tw-public@0
  1074
            msg = ''
tw-public@0
  1075
            if key[0] in "0123456789":
tw-public@0
  1076
                token = parser.get_token()
tw-public@0
  1077
                if token != '%':
tw-public@0
  1078
                    wanted = '%'
tw@3122
  1079
                    msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
tw-public@0
  1080
                        'wanted': wanted, 'key': key, 'token': token}
tw-public@0
  1081
                else:
tw-public@0
  1082
                    try:
tw-public@0
  1083
                        dummy = int(key)
tw-public@0
  1084
                    except ValueError:
tw@3122
  1085
                        msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
tw-public@0
  1086
                            'key': key, 'token': token}
tw-public@0
  1087
                    else:
tw@517
  1088
                        found = True
tw-public@0
  1089
                        attrs['width'] = '"%s%%"' % key
tw-public@0
  1090
            elif key == '-':
tw-public@0
  1091
                arg = parser.get_token()
tw-public@0
  1092
                try:
tw-public@0
  1093
                    dummy = int(arg)
tw-public@0
  1094
                except ValueError:
tw@3122
  1095
                    msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
tw-public@0
  1096
                        'arg': arg, 'key': key}
tw-public@0
  1097
                else:
tw@517
  1098
                    found = True
tw-public@0
  1099
                    attrs['colspan'] = '"%s"' % arg
tw-public@0
  1100
            elif key == '|':
tw-public@0
  1101
                arg = parser.get_token()
tw-public@0
  1102
                try:
tw-public@0
  1103
                    dummy = int(arg)
tw-public@0
  1104
                except ValueError:
tw@3122
  1105
                    msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
tw-public@0
  1106
                        'arg': arg, 'key': key}
tw-public@0
  1107
                else:
tw@517
  1108
                    found = True
tw-public@0
  1109
                    attrs['rowspan'] = '"%s"' % arg
tw-public@0
  1110
            elif key == '(':
tw@517
  1111
                found = True
tw-public@0
  1112
                attrs['align'] = '"left"'
tw-public@0
  1113
            elif key == ':':
tw@517
  1114
                found = True
tw-public@0
  1115
                attrs['align'] = '"center"'
tw-public@0
  1116
            elif key == ')':
tw@517
  1117
                found = True
tw-public@0
  1118
                attrs['align'] = '"right"'
tw-public@0
  1119
            elif key == '^':
tw@517
  1120
                found = True
tw-public@0
  1121
                attrs['valign'] = '"top"'
tw-public@0
  1122
            elif key == 'v':
tw@517
  1123
                found = True
tw-public@0
  1124
                attrs['valign'] = '"bottom"'
tw-public@0
  1125
            elif key == '#':
tw-public@0
  1126
                arg = parser.get_token()
tw-public@0
  1127
                try:
tw@3052
  1128
                    if len(arg) != 6:
tw@3052
  1129
                        raise ValueError
tw-public@0
  1130
                    dummy = int(arg, 16)
tw-public@0
  1131
                except ValueError:
tw@3122
  1132
                    msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
tw-public@0
  1133
                        'arg': arg, 'key': key}
tw-public@0
  1134
                else:
tw@517
  1135
                    found = True
tw-public@0
  1136
                    attrs['bgcolor'] = '"#%s"' % arg
tw@517
  1137
            return found, self.formatter.rawHTML(msg)
tw-public@0
  1138
tw-public@0
  1139
        # scan attributes
tw-public@0
  1140
        attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension)
tw@529
  1141
        if msg:
tw@529
  1142
            msg = '<strong class="highlight">%s</strong>' % msg
tw@3127
  1143
        #logging.debug("parseAttributes returned %r" % attr)
tw-public@0
  1144
        return attr, msg
tw-public@0
  1145
tw@2719
  1146
    def _tableZ_repl(self, word, groups):
tw-public@0
  1147
        """Handle table row end."""
tw-public@0
  1148
        if self.in_table:
tw-public@0
  1149
            result = ''
tw-public@0
  1150
            # REMOVED: check for self.in_li, p should always close
tw-public@0
  1151
            if self.formatter.in_p:
tw-public@0
  1152
                result = self.formatter.paragraph(0)
tw-public@0
  1153
            result += self.formatter.table_cell(0) + self.formatter.table_row(0)
tw-public@0
  1154
            return result
tw-public@0
  1155
        else:
alex@279
  1156
            return self.formatter.text(word)
tw-public@0
  1157
tw@2719
  1158
    def _table_repl(self, word, groups):
tw-public@0
  1159
        """Handle table cell separator."""
tw-public@0
  1160
        if self.in_table:
tw-public@0
  1161
            result = []
tw-public@0
  1162
            # check for attributes
tw-public@0
  1163
            attrs, attrerr = self._getTableAttrs(word)
tw-public@0
  1164
tw-public@0
  1165
            # start the table row?
tw-public@0
  1166
            if self.table_rowstart:
tw-public@0
  1167
                self.table_rowstart = 0
tw-public@0
  1168
                result.append(self.formatter.table_row(1, attrs))
tw-public@0
  1169
            else:
tw-public@0
  1170
                # Close table cell, first closing open p
tw-public@0
  1171
                # REMOVED check for self.in_li, paragraph should close always!
tw-public@0
  1172
                if self.formatter.in_p:
tw-public@0
  1173
                    result.append(self.formatter.paragraph(0))
tw-public@0
  1174
                result.append(self.formatter.table_cell(0))
tw-public@0
  1175
tw-public@0
  1176
            # check for adjacent cell markers
tw-public@0
  1177
            if word.count("|") > 2:
tw@1868
  1178
                if 'align' not in attrs and \
tw@1868
  1179
                   not ('style' in attrs and 'text-align' in attrs['style'].lower()):
MoinMoinBugs/TableAlignmentProbsWithGUI">tw@1572
  1180
                    # add center alignment if we don't have some alignment already
tw-public@0
  1181
                    attrs['align'] = '"center"'
tw@1868
  1182
                if 'colspan' not in attrs:
tw-public@0
  1183
                    attrs['colspan'] = '"%d"' % (word.count("|")/2)
tw-public@0
  1184
tw-public@0
  1185
            # return the complete cell markup
tw@1356
  1186
            result.append(self.formatter.table_cell(1, attrs) + attrerr)
tw@525
  1187
            result.append(self._line_anchordef())
tw@1356
  1188
            return ''.join(result)
tw-public@0
  1189
        else:
alex@279
  1190
            return self.formatter.text(word)
tw-public@0
  1191
tw@2719
  1192
    def _heading_repl(self, word, groups):
tw-public@0
  1193
        """Handle section headings."""
tw@2811
  1194
        heading_text = groups.get('heading_text', '')
tw@2719
  1195
        depth = min(len(groups.get('hmarker')), 5)
johannes@2566
  1196
        return ''.join([
johannes@2566
  1197
            self._closeP(),
tw@2719
  1198
            self.formatter.heading(1, depth, id=heading_text),
tw@2719
  1199
            self.formatter.text(heading_text),
johannes@2566
  1200
            self.formatter.heading(0, depth),
johannes@2566
  1201
        ])
tw@2719
  1202
    _heading_text_repl = _heading_repl
tw@1356
  1203
tw@2719
  1204
    def _parser_repl(self, word, groups):
tw@639
  1205
        """Handle parsed code displays."""
tw@639
  1206
        self.parser = None
tw@639
  1207
        self.parser_name = None
tw@3052
  1208
        self.parser_lines = []
tw@3052
  1209
        parser_line = word = groups.get('parser_line', u'')
tw@3052
  1210
        parser_name = groups.get('parser_name', None)
tw@3052
  1211
        parser_args = groups.get('parser_args', None)
tw@3052
  1212
        parser_nothing = groups.get('parser_nothing', None)
tw@3052
  1213
        parser_unique = groups.get('parser_unique', u'') or u''
tw@3052
  1214
        #logging.debug("_parser_repl: parser_name %r parser_args %r parser_unique %r" % (parser_name, parser_args, parser_unique))
tw@3052
  1215
        if set(parser_unique) == set('{'): # just some more {{{{{{
tw@3052
  1216
            parser_unique = u'}' * len(parser_unique) # for symmetry cosmetic reasons
tw@3052
  1217
        self.parser_unique = parser_unique
tw@3052
  1218
        if parser_name is not None:
alex@1520
  1219
            # First try to find a parser for this
tw@3052
  1220
            if parser_name == u'':
tw@3052
  1221
                # empty bang paths lead to a normal code display
tw@3052
  1222
                # can be used to escape real, non-empty bang paths
tw@3052
  1223
                #logging.debug("_parser_repl: empty bangpath")
tw@3052
  1224
                parser_name = 'text'
tw@3052
  1225
                word = ''
tw@3052
  1226
        elif parser_nothing is None:
tw@3052
  1227
            # there was something non-whitespace following the {{{
tw@3052
  1228
            parser_name = 'text'
tw@3052
  1229
tw@3052
  1230
        self.setParser(parser_name)
tw@3052
  1231
        if not self.parser and parser_name:
tw@3052
  1232
            # loading the desired parser didn't work, retry a safe option:
tw@3052
  1233
            wanted_parser = parser_name
tw@3052
  1234
            parser_name = 'text'
tw@639
  1235
            self.setParser(parser_name)
tw@3052
  1236
            word = '%s %s (-)' % (wanted_parser, parser_args)  # indication that it did not work
tw-public@0
  1237
tw@639
  1238
        if self.parser:
tw@639
  1239
            self.parser_name = parser_name
tw@1400
  1240
            self.in_pre = 'found_parser'
tw@3052
  1241
            if word:
tw@3052
  1242
                self.parser_lines.append(word)
tw-public@0
  1243
        else:
tw@1400
  1244
            self.in_pre = 'search_parser'
rb@3070
  1245
tw@3052
  1246
        #logging.debug("_parser_repl: in_pre %r line %d" % (self.in_pre, self.lineno))
tw@3052
  1247
        return ''
tw@3052
  1248
    _parser_unique_repl = _parser_repl
tw@3052
  1249
    _parser_line_repl = _parser_repl
tw@3052
  1250
    _parser_name_repl = _parser_repl
tw@3052
  1251
    _parser_args_repl = _parser_repl
tw@3052
  1252
    _parser_nothing_repl = _parser_repl
tw-public@0
  1253
tw@3052
  1254
    def _parser_content(self, line):
tw@3052
  1255
        """ handle state and collecting lines for parser in pre/parser sections """
tw@3052
  1256
        #logging.debug("parser_content: %r" % line)
tw@3052
  1257
        if self.in_pre == 'search_parser' and line.strip():
tw@3052
  1258
            # try to find a parser specification
tw@3901
  1259
            parser_name = ''
tw@3052
  1260
            if line.strip().startswith("#!"):
tw@3901
  1261
                parser_name = line.strip()[2:]
tw@3901
  1262
            if parser_name:
tw@3901
  1263
                parser_name = parser_name.split()[0]
tw@3052
  1264
            else:
tw@3052
  1265
                parser_name = 'text'
tw@3052
  1266
            self.setParser(parser_name)
tw@3052
  1267
tw@3052
  1268
            if not self.parser:
tw@3052
  1269
                parser_name = 'text'
tw@3052
  1270
                self.setParser(parser_name)
tw@3052
  1271
tw@3052
  1272
            if self.parser:
tw@3052
  1273
                self.in_pre = 'found_parser'
tw@3052
  1274
                self.parser_lines.append(line)
tw@3052
  1275
                self.parser_name = parser_name
tw@3052
  1276
tw@3052
  1277
        elif self.in_pre == 'found_parser':
tw@3052
  1278
            # collect the content lines
tw@3052
  1279
            self.parser_lines.append(line)
tw@3052
  1280
tw@3052
  1281
        return ''  # we emit the content after reaching the end of the parser/pre section
tw@3052
  1282
tw@3052
  1283
    def _parser_end_repl(self, word, groups):
tw@3052
  1284
        """ when we reach the end of a parser/pre section,
tw@3052
  1285
            we call the parser with the lines we collected
tw@3052
  1286
        """
tw@3052
  1287
        #if self.in_pre:
tw@3052
  1288
        self.in_pre = None
tw@3052
  1289
        self.inhibit_p = 0
tw@3052
  1290
        #logging.debug("_parser_end_repl: in_pre %r line %d" % (self.in_pre, self.lineno))
tw@3052
  1291
        self.request.write(self._closeP())
tw@3052
  1292
        if self.parser_name is None:
tw@3052
  1293
            # we obviously did not find a parser specification
tw@3052
  1294
            self.parser_name = 'text'
tw@3052
  1295
        result = self.formatter.parser(self.parser_name, self.parser_lines)
tw@3052
  1296
        del self.parser_lines
tw@3052
  1297
        self.in_pre = None
tw@3052
  1298
        self.parser = None
tw@3052
  1299
        return result
tw-public@0
  1300
tw@2719
  1301
    def _smiley_repl(self, word, groups):
tw-public@0
  1302
        """Handle smileys."""
tw-public@0
  1303
        return self.formatter.smiley(word)
tw-public@0
  1304
tw@2719
  1305
    def _comment_repl(self, word, groups):
tw@565
  1306
        # if we are in a paragraph, we must close it so that normal text following
tw@565
  1307
        # in the line below the comment will reopen a new paragraph.
tw@565
  1308
        if self.formatter.in_p:
tw@565
  1309
            self.formatter.paragraph(0)
tw@565
  1310
        self.line_is_empty = 1 # markup following comment lines treats them as if they were empty
tw-public@0
  1311
        return self.formatter.comment(word)
tw-public@0
  1312
tw-public@0
  1313
    def _closeP(self):
tw-public@0
  1314
        if self.formatter.in_p:
tw-public@0
  1315
            return self.formatter.paragraph(0)
tw-public@0
  1316
        return ''
tw@1356
  1317
tw@2719
  1318
    def _macro_repl(self, word, groups):
tw@2777
  1319
        """Handle macros."""
tw@2719
  1320
        macro_name = groups.get('macro_name')
tw@2719
  1321
        macro_args = groups.get('macro_args')
tw@3458
  1322
        self.inhibit_p = 0 # 1 fixed macros like UserPreferences (in the past, gone now), 0 fixes paragraph formatting for macros
tw-public@0
  1323
tw-public@0
  1324
        # create macro instance
tw-public@0
  1325
        if self.macro is None:
tw@635
  1326
            self.macro = macro.Macro(self)
tw@2779
  1327
        return self.formatter.macro(self.macro, macro_name, macro_args, markup=groups.get('macro'))
tw@2719
  1328
    _macro_name_repl = _macro_repl
tw@2719
  1329
    _macro_args_repl = _macro_repl
tw-public@0
  1330
tw@3052
  1331
    def scan(self, line, inhibit_p=False):
tw-public@0
  1332
        """ Scans one line
tw@545
  1333
        Append text before match, invoke replace() with match, and add text after match.
tw-public@0
  1334
        """
tw-public@0
  1335
        result = []
tw@3052
  1336
        lastpos = 0 # absolute position within line
tw@3052
  1337
        line_length = len(line)
tw-public@0
  1338
tw-public@0
  1339
        ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line)
tw@3052
  1340
        while lastpos <= line_length: # it is <=, not <, because we need to process the empty line also
tw@3052
  1341
            parser_scan_re = re.compile(self.parser_scan_rule % re.escape(self.parser_unique), re.VERBOSE|re.UNICODE)
tw@3052
  1342
            scan_re = self.in_pre and parser_scan_re or self.scan_re
tw@3052
  1343
            match = scan_re.search(line, lastpos)
tw@3052
  1344
            if match:
tw@3052
  1345
                start = match.start()
tw@3052
  1346
                if lastpos < start:
tw@3052
  1347
                    if self.in_pre:
tw@3052
  1348
                        self._parser_content(line[lastpos:start])
tw@3052
  1349
                    else:
tw@3052
  1350
                        ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()])
tw@3222
  1351
                        if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p):
tw@3052
  1352
                            result.append(self.formatter.paragraph(1, css_class="line862"))
tw@3052
  1353
                        # add the simple text in between lastpos and beginning of current match
tw@3052
  1354
                        result.append(self.formatter.text(line[lastpos:start]))
tw@1356
  1355
tw@3052
  1356
                # Replace match with markup
tw@3052
  1357
                if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or
tw@3052
  1358
                        self.in_table or self.in_list):
tw@3052
  1359
                    result.append(self.formatter.paragraph(1, css_class="line867"))
tw@3052
  1360
                result.append(self.replace(match, inhibit_p))
tw@3052
  1361
                end = match.end()
tw@3052
  1362
                lastpos = end
tw@3052
  1363
                if start == end:
tw@3052
  1364
                    # we matched an empty string
tw@3052
  1365
                    lastpos += 1 # proceed, we don't want to match this again
tw@3052
  1366
            else:
tw@3052
  1367
                if self.in_pre:
MoinMoinBugs/GuiEditorDestroysParserSyntax">Byeongweon@3996
  1368
                    # ilastpos is more then 0 and result of line slice is empty make useless line
MoinMoinBugs/GuiEditorDestroysParserSyntax">Byeongweon@3996
  1369
                    if not (lastpos > 0 and line[lastpos:] == ''):
MoinMoinBugs/GuiEditorDestroysParserSyntax">Byeongweon@3996
  1370
                        self._parser_content(line[lastpos:])
tw@3052
  1371
                elif line[lastpos:]:
tw@3052
  1372
                    ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:])
tw@3052
  1373
                    if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or
tw@3052
  1374
                            self.in_li or self.in_dd):
tw@3052
  1375
                        result.append(self.formatter.paragraph(1, css_class="line874"))
tw@3052
  1376
                    # add the simple text (no markup) after last match
tw@3052
  1377
                    result.append(self.formatter.text(line[lastpos:]))
tw@3052
  1378
                break # nothing left to do!
tw-public@0
  1379
        return u''.join(result)
tw-public@0
  1380
tw@2723
  1381
    def _replace(self, match):
tw@2723
  1382
        """ Same as replace() but with no magic """
tw@2723
  1383
        for name, text in match.groupdict().iteritems():
tw@2723
  1384
            if text is not None:
tw@2723
  1385
                # Get replace method and replace text
tw@2723
  1386
                replace_func = getattr(self, '_%s_repl' % name)
tw@2723
  1387
                result = replace_func(text, match.groupdict())
tw@2723
  1388
                return result
tw@2723
  1389
rb@1812
  1390
    def replace(self, match, inhibit_p=False):
tw-public@0
  1391
        """ Replace match using type name """
tw-public@0
  1392
        result = []
tw-public@0
  1393
        for type, hit in match.groupdict().items():
tw@827
  1394
            if hit is not None and not type in ["hmarker", ]:
tw@1356
  1395
tw@1402
  1396
                ##result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit))
tw@1402
  1397
                # Open p for certain types
rb@1812
  1398
                if not (inhibit_p or self.inhibit_p or self.formatter.in_p
tw@1402
  1399
                        or self.in_pre or (type in self.no_new_p_before)):
tw@1402
  1400
                    result.append(self.formatter.paragraph(1, css_class="line891"))
tw@1356
  1401
tw@1402
  1402
                # Get replace method and replace hit
tw@2719
  1403
                replace_func = getattr(self, '_%s_repl' % type)
tw@2723
  1404
                result.append(replace_func(hit, match.groupdict()))
tw@1402
  1405
                return ''.join(result)
tw-public@0
  1406
        else:
tw-public@0
  1407
            # We should never get here
tw-public@0
  1408
            import pprint
tw@1867
  1409
            raise Exception("Can't handle match %r\n%s\n%s" % (
tw@1867
  1410
                match,
tw@1867
  1411
                pprint.pformat(match.groupdict()),
tw@1867
  1412
                pprint.pformat(match.groups()),
tw@1867
  1413
            ))
tw-public@0
  1414
tw-public@0
  1415
        return ""
tw-public@0
  1416
tw@525
  1417
    def _line_anchordef(self):
tw@525
  1418
        if self.line_anchors and not self.line_anchor_printed:
tw@525
  1419
            self.line_anchor_printed = 1
tw@525
  1420
            return self.formatter.line_anchordef(self.lineno)
tw@525
  1421
        else:
tw@525
  1422
            return ''
tw@525
  1423
rb@1812
  1424
    def format(self, formatter, inhibit_p=False):
        """ For each line, scan through looking for magic
            strings, outputting verbatim any intervening text.

            All output is written to self.request as it is produced,
            nothing is returned.

            @param formatter: formatter used to generate the output markup
            @param inhibit_p: passed on to scan() for every line
        """
        self.formatter = formatter
        self.hilite_re = self.formatter.page.hilite_re

        # get text and replace TABs
        rawtext = self.raw.expandtabs()

        # go through the lines
        self.lineno = self.start_line
        self.lines = self.eol_re.split(rawtext)
        self.line_is_empty = 0

        # processing instructions are only looked for at the top of the text,
        # the first line that is not one switches this off (see below)
        self.in_processing_instructions = 1

        if self.wrapping_div_class:
            self.request.write(self.formatter.div(1, css_class=self.wrapping_div_class))

        # Main loop
        for line in self.lines:
            self.lineno += 1

            # reset the per-line state
            self.line_anchor_printed = 0
            # inside a table, the line anchor is emitted by _table_repl()
            # together with the cell instead
            if not self.in_table:
                self.request.write(self._line_anchordef())
            self.table_rowstart = 1
            self.line_was_empty = self.line_is_empty
            self.line_is_empty = 0
            self.first_list_item = 0
            self.inhibit_p = 0

            # ignore processing instructions
            # (they are only written out as comments, not parsed)
            if self.in_processing_instructions:
                found = False
                for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
                           "#pragma", "#form", "#acl", "#language"):
                    if line.lower().startswith(pi):
                        self.request.write(self.formatter.comment(line))
                        found = True
                        break
                if not found:
                    self.in_processing_instructions = 0
                else:
                    continue # do not parse this line

            # lines inside a parser/pre section skip the paragraph, indent and
            # table handling below and go to scan() unchanged
            if not self.in_pre:
                # we don't have \n as whitespace any more
                # This is the space between lines we join to one paragraph
                line += ' '

                # Paragraph break on empty lines
                if not line.strip():
                    # an empty line also ends a table
                    if self.in_table:
                        self.request.write(self.formatter.table(0))
                        self.request.write(self._line_anchordef())
                        self.in_table = 0
                    # CHANGE: removed check for not self.list_types
                    # p should close on every empty line
                    if self.formatter.in_p:
                        self.request.write(self.formatter.paragraph(0))
                    self.line_is_empty = 1
                    continue

                # Check indent level
                indent = self.indent_re.match(line)
                indlen = len(indent.group(0))
                # an indented line is an "ul" list unless ol_re / dl_re match
                indtype = "ul"
                numtype = None
                numstart = None
                if indlen:
                    match = self.ol_re.match(line)
                    if match:
                        # split the matched marker at the dot: the first char
                        # of the left part is the numbering type, a right part
                        # of the form "#N" gives the start number N
                        numtype, numstart = match.group(0).strip().split('.')
                        numtype = numtype[0]

                        if numstart and numstart[0] == "#":
                            numstart = int(numstart[1:])
                        else:
                            numstart = None

                        indtype = "ol"
                    else:
                        match = self.dl_re.match(line)
                        if match:
                            indtype = "dl"

                # output proper indentation tags
                self.request.write(self._indent_to(indlen, indtype, numtype, numstart))

                # Table mode
                # TODO: move into function?
                # a table row starts (after the indent) with "||" and ends
                # with "|| " (the trailing blank was appended above)
                if (not self.in_table and line[indlen:indlen + 2] == "||"
                    and line.endswith("|| ") and len(line) >= 5 + indlen):
                    # Start table
                    if self.list_types and not self.in_li:
                        self.request.write(self.formatter.listitem(1, style="list-style-type:none"))
                        ## CHANGE: no automatic p on li
                        ##self.request.write(self.formatter.paragraph(1))
                        self.in_li = 1

                    # CHANGE: removed check for self.in_li
                    # paragraph should end before table, always!
                    if self.formatter.in_p:
                        self.request.write(self.formatter.paragraph(0))
                    attrs, attrerr = self._getTableAttrs(line[indlen+2:])
                    self.request.write(self.formatter.table(1, attrs) + attrerr)
                    self.in_table = True # self.lineno
                elif (self.in_table and not
                      # intra-table comments should not break a table
                      (line.startswith("##") or
                       line[indlen:indlen + 2] == "||" and
                       line.endswith("|| ") and
                       len(line) >= 5 + indlen)):

                    # Close table
                    # (we are in a table, but this line is neither a comment
                    # nor a table row)
                    self.request.write(self.formatter.table(0))
                    self.request.write(self._line_anchordef())
                    self.in_table = 0

            # Scan line, format and write
            formatted_line = self.scan(line, inhibit_p=inhibit_p)
            self.request.write(formatted_line)


        # Close code displays, paragraphs, tables and open lists
        self.request.write(self._undent())
        if self.in_pre: self.request.write(self.formatter.preformatted(0))
        if self.formatter.in_p: self.request.write(self.formatter.paragraph(0))
        if self.in_table: self.request.write(self.formatter.table(0))

        # close the wrapping div opened before the main loop
        if self.wrapping_div_class:
            self.request.write(self.formatter.div(0))
tw@1752
  1558
rb@2258
  1559
tw@639
  1560
    # Private helpers ------------------------------------------------------------
tw@1356
  1561
tw@639
  1562
    def setParser(self, name):
        """ Set self.parser to the parser plugin named 'name'.

        If there is no such plugin, self.parser is set to None.
        """
        # XXX this is done by the formatter as well
        found = None
        try:
            found = wikiutil.searchAndImportPlugin(self.request.cfg, "parser", name)
        except wikiutil.PluginMissingError:
            pass # no such parser, found stays None
        self.parser = found
tw@2719
  1569
johannes@3784
  1570
# remove the module-level "_" again: it is only an identity function used
# to mark the strings defined above, it shall not stay in the module namespace
del _