MoinMoin/parser/text_moin_wiki.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Wed, 11 Feb 2009 02:34:33 +0100
changeset 4569 3caaa8c74c41
parent 4498 910474dded06
child 5101 d8ccac2f24c5
permissions -rw-r--r--
wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
     1 # -*- coding: iso-8859-1 -*-
     2 """
     3     MoinMoin - MoinMoin Wiki Markup Parser
     4 
     5     @copyright: 2000-2002 Juergen Hermann <jh@web.de>,
     6                 2006-2008 MoinMoin:ThomasWaldmann,
     7                 2007 by MoinMoin:ReimarBauer
     8     @license: GNU GPL, see COPYING for details.
     9 """
    10 
    11 import re
    12 
    13 from MoinMoin import log
    14 logging = log.getLogger(__name__)
    15 
    16 from MoinMoin import config, wikiutil, macro
    17 from MoinMoin.Page import Page
    18 from MoinMoin.support.python_compatibility import set
    19 
    20 Dependencies = ['user'] # {{{#!wiki comment ... }}} has different output depending on the user's profile settings
    21 
    22 
    23 _ = lambda x: x
    24 
    25 class Parser:
    26     """
    27         Parse wiki format markup (and call the formatter to generate output).
    28 
    29         All formatting commands can be parsed one line at a time, though
    30         some state is carried over between lines.
    31 
    32         Methods named like _*_repl() are responsible to handle the named regex patterns.
    33     """
    34 
    35     # allow caching
    36     caching = 1
    37     Dependencies = Dependencies
    38     quickhelp = _(u"""\
    39  Emphasis:: <<Verbatim('')>>''italics''<<Verbatim('')>>; <<Verbatim(''')>>'''bold'''<<Verbatim(''')>>; <<Verbatim(''''')>>'''''bold italics'''''<<Verbatim(''''')>>; <<Verbatim('')>>''mixed ''<<Verbatim(''')>>'''''bold'''<<Verbatim(''')>> and italics''<<Verbatim('')>>; <<Verbatim(----)>> horizontal rule.
    40  Headings:: = Title 1 =; == Title 2 ==; === Title 3 ===; ==== Title 4 ====; ===== Title 5 =====.
    41  Lists:: space and one of: * bullets; 1., a., A., i., I. numbered items; 1.#n start numbering at n; space alone indents.
    42  Links:: <<Verbatim(JoinCapitalizedWords)>>; <<Verbatim([[target|linktext]])>>.
    43  Tables:: || cell text |||| cell text spanning 2 columns ||;    no trailing white space allowed after tables or titles.
    44 
    45 (!) For more help, see HelpOnEditing or HelpOnMoinWikiSyntax.
    46 """)
    47 
    48     # some common strings
    49     CHILD_PREFIX = wikiutil.CHILD_PREFIX
    50     CHILD_PREFIX_LEN = wikiutil.CHILD_PREFIX_LEN
    51     PARENT_PREFIX = wikiutil.PARENT_PREFIX
    52     PARENT_PREFIX_LEN = wikiutil.PARENT_PREFIX_LEN
    53 
    54     punct_pattern = re.escape(u'''"\'}]|:,.)?!''')
    55     url_scheme = u'|'.join(config.url_schemas)
    56 
    57     # some common rules
    58     url_rule = ur'''
    59         (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
    60         (?P<url_target>  # capture whole url there
    61          (?P<url_scheme>%(url_scheme)s)  # some scheme
    62          \:
    63          \S+?  # anything non-whitespace
    64         )
    65         (?:$|(?=\s|[%(punct)s]+(\s|$)))  # require either end of line or some whitespace or some punctuation+blank/eol afterwards
    66     ''' % {
    67         'url_scheme': url_scheme,
    68         'punct': punct_pattern,
    69     }
    70 
    71     # this is for a free (non-bracketed) interwiki link - to avoid false positives,
    72     # we are rather restrictive here (same as in moin 1.5: require that the
    73     # interwiki_wiki name starts with an uppercase letter A-Z. Later, the code
    74     # also checks whether the wiki name is in the interwiki map (if not, it renders
    75     # normal text, no link):
    76     interwiki_rule = ur'''
    77         (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
    78         (?P<interwiki_wiki>[A-Z][a-zA-Z]+)  # interwiki wiki name
    79         \:
    80         (?P<interwiki_page>  # interwiki page name
    81          (?=[^ ]*[%(u)s%(l)s0..9][^ ]*\ )  # make sure there is something non-blank with at least one alphanum letter following
    82          [^\s%(punct)s]+  # we take all until we hit some blank or punctuation char ...
    83         )
    84     ''' % {
    85         'u': config.chars_upper,
    86         'l': config.chars_lower,
    87         'punct': punct_pattern,
    88     }
    89 
    90     # BE CAREFUL: if you do changes to word_rule, consider doing them also to word_rule_js (see below)
    91     word_rule = ur'''
    92         (?:
    93          (?<![%(u)s%(l)s/])  # require anything not upper/lower/slash before
    94          |
    95          ^  # ... or beginning of line
    96         )
    97         (?P<word_bang>\!)?  # configurable: avoid getting CamelCase rendered as link
    98         (?P<word_name>
    99          (?:
   100           (%(parent)s)*  # there might be either ../ parent prefix(es)
   101           |
   102           ((?<!%(child)s)%(child)s)?  # or maybe a single / child prefix (but not if we already had it before)
   103          )
   104          (
   105           ((?<!%(child)s)%(child)s)?  # there might be / child prefix (but not if we already had it before)
   106           (?:[%(u)s][%(l)s]+){2,}  # at least 2 upper>lower transitions make CamelCase
   107          )+  # we can have MainPage/SubPage/SubSubPage ...
   108          (?:
   109           \#  # anchor separator          TODO check if this does not make trouble at places where word_rule is used
   110           (?P<word_anchor>\S+)  # some anchor name
   111          )?
   112         )
   113         (?:
   114          (?![%(u)s%(l)s/])  # require anything not upper/lower/slash following
   115          |
   116          $  # ... or end of line
   117         )
   118     ''' % {
   119         'u': config.chars_upper,
   120         'l': config.chars_lower,
   121         'child': re.escape(CHILD_PREFIX),
   122         'parent': re.escape(PARENT_PREFIX),
   123     }
   124     # simplified word_rule for FCKeditor's "unlink" plugin (puts a ! in front of a WikiName if WikiName matches word_rule_js),
   125     # because JavaScript can not use group names and verbose regular expressions!
   126     word_rule_js = (
   127         ur'''(?:(?<![%(u)s%(l)s/])|^)'''
   128         ur'''(?:'''
   129          ur'''(?:(%(parent)s)*|((?<!%(child)s)%(child)s)?)'''
   130          ur'''(((?<!%(child)s)%(child)s)?(?:[%(u)s][%(l)s]+){2,})+'''
   131          ur'''(?:\#(?:\S+))?'''
   132         ur''')'''
   133         ur'''(?:(?![%(u)s%(l)s/])|$)'''
   134     ) % {
   135         'u': config.chars_upper,
   136         'l': config.chars_lower,
   137         'child': re.escape(CHILD_PREFIX),
   138         'parent': re.escape(PARENT_PREFIX),
   139     }
   140 
   141     # link targets:
   142     extern_rule = r'(?P<extern_addr>(?P<extern_scheme>%s)\:.*)' % url_scheme
   143     attach_rule = r'(?P<attach_scheme>attachment|drawing)\:(?P<attach_addr>.*)'
   144     page_rule = r'(?P<page_name>.*)'
   145 
   146     link_target_rules = r'|'.join([
   147         extern_rule,
   148         attach_rule,
   149         page_rule,
   150     ])
   151     link_target_re = re.compile(link_target_rules, re.VERBOSE|re.UNICODE)
   152 
   153     link_rule = r"""
   154         (?P<link>
   155             \[\[  # link target
   156             \s*  # strip space
   157             (?P<link_target>[^|]+?)
   158             \s*  # strip space
   159             (
   160                 \|  # link description
   161                 \s*  # strip space
   162                 (?P<link_desc>
   163                     (?:  # 1. we have either a transclusion here (usually a image)
   164                         \{\{
   165                         \s*[^|]+?\s*  # usually image target (strip space)
   166                         (\|\s*[^|]*?\s*  # usually image alt text (optional, strip space)
   167                             (\|\s*[^|]*?\s*  # transclusion parameters (usually key="value" format, optional, strip space)
   168                             )?
   169                         )?
   170                         \}\}
   171                     )
   172                     |
   173                     (?:  # 2. or we have simple text here.
   174                         [^|]+?
   175                     )
   176                 )?
   177                 \s*  # strip space
   178                 (
   179                     \|  # link parameters
   180                     \s*  # strip space
   181                     (?P<link_params>[^|]+?)?
   182                     \s*  # strip space
   183                 )?
   184             )?
   185             \]\]
   186         )
   187     """
   188 
   189     transclude_rule = r"""
   190         (?P<transclude>
   191             \{\{
   192             \s*(?P<transclude_target>[^|]+?)\s*  # usually image target (strip space)
   193             (\|\s*(?P<transclude_desc>[^|]+?)?\s*  # usually image alt text (optional, strip space)
   194                 (\|\s*(?P<transclude_params>[^|]+?)?\s*  # transclusion parameters (usually key="value" format, optional, strip space)
   195                 )?
   196             )?
   197             \}\}
   198         )
   199     """
   200     text_rule = r"""
   201         (?P<simple_text>
   202             [^|]+  # some text (not empty, does not contain separator)
   203         )
   204     """
   205     # link descriptions:
   206     link_desc_rules = r'|'.join([
   207             transclude_rule,
   208             text_rule,
   209     ])
   210     link_desc_re = re.compile(link_desc_rules, re.VERBOSE|re.UNICODE)
   211 
   212     # transclude descriptions:
   213     transclude_desc_rules = r'|'.join([
   214             text_rule,
   215     ])
   216     transclude_desc_re = re.compile(transclude_desc_rules, re.VERBOSE|re.UNICODE)
   217 
   218     # lists:
   219     ol_rule = ur"""
   220         ^\s+  # indentation
   221         (?:[0-9]+|[aAiI])\. # arabic, alpha, roman counting
   222         (?:\#\d+)?  # optional start number
   223         \s  # require one blank afterwards
   224     """
   225     ol_re = re.compile(ol_rule, re.VERBOSE|re.UNICODE)
   226 
   227     dl_rule = ur"""
   228         ^\s+  # indentation
   229         .*?::  # definition term::
   230         \s  # require on blank afterwards
   231     """
   232     dl_re = re.compile(dl_rule, re.VERBOSE|re.UNICODE)
   233 
   234     # others
   235     indent_re = re.compile(ur"^\s*", re.UNICODE)
   236     eol_re = re.compile(r'\r?\n', re.UNICODE)
   237 
   238     # this is used inside parser/pre sections (we just want to know when it's over):
   239     parser_unique = u''
   240     parser_scan_rule = ur"""
   241 (?P<parser_end>
   242     %s\}\}\}  # in parser/pre, we only look for the end of the parser/pre
   243 )
   244 """
   245 
   246 
   247     # the big, fat, less ugly one ;)
   248     # please be very careful: blanks and # must be escaped with \ !
   249     scan_rules = ur"""
   250 (?P<emph_ibb>
   251     '''''(?=[^']+''')  # italic on, bold on, ..., bold off
   252 )|(?P<emph_ibi>
   253     '''''(?=[^']+'')  # italic on, bold on, ..., italic off
   254 )|(?P<emph_ib_or_bi>
   255     '{5}(?=[^'])  # italic and bold or bold and italic
   256 )|(?P<emph>
   257     '{2,3}  # italic or bold
   258 )|(?P<u>
   259     __ # underline
   260 )|(?P<small>
   261     (
   262      (?P<small_on>\~-\ ?)  # small on (we eat a trailing blank if it is there)
   263     |
   264      (?P<small_off>-\~)  # small off
   265     )
   266 )|(?P<big>
   267     (
   268      (?P<big_on>\~\+\ ?)  # big on (eat trailing blank)
   269     |
   270      (?P<big_off>\+\~)  # big off
   271     )
   272 )|(?P<strike>
   273     (
   274      (?P<strike_on>--\()  # strike-through on
   275     |
   276      (?P<strike_off>\)--)  # strike-through off
   277     )
   278 )|(?P<remark>
   279     (
   280      (^|(?<=\s))  # we require either beginning of line or some whitespace before a remark begin
   281      (?P<remark_on>/\*\s)  # inline remark on (require and eat whitespace after it)
   282     )
   283     |
   284     (
   285      (?P<remark_off>\s\*/)  # off (require and eat whitespace before it)
   286      (?=\s)  # we require some whitespace after a remark end
   287     )
   288 )|(?P<sup>
   289     \^  # superscript on
   290     (?P<sup_text>.*?)  # capture the text
   291     \^  # off
   292 )|(?P<sub>
   293     ,,  # subscript on
   294     (?P<sub_text>.*?)  # capture the text
   295     ,,  # off
   296 )|(?P<tt>
   297     \{\{\{  # teletype on
   298     (?P<tt_text>.*?)  # capture the text
   299     \}\}\}  # off
   300 )|(?P<tt_bt>
   301     `  # teletype (using a backtick) on
   302     (?P<tt_bt_text>.*?)  # capture the text
   303     `  # off
   304 )|(?P<interwiki>
   305     %(interwiki_rule)s  # OtherWiki:PageName
   306 )|(?P<word>  # must come AFTER interwiki rule!
   307     %(word_rule)s  # CamelCase wiki words
   308 )|
   309 %(link_rule)s
   310 |
   311 %(transclude_rule)s
   312 |(?P<url>
   313     %(url_rule)s
   314 )|(?P<email>
   315     [-\w._+]+  # name
   316     \@  # at
   317     [\w-]+(\.[\w-]+)+  # server/domain
   318 )|(?P<smiley>
   319     (^|(?<=\s))  # we require either beginning of line or some space before a smiley
   320     (%(smiley)s)  # one of the smileys
   321     (?=\s)  # we require some space after the smiley
   322 )|(?P<macro>
   323     <<
   324     (?P<macro_name>\w+)  # name of the macro
   325     (?:\((?P<macro_args>.*?)\))?  # optionally macro arguments
   326     >>
   327 )|(?P<heading>
   328     ^(?P<hmarker>=+)\s+  # some === at beginning of line, eat trailing blanks
   329     (?P<heading_text>.*?)  # capture heading text
   330     \s+(?P=hmarker)\s$  # some === at end of line (matching amount as we have seen), eat blanks
   331 )|(?P<parser>
   332     \{\{\{  # parser on
   333     (?P<parser_unique>(\{*|\w*))  # either some more {{{{ or some chars to solve the nesting problem
   334     (?P<parser_line>
   335      (
   336       \#!  # hash bang
   337       (?P<parser_name>\w*)  # we have a parser name (can be empty) directly following the {{{
   338       (
   339        \s+  # some space ...
   340        (?P<parser_args>.+?)  # followed by parser args
   341       )?  # parser args are optional
   342       \s*  # followed by whitespace (eat it) until EOL
   343      )
   344     |
   345      (?P<parser_nothing>\s*)  # no parser name, only whitespace up to EOL (eat it)
   346     )$
   347     # "parser off" detection is done with parser_scan_rule!
   348 )|(?P<comment>
   349     ^\#\#.*$  # src code comment, rest of line
   350 )|(?P<ol>
   351     %(ol_rule)s  # ordered list
   352 )|(?P<dl>
   353     %(dl_rule)s  # definition list
   354 )|(?P<li>
   355     ^\s+\*\s*  # unordered list
   356 )|(?P<li_none>
   357     ^\s+\.\s*  # unordered list, no bullets
   358 )|(?P<indent>
   359     ^\s+  # indented by some spaces
   360 )|(?P<tableZ>
   361     \|\|\ $  # the right end of a table row
   362 )|(?P<table>
   363     (?:\|\|)+(?:<(?!<)[^>]*?>)?(?!\|?\s$) # a table
   364 )|(?P<rule>
   365     -{4,}  # hor. rule, min. 4 -
   366 )|(?P<entity>
   367     &(
   368       ([a-zA-Z]+)  # symbolic entity, like &uuml;
   369       |
   370       (\#(\d{1,5}|x[0-9a-fA-F]+))  # numeric entities, like &#42; or &#x42;
   371      );
   372 )|(?P<sgml_entity>  # must come AFTER entity rule!
   373     [<>&]  # needs special treatment for html/xml
   374 )"""  % {
   375         'url_scheme': url_scheme,
   376         'url_rule': url_rule,
   377         'punct': punct_pattern,
   378         'ol_rule': ol_rule,
   379         'dl_rule': dl_rule,
   380         'interwiki_rule': interwiki_rule,
   381         'word_rule': word_rule,
   382         'link_rule': link_rule,
   383         'transclude_rule': transclude_rule,
   384         'u': config.chars_upper,
   385         'l': config.chars_lower,
   386         'smiley': u'|'.join([re.escape(s) for s in config.smileys])}
   387     scan_re = re.compile(scan_rules, re.UNICODE|re.VERBOSE)
   388 
   389     # Don't start p before these
   390     no_new_p_before = ("heading rule table tableZ tr td "
   391                        "ul ol dl dt dd li li_none indent "
   392                        "macro parser")
   393     no_new_p_before = no_new_p_before.split()
   394     no_new_p_before = dict(zip(no_new_p_before, [1] * len(no_new_p_before)))
   395 
   396     def __init__(self, raw, request, **kw):
   397         self.raw = raw
   398         self.request = request
   399         self.form = request.form # Macro object uses this
   400         self._ = request.getText
   401         self.cfg = request.cfg
   402         self.line_anchors = kw.get('line_anchors', True)
   403         self.start_line = kw.get('start_line', 0)
   404         self.macro = None
   405 
   406         # currently, there is only a single, optional argument to this parser and
   407         # (when given), it is used as class(es) for a div wrapping the formatter output
   408         # either use a single class like "comment" or multiple like "comment/red/dotted"
   409         self.wrapping_div_class = kw.get('format_args', '').strip().replace('/', ' ')
   410 
   411         self.is_em = 0 # must be int
   412         self.is_b = 0 # must be int
   413         self.is_u = False
   414         self.is_strike = False
   415         self.is_big = False
   416         self.is_small = False
   417         self.is_remark = False
   418 
   419         self.lineno = 0
   420         self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl>
   421         self.in_li = 0 # between <li> and </li>
   422         self.in_dd = 0 # between <dd> and </dd>
   423 
   424         # states of the parser concerning being inside/outside of some "pre" section:
   425         # None == we are not in any kind of pre section (was: 0)
   426         # 'search_parser' == we didn't get a parser yet, still searching for it (was: 1)
   427         # 'found_parser' == we found a valid parser (was: 2)
   428         self.in_pre = None
   429 
   430         self.in_table = 0
   431         self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph
   432 
   433         # holds the nesting level (in chars) of open lists
   434         self.list_indents = []
   435         self.list_types = []
   436 
   437     def _close_item(self, result):
   438         #result.append("<!-- close item begin -->\n")
   439         if self.in_table:
   440             result.append(self.formatter.table(0))
   441             self.in_table = 0
   442         if self.in_li:
   443             self.in_li = 0
   444             if self.formatter.in_p:
   445                 result.append(self.formatter.paragraph(0))
   446             result.append(self.formatter.listitem(0))
   447         if self.in_dd:
   448             self.in_dd = 0
   449             if self.formatter.in_p:
   450                 result.append(self.formatter.paragraph(0))
   451             result.append(self.formatter.definition_desc(0))
   452         #result.append("<!-- close item end -->\n")
   453 
   454     def _u_repl(self, word, groups):
   455         """Handle underline."""
   456         self.is_u = not self.is_u
   457         return self.formatter.underline(self.is_u)
   458 
   459     def _remark_repl(self, word, groups):
   460         """Handle remarks."""
   461         on = groups.get('remark_on')
   462         if on and self.is_remark:
   463             return self.formatter.text(word)
   464         off = groups.get('remark_off')
   465         if off and not self.is_remark:
   466             return self.formatter.text(word)
   467         self.is_remark = not self.is_remark
   468         return self.formatter.span(self.is_remark, css_class='comment')
   469     _remark_on_repl = _remark_repl
   470     _remark_off_repl = _remark_repl
   471 
   472     def _strike_repl(self, word, groups):
   473         """Handle strikethrough."""
   474         on = groups.get('strike_on')
   475         if on and self.is_strike:
   476             return self.formatter.text(word)
   477         off = groups.get('strike_off')
   478         if off and not self.is_strike:
   479             return self.formatter.text(word)
   480         self.is_strike = not self.is_strike
   481         return self.formatter.strike(self.is_strike)
   482     _strike_on_repl = _strike_repl
   483     _strike_off_repl = _strike_repl
   484 
   485     def _small_repl(self, word, groups):
   486         """Handle small."""
   487         on = groups.get('small_on')
   488         if on and self.is_small:
   489             return self.formatter.text(word)
   490         off = groups.get('small_off')
   491         if off and not self.is_small:
   492             return self.formatter.text(word)
   493         self.is_small = not self.is_small
   494         return self.formatter.small(self.is_small)
   495     _small_on_repl = _small_repl
   496     _small_off_repl = _small_repl
   497 
   498     def _big_repl(self, word, groups):
   499         """Handle big."""
   500         on = groups.get('big_on')
   501         if on and self.is_big:
   502             return self.formatter.text(word)
   503         off = groups.get('big_off')
   504         if off and not self.is_big:
   505             return self.formatter.text(word)
   506         self.is_big = not self.is_big
   507         return self.formatter.big(self.is_big)
   508     _big_on_repl = _big_repl
   509     _big_off_repl = _big_repl
   510 
   511     def _emph_repl(self, word, groups):
   512         """Handle emphasis, i.e. '' and '''."""
   513         if len(word) == 3:
   514             self.is_b = not self.is_b
   515             if self.is_em and self.is_b:
   516                 self.is_b = 2
   517             return self.formatter.strong(self.is_b)
   518         else:
   519             self.is_em = not self.is_em
   520             if self.is_em and self.is_b:
   521                 self.is_em = 2
   522             return self.formatter.emphasis(self.is_em)
   523 
   524     def _emph_ibb_repl(self, word, groups):
   525         """Handle mixed emphasis, i.e. ''''' followed by '''."""
   526         self.is_b = not self.is_b
   527         self.is_em = not self.is_em
   528         if self.is_em and self.is_b:
   529             self.is_b = 2
   530         return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
   531 
   532     def _emph_ibi_repl(self, word, groups):
   533         """Handle mixed emphasis, i.e. ''''' followed by ''."""
   534         self.is_b = not self.is_b
   535         self.is_em = not self.is_em
   536         if self.is_em and self.is_b:
   537             self.is_em = 2
   538         return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
   539 
   540     def _emph_ib_or_bi_repl(self, word, groups):
   541         """Handle mixed emphasis, exactly five '''''."""
   542         b_before_em = self.is_b > self.is_em > 0
   543         self.is_b = not self.is_b
   544         self.is_em = not self.is_em
   545         if b_before_em:
   546             return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
   547         else:
   548             return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
   549 
   550     def _sup_repl(self, word, groups):
   551         """Handle superscript."""
   552         text = groups.get('sup_text', '')
   553         return (self.formatter.sup(1) +
   554                 self.formatter.text(text) +
   555                 self.formatter.sup(0))
   556     _sup_text_repl = _sup_repl
   557 
   558     def _sub_repl(self, word, groups):
   559         """Handle subscript."""
   560         text = groups.get('sub_text', '')
   561         return (self.formatter.sub(1) +
   562                self.formatter.text(text) +
   563                self.formatter.sub(0))
   564     _sub_text_repl = _sub_repl
   565 
   566     def _tt_repl(self, word, groups):
   567         """Handle inline code."""
   568         tt_text = groups.get('tt_text', '')
   569         return (self.formatter.code(1) +
   570                 self.formatter.text(tt_text) +
   571                 self.formatter.code(0))
   572     _tt_text_repl = _tt_repl
   573 
   574     def _tt_bt_repl(self, word, groups):
   575         """Handle backticked inline code."""
   576         tt_bt_text = groups.get('tt_bt_text', '')
   577         return (self.formatter.code(1, css="backtick") +
   578                 self.formatter.text(tt_bt_text) +
   579                 self.formatter.code(0))
   580     _tt_bt_text_repl = _tt_bt_repl
   581 
   582     def _rule_repl(self, word, groups):
   583         """Handle sequences of dashes."""
   584         result = self._undent() + self._closeP()
   585         if len(word) <= 4:
   586             result += self.formatter.rule()
   587         else:
   588             # Create variable rule size 1 - 6. Actual size defined in css.
   589             size = min(len(word), 10) - 4
   590             result += self.formatter.rule(size)
   591         return result
   592 
   593     def _interwiki_repl(self, word, groups):
   594         """Handle InterWiki links."""
   595         wiki = groups.get('interwiki_wiki')
   596         page = groups.get('interwiki_page')
   597 
   598         wikitag_bad = wikiutil.resolve_interwiki(self.request, wiki, page)[3]
   599         if wikitag_bad:
   600             text = groups.get('interwiki')
   601             return self.formatter.text(text)
   602         else:
   603             page, anchor = wikiutil.split_anchor(page)
   604             return (self.formatter.interwikilink(1, wiki, page, anchor=anchor) +
   605                     self.formatter.text(page) +
   606                     self.formatter.interwikilink(0, wiki, page))
   607     _interwiki_wiki_repl = _interwiki_repl
   608     _interwiki_page_repl = _interwiki_repl
   609 
   610     def _word_repl(self, word, groups):
   611         """Handle WikiNames."""
   612         bang = ''
   613         bang_present = groups.get('word_bang')
   614         if bang_present:
   615             if self.cfg.bang_meta:
   616                 # handle !NotWikiNames
   617                 return self.formatter.nowikiword(word)
   618             else:
   619                 bang = self.formatter.text('!')
   620         name = groups.get('word_name')
   621         current_page = self.formatter.page.page_name
   622         abs_name = wikiutil.AbsPageName(current_page, name)
   623         # if a simple, self-referencing link, emit it as plain text
   624         if abs_name == current_page:
   625             return self.formatter.text(word)
   626         else:
   627             abs_name, anchor = wikiutil.split_anchor(abs_name)
   628             return (bang +
   629                     self.formatter.pagelink(1, abs_name, anchor=anchor) +
   630                     self.formatter.text(word) +
   631                     self.formatter.pagelink(0, abs_name))
   632     _word_bang_repl = _word_repl
   633     _word_name_repl = _word_repl
   634     _word_anchor_repl = _word_repl
   635 
   636     def _url_repl(self, word, groups):
   637         """Handle literal URLs."""
   638         scheme = groups.get('url_scheme', 'http')
   639         target = groups.get('url_target', '')
   640         return (self.formatter.url(1, target, css=scheme) +
   641                 self.formatter.text(target) +
   642                 self.formatter.url(0))
   643     _url_target_repl = _url_repl
   644     _url_scheme_repl = _url_repl
   645 
   646     def _transclude_description(self, desc, default_text=''):
   647         """ parse a string <desc> valid as transclude description (text, ...)
   648             and return the description.
   649 
   650             We do NOT use wikiutil.escape here because it is html specific (the
   651             html formatter, if used, does this for all html attributes).
   652 
   653             We do NOT call formatter.text here because it sometimes is just used
   654             for some alt and/or title attribute, but not emitted as text.
   655 
   656             @param desc: the transclude description to parse
   657             @param default_text: use this text if parsing desc returns nothing.
   658         """
   659         m = self.transclude_desc_re.match(desc)
   660         if m:
   661             if m.group('simple_text'):
   662                 desc = m.group('simple_text')
   663         else:
   664             desc = default_text
   665         return desc
   666 
   667     def _get_params(self, params, tag_attrs=None, acceptable_attrs=None, query_args=None):
   668         """ parse the parameters of link/transclusion markup,
   669             defaults can be a dict with some default key/values
   670             that will be in the result as given, unless overriden
   671             by the params.
   672         """
   673         if tag_attrs is None:
   674             tag_attrs = {}
   675         if query_args is None:
   676             query_args = {}
   677         if params:
   678             fixed, kw, trailing = wikiutil.parse_quoted_separated(params)
   679             # we ignore fixed and trailing args and only use kw args:
   680             if acceptable_attrs is None:
   681                 acceptable_attrs = []
   682             for key, val in kw.items():
   683                 # wikiutil.escape for key/val must be done by (html) formatter!
   684                 if key in acceptable_attrs:
   685                     # tag attributes must be string type
   686                     tag_attrs[str(key)] = val
   687                 elif key.startswith('&'):
   688                     key = key[1:]
   689                     query_args[key] = val
   690         return tag_attrs, query_args
   691 
   692     def _transclude_repl(self, word, groups):
   693         """Handles transcluding content, usually embedding images."""
   694         target = groups.get('transclude_target', '')
   695         target = wikiutil.url_unquote(target)
   696         desc = groups.get('transclude_desc', '') or ''
   697         params = groups.get('transclude_params', u'') or u''
   698         acceptable_attrs_img = ['class', 'title', 'longdesc', 'width', 'height', 'align', ] # no style because of JS
   699         acceptable_attrs_object = ['class', 'title', 'width', 'height', # no style because of JS
   700                                   'type', 'standby', ] # we maybe need a hack for <PARAM> here
   701         m = self.link_target_re.match(target)
   702         if m:
   703             if m.group('extern_addr'):
   704                 # currently only supports ext. image inclusion
   705                 target = m.group('extern_addr')
   706                 desc = self._transclude_description(desc, target)
   707                 tag_attrs, query_args = self._get_params(params,
   708                                                          tag_attrs={'class': 'external_image',
   709                                                                     'alt': desc,
   710                                                                     'title': desc, },
   711                                                          acceptable_attrs=acceptable_attrs_img)
   712                 return self.formatter.image(src=target, **tag_attrs)
   713                 # FF2 has a bug with target mimetype detection, it looks at the url path
   714                 # and expects to find some "filename extension" there (like .png) and this
   715                 # (not the response http headers) will set the default content-type of
   716                 # the object. This will often work for staticly served files, but
   717                 # fails for MoinMoin attachments (they don't have the filename.ext in the
   718                 # path, but in the query string). FF3 seems to have this bug fixed, opera 9.2
   719                 # also works.
   720                 #return (self.formatter.transclusion(1, data=target) +
   721                 #        desc +
   722                 #        self.formatter.transclusion(0))
   723 
   724             elif m.group('attach_scheme'):
   725                 scheme = m.group('attach_scheme')
   726                 url = wikiutil.url_unquote(m.group('attach_addr'))
   727                 if scheme == 'attachment':
   728                     mt = wikiutil.MimeType(filename=url)
   729                     if mt.major == 'text':
   730                         desc = self._transclude_description(desc, url)
   731                         return self.formatter.attachment_inlined(url, desc)
   732                     # destinguishs if browser need a plugin in place
   733                     elif mt.major == 'image' and mt.minor in config.browser_supported_images:
   734                         desc = self._transclude_description(desc, url)
   735                         tag_attrs, query_args = self._get_params(params,
   736                                                                  tag_attrs={'alt': desc,
   737                                                                             'title': desc, },
   738                                                                  acceptable_attrs=acceptable_attrs_img)
   739                         return self.formatter.attachment_image(url, **tag_attrs)
   740                     else:
   741                         from MoinMoin.action import AttachFile
   742                         pagename = self.formatter.page.page_name
   743                         if AttachFile.exists(self.request, pagename, url):
   744                             href = AttachFile.getAttachUrl(pagename, url, self.request, escaped=0)
   745                             tag_attrs, query_args = self._get_params(params,
   746                                                                      tag_attrs={'title': desc, },
   747                                                                      acceptable_attrs=acceptable_attrs_object)
   748                             return (self.formatter.transclusion(1, data=href, type=mt.spoil(), **tag_attrs) +
   749                                     self.formatter.text(self._transclude_description(desc, url)) +
   750                                     self.formatter.transclusion(0))
   751                         else:
   752                             return (self.formatter.attachment_link(1, url) +
   753                                     self.formatter.text(self._transclude_description(desc, url)) +
   754                                     self.formatter.attachment_link(0))
   755 
   756                         #NOT USED CURRENTLY:
   757 
   758                         # use EmbedObject for other mimetypes
   759                         if mt is not None:
   760                             from MoinMoin import macro
   761                             macro.request = self.request
   762                             macro.formatter = self.request.html_formatter
   763                             p = Parser("##\n", request)
   764                             m = macro.Macro(p)
   765                             pagename = self.formatter.page.page_name
   766                             return m.execute('EmbedObject', u'target=%s' % url)
   767                 elif scheme == 'drawing':
   768                     desc = self._transclude_description(desc, url)
   769                     if desc:
   770                         tag_attrs= {'alt': desc, 'title': desc, }
   771                     else:
   772                         tag_attrs = {}
   773                     tag_attrs, query_args = self._get_params(params,
   774                                                              tag_attrs=tag_attrs,
   775                                                              acceptable_attrs=acceptable_attrs_img)
   776                     return self.formatter.attachment_drawing(url, desc, **tag_attrs)
   777 
   778             elif m.group('page_name'):
   779                 # experimental client side transclusion
   780                 page_name_all = m.group('page_name')
   781                 if ':' in page_name_all:
   782                     wiki_name, page_name = page_name_all.split(':', 1)
   783                     wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
   784                 else:
   785                     err = True
   786                 if err: # not a interwiki link / not in interwiki map
   787                     tag_attrs, query_args = self._get_params(params,
   788                                                              tag_attrs={'type': 'text/html',
   789                                                                         'width': '100%', },
   790                                                              acceptable_attrs=acceptable_attrs_object)
   791                     if 'action' not in query_args:
   792                         query_args['action'] = 'content'
   793                     url = Page(self.request, page_name_all).url(self.request, querystr=query_args)
   794                     return (self.formatter.transclusion(1, data=url, **tag_attrs) +
   795                             self.formatter.text(self._transclude_description(desc, page_name_all)) +
   796                             self.formatter.transclusion(0))
   797                     #return u"Error: <<Include(%s,%s)>> emulation missing..." % (page_name, args)
   798                 else: # looks like a valid interwiki link
   799                     url = wikiutil.join_wiki(wikiurl, wikitail)
   800                     tag_attrs, query_args = self._get_params(params,
   801                                                              tag_attrs={'type': 'text/html',
   802                                                                         'width': '100%', },
   803                                                              acceptable_attrs=acceptable_attrs_object)
   804                     if 'action' not in query_args:
   805                         query_args['action'] = 'content' # XXX moin specific
   806                     url += '?%s' % wikiutil.makeQueryString(query_args)
   807                     return (self.formatter.transclusion(1, data=url, **tag_attrs) +
   808                             self.formatter.text(self._transclude_description(desc, page_name)) +
   809                             self.formatter.transclusion(0))
   810                     #return u"Error: <<RemoteInclude(%s:%s,%s)>> still missing." % (wiki_name, page_name, args)
   811 
   812             else:
   813                 desc = self._transclude_description(desc, target)
   814                 return self.formatter.text('{{%s|%s|%s}}' % (target, desc, params))
   815         return word +'???'
   816     _transclude_target_repl = _transclude_repl
   817     _transclude_desc_repl = _transclude_repl
   818     _transclude_params_repl = _transclude_repl
   819 
   820     def _link_description(self, desc, target='', default_text=''):
   821         """ parse a string <desc> valid as link description (text, transclusion, ...)
   822             and return formatted content.
   823 
   824             @param desc: the link description to parse
   825             @param default_text: use this text (formatted as text) if parsing
   826                                  desc returns nothing.
   827             @param target: target of the link (as readable markup) - used for
   828                            transcluded image's description
   829         """
   830         m = self.link_desc_re.match(desc)
   831         if m:
   832             if m.group('simple_text'):
   833                 desc = m.group('simple_text')
   834                 desc = self.formatter.text(desc)
   835             elif m.group('transclude'):
   836                 groupdict = m.groupdict()
   837                 if groupdict.get('transclude_desc') is None:
   838                     # if transcluded obj (image) has no description, use target for it
   839                     groupdict['transclude_desc'] = target
   840                 desc = m.group('transclude')
   841                 desc = self._transclude_repl(desc, groupdict)
   842         else:
   843             desc = default_text
   844             if desc:
   845                 desc = self.formatter.text(desc)
   846         return desc
   847 
   848     def _link_repl(self, word, groups):
   849         """Handle [[target|text]] links."""
   850         target = groups.get('link_target', '')
   851         desc = groups.get('link_desc', '') or ''
   852         params = groups.get('link_params', u'') or u''
   853         acceptable_attrs = ['class', 'title', 'target', 'accesskey', ] # no style because of JS
   854         mt = self.link_target_re.match(target)
   855         if mt:
   856             if mt.group('page_name'):
   857                 page_name_and_anchor = mt.group('page_name')
   858                 if ':' in page_name_and_anchor:
   859                     wiki_name, page_name = page_name_and_anchor.split(':', 1)
   860                     wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name)
   861                 else:
   862                     err = True
   863                 if err: # not a interwiki link / not in interwiki map
   864                     page_name, anchor = wikiutil.split_anchor(page_name_and_anchor)
   865                     current_page = self.formatter.page.page_name
   866                     if not page_name:
   867                         page_name = current_page
   868                     # handle relative links
   869                     abs_page_name = wikiutil.AbsPageName(current_page, page_name)
   870                     tag_attrs, query_args = self._get_params(params,
   871                                                              tag_attrs={},
   872                                                              acceptable_attrs=acceptable_attrs)
   873                     return (self.formatter.pagelink(1, abs_page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
   874                             self._link_description(desc, target, page_name_and_anchor) +
   875                             self.formatter.pagelink(0, abs_page_name))
   876                 else: # interwiki link
   877                     page_name, anchor = wikiutil.split_anchor(page_name)
   878                     tag_attrs, query_args = self._get_params(params,
   879                                                              tag_attrs={},
   880                                                              acceptable_attrs=acceptable_attrs)
   881                     return (self.formatter.interwikilink(1, wiki_name, page_name, anchor=anchor, querystr=query_args, **tag_attrs) +
   882                             self._link_description(desc, target, page_name) +
   883                             self.formatter.interwikilink(0, wiki_name, page_name))
   884 
   885             elif mt.group('extern_addr'):
   886                 scheme = mt.group('extern_scheme')
   887                 target = mt.group('extern_addr')
   888                 tag_attrs, query_args = self._get_params(params,
   889                                                          tag_attrs={'class': scheme, },
   890                                                          acceptable_attrs=acceptable_attrs)
   891                 return (self.formatter.url(1, target, **tag_attrs) +
   892                         self._link_description(desc, target, target) +
   893                         self.formatter.url(0))
   894 
   895             elif mt.group('attach_scheme'):
   896                 scheme = mt.group('attach_scheme')
   897                 url = wikiutil.url_unquote(mt.group('attach_addr'))
   898                 tag_attrs, query_args = self._get_params(params,
   899                                                          tag_attrs={'title': desc, },
   900                                                          acceptable_attrs=acceptable_attrs)
   901                 if scheme == 'attachment':
   902                     return (self.formatter.attachment_link(1, url, querystr=query_args, **tag_attrs) +
   903                             self._link_description(desc, target, url) +
   904                             self.formatter.attachment_link(0))
   905                 elif scheme == 'drawing':
   906                     return self.formatter.attachment_drawing(url, desc, alt=desc, **tag_attrs)
   907             else:
   908                 if desc:
   909                     desc = '|' + desc
   910                 return self.formatter.text('[[%s%s]]' % (target, desc))
   911     _link_target_repl = _link_repl
   912     _link_desc_repl = _link_repl
   913     _link_params_repl = _link_repl
   914 
   915     def _email_repl(self, word, groups):
   916         """Handle email addresses (without a leading mailto:)."""
   917         return (self.formatter.url(1, "mailto:%s" % word, css='mailto') +
   918                 self.formatter.text(word) +
   919                 self.formatter.url(0))
   920 
   921     def _sgml_entity_repl(self, word, groups):
   922         """Handle SGML entities."""
   923         return self.formatter.text(word)
   924 
   925     def _entity_repl(self, word, groups):
   926         """Handle numeric (decimal and hexadecimal) and symbolic SGML entities."""
   927         return self.formatter.rawHTML(word)
   928 
   929     def _indent_repl(self, match, groups):
   930         """Handle pure indentation (no - * 1. markup)."""
   931         result = []
   932         if not (self.in_li or self.in_dd):
   933             self._close_item(result)
   934             self.in_li = 1
   935             css_class = None
   936             if self.line_was_empty and not self.first_list_item:
   937                 css_class = 'gap'
   938             result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
   939         return ''.join(result)
   940 
   941     def _li_none_repl(self, match, groups):
   942         """Handle type=none (" .") lists."""
   943         result = []
   944         self._close_item(result)
   945         self.in_li = 1
   946         css_class = None
   947         if self.line_was_empty and not self.first_list_item:
   948             css_class = 'gap'
   949         result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
   950         return ''.join(result)
   951 
   952     def _li_repl(self, match, groups):
   953         """Handle bullet (" *") lists."""
   954         result = []
   955         self._close_item(result)
   956         self.in_li = 1
   957         css_class = None
   958         if self.line_was_empty and not self.first_list_item:
   959             css_class = 'gap'
   960         result.append(self.formatter.listitem(1, css_class=css_class))
   961         return ''.join(result)
   962 
   963     def _ol_repl(self, match, groups):
   964         """Handle numbered lists."""
   965         return self._li_repl(match, groups)
   966 
   967     def _dl_repl(self, match, groups):
   968         """Handle definition lists."""
   969         result = []
   970         self._close_item(result)
   971         self.in_dd = 1
   972         result.extend([
   973             self.formatter.definition_term(1),
   974             self.formatter.text(match[1:-3].lstrip(' ')),
   975             self.formatter.definition_term(0),
   976             self.formatter.definition_desc(1),
   977         ])
   978         return ''.join(result)
   979 
   980     def _indent_level(self):
   981         """Return current char-wise indent level."""
   982         return len(self.list_indents) and self.list_indents[-1]
   983 
    def _indent_to(self, new_level, list_type, numtype, numstart):
        """Close and open lists so that the current indent level becomes new_level.

        Lists indented deeper than new_level get closed; if new_level is then
        deeper than the current level, one new list of type list_type is
        opened at new_level. An open table is closed when the level changes.

        @param new_level: the wanted indent level (compared to _indent_level())
        @param list_type: 'ol' opens a numbered list, 'dl' a definition list,
                          anything else a bullet list
        @param numtype: handed to formatter.number_list() when opening an 'ol'
        @param numstart: handed to formatter.number_list() when opening an 'ol'
        @return: the closing markup followed by the opening markup
        """
        # two separate lists are needed (openlist = closelist = [] would make
        # both names refer to the same list object):
        openlist = []   # don't make one out of these two statements!
        closelist = []

        # a table does not survive a change of the indent level
        if self._indent_level() != new_level and self.in_table:
            closelist.append(self.formatter.table(0))
            self.in_table = 0

        # close all lists that are indented deeper than the wanted level
        while self._indent_level() > new_level:
            self._close_item(closelist)
            if self.list_types[-1] == 'ol':
                tag = self.formatter.number_list(0)
            elif self.list_types[-1] == 'dl':
                tag = self.formatter.definition_list(0)
            else:
                tag = self.formatter.bullet_list(0)
            closelist.append(tag)

            del self.list_indents[-1]
            del self.list_types[-1]

            # back in the enclosing list: flag its item / description as open again
            if self.list_types: # we are still in a list
                if self.list_types[-1] == 'dl':
                    self.in_dd = 1
                else:
                    self.in_li = 1

        # Open new list, if necessary
        if self._indent_level() < new_level:
            self.list_indents.append(new_level)
            self.list_types.append(list_type)

            # an open paragraph gets closed before the list is opened
            if self.formatter.in_p:
                closelist.append(self.formatter.paragraph(0))

            if list_type == 'ol':
                tag = self.formatter.number_list(1, numtype, numstart)
            elif list_type == 'dl':
                tag = self.formatter.definition_list(1)
            else:
                tag = self.formatter.bullet_list(1)
            openlist.append(tag)

            # the fresh list has no open item yet
            self.first_list_item = 1
            self.in_li = 0
            self.in_dd = 0

        # If list level changes, close an open table
        if self.in_table and (openlist or closelist):
            # the table end has to come before everything else that gets closed
            closelist[0:0] = [self.formatter.table(0)]
            self.in_table = 0

        self.in_list = self.list_types != []
        return ''.join(closelist) + ''.join(openlist)
  1039 
  1040     def _undent(self):
  1041         """Close all open lists."""
  1042         result = []
  1043         #result.append("<!-- _undent start -->\n")
  1044         self._close_item(result)
  1045         for type in self.list_types[::-1]:
  1046             if type == 'ol':
  1047                 result.append(self.formatter.number_list(0))
  1048             elif type == 'dl':
  1049                 result.append(self.formatter.definition_list(0))
  1050             else:
  1051                 result.append(self.formatter.bullet_list(0))
  1052         #result.append("<!-- _undent end -->\n")
  1053         self.list_indents = []
  1054         self.list_types = []
  1055         return ''.join(result)
  1056 
  1057     def _getTableAttrs(self, attrdef):
  1058         attr_rule = r'^(\|\|)*<(?!<)(?P<attrs>[^>]*?)>'
  1059         m = re.match(attr_rule, attrdef, re.U)
  1060         if not m:
  1061             return {}, ''
  1062         attrdef = m.group('attrs')
  1063 
  1064         # extension for special table markup
  1065         def table_extension(key, parser, attrs, wiki_parser=self):
  1066             """ returns: tuple (found_flag, msg)
  1067                 found_flag: whether we found something and were able to process it here
  1068                   true for special stuff like 100% or - or #AABBCC
  1069                   false for style xxx="yyy" attributes
  1070                 msg: "" or an error msg
  1071             """
  1072             _ = wiki_parser._
  1073             found = False
  1074             msg = ''
  1075             if key[0] in "0123456789":
  1076                 token = parser.get_token()
  1077                 if token != '%':
  1078                     wanted = '%'
  1079                     msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
  1080                         'wanted': wanted, 'key': key, 'token': token}
  1081                 else:
  1082                     try:
  1083                         dummy = int(key)
  1084                     except ValueError:
  1085                         msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
  1086                             'key': key, 'token': token}
  1087                     else:
  1088                         found = True
  1089                         attrs['width'] = '"%s%%"' % key
  1090             elif key == '-':
  1091                 arg = parser.get_token()
  1092                 try:
  1093                     dummy = int(arg)
  1094                 except ValueError:
  1095                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
  1096                         'arg': arg, 'key': key}
  1097                 else:
  1098                     found = True
  1099                     attrs['colspan'] = '"%s"' % arg
  1100             elif key == '|':
  1101                 arg = parser.get_token()
  1102                 try:
  1103                     dummy = int(arg)
  1104                 except ValueError:
  1105                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
  1106                         'arg': arg, 'key': key}
  1107                 else:
  1108                     found = True
  1109                     attrs['rowspan'] = '"%s"' % arg
  1110             elif key == '(':
  1111                 found = True
  1112                 attrs['align'] = '"left"'
  1113             elif key == ':':
  1114                 found = True
  1115                 attrs['align'] = '"center"'
  1116             elif key == ')':
  1117                 found = True
  1118                 attrs['align'] = '"right"'
  1119             elif key == '^':
  1120                 found = True
  1121                 attrs['valign'] = '"top"'
  1122             elif key == 'v':
  1123                 found = True
  1124                 attrs['valign'] = '"bottom"'
  1125             elif key == '#':
  1126                 arg = parser.get_token()
  1127                 try:
  1128                     if len(arg) != 6:
  1129                         raise ValueError
  1130                     dummy = int(arg, 16)
  1131                 except ValueError:
  1132                     msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
  1133                         'arg': arg, 'key': key}
  1134                 else:
  1135                     found = True
  1136                     attrs['bgcolor'] = '"#%s"' % arg
  1137             return found, self.formatter.rawHTML(msg)
  1138 
  1139         # scan attributes
  1140         attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension)
  1141         if msg:
  1142             msg = '<strong class="highlight">%s</strong>' % msg
  1143         #logging.debug("parseAttributes returned %r" % attr)
  1144         return attr, msg
  1145 
  1146     def _tableZ_repl(self, word, groups):
  1147         """Handle table row end."""
  1148         if self.in_table:
  1149             result = ''
  1150             # REMOVED: check for self.in_li, p should always close
  1151             if self.formatter.in_p:
  1152                 result = self.formatter.paragraph(0)
  1153             result += self.formatter.table_cell(0) + self.formatter.table_row(0)
  1154             return result
  1155         else:
  1156             return self.formatter.text(word)
  1157 
  1158     def _table_repl(self, word, groups):
  1159         """Handle table cell separator."""
  1160         if self.in_table:
  1161             result = []
  1162             # check for attributes
  1163             attrs, attrerr = self._getTableAttrs(word)
  1164 
  1165             # start the table row?
  1166             if self.table_rowstart:
  1167                 self.table_rowstart = 0
  1168                 result.append(self.formatter.table_row(1, attrs))
  1169             else:
  1170                 # Close table cell, first closing open p
  1171                 # REMOVED check for self.in_li, paragraph should close always!
  1172                 if self.formatter.in_p:
  1173                     result.append(self.formatter.paragraph(0))
  1174                 result.append(self.formatter.table_cell(0))
  1175 
  1176             # check for adjacent cell markers
  1177             if word.count("|") > 2:
  1178                 if 'align' not in attrs and \
  1179                    not ('style' in attrs and 'text-align' in attrs['style'].lower()):
  1180                     # add center alignment if we don't have some alignment already
  1181                     attrs['align'] = '"center"'
  1182                 if 'colspan' not in attrs:
  1183                     attrs['colspan'] = '"%d"' % (word.count("|")/2)
  1184 
  1185             # return the complete cell markup
  1186             result.append(self.formatter.table_cell(1, attrs) + attrerr)
  1187             result.append(self._line_anchordef())
  1188             return ''.join(result)
  1189         else:
  1190             return self.formatter.text(word)
  1191 
  1192     def _heading_repl(self, word, groups):
  1193         """Handle section headings."""
  1194         heading_text = groups.get('heading_text', '')
  1195         depth = min(len(groups.get('hmarker')), 5)
  1196         return ''.join([
  1197             self._closeP(),
  1198             self.formatter.heading(1, depth, id=heading_text),
  1199             self.formatter.text(heading_text),
  1200             self.formatter.heading(0, depth),
  1201         ])
  1202     _heading_text_repl = _heading_repl
  1203 
  1204     def _parser_repl(self, word, groups):
  1205         """Handle parsed code displays."""
  1206         self.parser = None
  1207         self.parser_name = None
  1208         self.parser_lines = []
  1209         parser_line = word = groups.get('parser_line', u'')
  1210         parser_name = groups.get('parser_name', None)
  1211         parser_args = groups.get('parser_args', None)
  1212         parser_nothing = groups.get('parser_nothing', None)
  1213         parser_unique = groups.get('parser_unique', u'') or u''
  1214         #logging.debug("_parser_repl: parser_name %r parser_args %r parser_unique %r" % (parser_name, parser_args, parser_unique))
  1215         if set(parser_unique) == set('{'): # just some more {{{{{{
  1216             parser_unique = u'}' * len(parser_unique) # for symmetry cosmetic reasons
  1217         self.parser_unique = parser_unique
  1218         if parser_name is not None:
  1219             # First try to find a parser for this
  1220             if parser_name == u'':
  1221                 # empty bang paths lead to a normal code display
  1222                 # can be used to escape real, non-empty bang paths
  1223                 #logging.debug("_parser_repl: empty bangpath")
  1224                 parser_name = 'text'
  1225                 word = ''
  1226         elif parser_nothing is None:
  1227             # there was something non-whitespace following the {{{
  1228             parser_name = 'text'
  1229 
  1230         self.setParser(parser_name)
  1231         if not self.parser and parser_name:
  1232             # loading the desired parser didn't work, retry a safe option:
  1233             wanted_parser = parser_name
  1234             parser_name = 'text'
  1235             self.setParser(parser_name)
  1236             word = '%s %s (-)' % (wanted_parser, parser_args)  # indication that it did not work
  1237 
  1238         if self.parser:
  1239             self.parser_name = parser_name
  1240             self.in_pre = 'found_parser'
  1241             if word:
  1242                 self.parser_lines.append(word)
  1243         else:
  1244             self.in_pre = 'search_parser'
  1245 
  1246         #logging.debug("_parser_repl: in_pre %r line %d" % (self.in_pre, self.lineno))
  1247         return ''
  1248     _parser_unique_repl = _parser_repl
  1249     _parser_line_repl = _parser_repl
  1250     _parser_name_repl = _parser_repl
  1251     _parser_args_repl = _parser_repl
  1252     _parser_nothing_repl = _parser_repl
  1253 
  1254     def _parser_content(self, line):
  1255         """ handle state and collecting lines for parser in pre/parser sections """
  1256         #logging.debug("parser_content: %r" % line)
  1257         if self.in_pre == 'search_parser' and line.strip():
  1258             # try to find a parser specification
  1259             parser_name = ''
  1260             if line.strip().startswith("#!"):
  1261                 parser_name = line.strip()[2:]
  1262             if parser_name:
  1263                 parser_name = parser_name.split()[0]
  1264             else:
  1265                 parser_name = 'text'
  1266             self.setParser(parser_name)
  1267 
  1268             if not self.parser:
  1269                 parser_name = 'text'
  1270                 self.setParser(parser_name)
  1271 
  1272             if self.parser:
  1273                 self.in_pre = 'found_parser'
  1274                 self.parser_lines.append(line)
  1275                 self.parser_name = parser_name
  1276 
  1277         elif self.in_pre == 'found_parser':
  1278             # collect the content lines
  1279             self.parser_lines.append(line)
  1280 
  1281         return ''  # we emit the content after reaching the end of the parser/pre section
  1282 
  1283     def _parser_end_repl(self, word, groups):
  1284         """ when we reach the end of a parser/pre section,
  1285             we call the parser with the lines we collected
  1286         """
  1287         #if self.in_pre:
  1288         self.in_pre = None
  1289         self.inhibit_p = 0
  1290         #logging.debug("_parser_end_repl: in_pre %r line %d" % (self.in_pre, self.lineno))
  1291         self.request.write(self._closeP())
  1292         if self.parser_name is None:
  1293             # we obviously did not find a parser specification
  1294             self.parser_name = 'text'
  1295         result = self.formatter.parser(self.parser_name, self.parser_lines)
  1296         del self.parser_lines
  1297         self.in_pre = None
  1298         self.parser = None
  1299         return result
  1300 
  1301     def _smiley_repl(self, word, groups):
  1302         """Handle smileys."""
  1303         return self.formatter.smiley(word)
  1304 
  1305     def _comment_repl(self, word, groups):
  1306         # if we are in a paragraph, we must close it so that normal text following
  1307         # in the line below the comment will reopen a new paragraph.
  1308         if self.formatter.in_p:
  1309             self.formatter.paragraph(0)
  1310         self.line_is_empty = 1 # markup following comment lines treats them as if they were empty
  1311         return self.formatter.comment(word)
  1312 
  1313     def _closeP(self):
  1314         if self.formatter.in_p:
  1315             return self.formatter.paragraph(0)
  1316         return ''
  1317 
  1318     def _macro_repl(self, word, groups):
  1319         """Handle macros."""
  1320         macro_name = groups.get('macro_name')
  1321         macro_args = groups.get('macro_args')
  1322         self.inhibit_p = 0 # 1 fixed macros like UserPreferences (in the past, gone now), 0 fixes paragraph formatting for macros
  1323 
  1324         # create macro instance
  1325         if self.macro is None:
  1326             self.macro = macro.Macro(self)
  1327         return self.formatter.macro(self.macro, macro_name, macro_args, markup=groups.get('macro'))
  1328     _macro_name_repl = _macro_repl
  1329     _macro_args_repl = _macro_repl
  1330 
  1331     def scan(self, line, inhibit_p=False):
  1332         """ Scans one line
  1333         Append text before match, invoke replace() with match, and add text after match.
  1334         """
  1335         result = []
  1336         lastpos = 0 # absolute position within line
  1337         line_length = len(line)
  1338 
  1339         ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line)
  1340         while lastpos <= line_length: # it is <=, not <, because we need to process the empty line also
  1341             parser_scan_re = re.compile(self.parser_scan_rule % re.escape(self.parser_unique), re.VERBOSE|re.UNICODE)
  1342             scan_re = self.in_pre and parser_scan_re or self.scan_re
  1343             match = scan_re.search(line, lastpos)
  1344             if match:
  1345                 start = match.start()
  1346                 if lastpos < start:
  1347                     if self.in_pre:
  1348                         self._parser_content(line[lastpos:start])
  1349                     else:
  1350                         ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()])
  1351                         if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p):
  1352                             result.append(self.formatter.paragraph(1, css_class="line862"))
  1353                         # add the simple text in between lastpos and beginning of current match
  1354                         result.append(self.formatter.text(line[lastpos:start]))
  1355 
  1356                 # Replace match with markup
  1357                 if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or
  1358                         self.in_table or self.in_list):
  1359                     result.append(self.formatter.paragraph(1, css_class="line867"))
  1360                 result.append(self.replace(match, inhibit_p))
  1361                 end = match.end()
  1362                 lastpos = end
  1363                 if start == end:
  1364                     # we matched an empty string
  1365                     lastpos += 1 # proceed, we don't want to match this again
  1366             else:
  1367                 if self.in_pre:
  1368                     # ilastpos is more then 0 and result of line slice is empty make useless line
  1369                     if not (lastpos > 0 and line[lastpos:] == ''):
  1370                         self._parser_content(line[lastpos:])
  1371                 elif line[lastpos:]:
  1372                     ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:])
  1373                     if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or
  1374                             self.in_li or self.in_dd):
  1375                         result.append(self.formatter.paragraph(1, css_class="line874"))
  1376                     # add the simple text (no markup) after last match
  1377                     result.append(self.formatter.text(line[lastpos:]))
  1378                 break # nothing left to do!
  1379         return u''.join(result)
  1380 
  1381     def _replace(self, match):
  1382         """ Same as replace() but with no magic """
  1383         for name, text in match.groupdict().iteritems():
  1384             if text is not None:
  1385                 # Get replace method and replace text
  1386                 replace_func = getattr(self, '_%s_repl' % name)
  1387                 result = replace_func(text, match.groupdict())
  1388                 return result
  1389 
  1390     def replace(self, match, inhibit_p=False):
  1391         """ Replace match using type name """
  1392         result = []
  1393         for type, hit in match.groupdict().items():
  1394             if hit is not None and not type in ["hmarker", ]:
  1395 
  1396                 ##result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit))
  1397                 # Open p for certain types
  1398                 if not (inhibit_p or self.inhibit_p or self.formatter.in_p
  1399                         or self.in_pre or (type in self.no_new_p_before)):
  1400                     result.append(self.formatter.paragraph(1, css_class="line891"))
  1401 
  1402                 # Get replace method and replace hit
  1403                 replace_func = getattr(self, '_%s_repl' % type)
  1404                 result.append(replace_func(hit, match.groupdict()))
  1405                 return ''.join(result)
  1406         else:
  1407             # We should never get here
  1408             import pprint
  1409             raise Exception("Can't handle match %r\n%s\n%s" % (
  1410                 match,
  1411                 pprint.pformat(match.groupdict()),
  1412                 pprint.pformat(match.groups()),
  1413             ))
  1414 
  1415         return ""
  1416 
  1417     def _line_anchordef(self):
  1418         if self.line_anchors and not self.line_anchor_printed:
  1419             self.line_anchor_printed = 1
  1420             return self.formatter.line_anchordef(self.lineno)
  1421         else:
  1422             return ''
  1423 
  1424     def format(self, formatter, inhibit_p=False):
  1425         """ For each line, scan through looking for magic
  1426             strings, outputting verbatim any intervening text.
  1427         """
  1428         self.formatter = formatter
  1429         self.hilite_re = self.formatter.page.hilite_re
  1430 
  1431         # get text and replace TABs
  1432         rawtext = self.raw.expandtabs()
  1433 
  1434         # go through the lines
  1435         self.lineno = self.start_line
  1436         self.lines = self.eol_re.split(rawtext)
  1437         self.line_is_empty = 0
  1438 
  1439         self.in_processing_instructions = 1
  1440 
  1441         if self.wrapping_div_class:
  1442             self.request.write(self.formatter.div(1, css_class=self.wrapping_div_class))
  1443 
  1444         # Main loop
  1445         for line in self.lines:
  1446             self.lineno += 1
  1447 
  1448             self.line_anchor_printed = 0
  1449             if not self.in_table:
  1450                 self.request.write(self._line_anchordef())
  1451             self.table_rowstart = 1
  1452             self.line_was_empty = self.line_is_empty
  1453             self.line_is_empty = 0
  1454             self.first_list_item = 0
  1455             self.inhibit_p = 0
  1456 
  1457             # ignore processing instructions
  1458             if self.in_processing_instructions:
  1459                 found = False
  1460                 for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
  1461                            "#pragma", "#form", "#acl", "#language"):
  1462                     if line.lower().startswith(pi):
  1463                         self.request.write(self.formatter.comment(line))
  1464                         found = True
  1465                         break
  1466                 if not found:
  1467                     self.in_processing_instructions = 0
  1468                 else:
  1469                     continue # do not parse this line
  1470 
  1471             if not self.in_pre:
  1472                 # we don't have \n as whitespace any more
  1473                 # This is the space between lines we join to one paragraph
  1474                 line += ' '
  1475 
  1476                 # Paragraph break on empty lines
  1477                 if not line.strip():
  1478                     if self.in_table:
  1479                         self.request.write(self.formatter.table(0))
  1480                         self.request.write(self._line_anchordef())
  1481                         self.in_table = 0
  1482                     # CHANGE: removed check for not self.list_types
  1483                     # p should close on every empty line
  1484                     if self.formatter.in_p:
  1485                         self.request.write(self.formatter.paragraph(0))
  1486                     self.line_is_empty = 1
  1487                     continue
  1488 
  1489                 # Check indent level
  1490                 indent = self.indent_re.match(line)
  1491                 indlen = len(indent.group(0))
  1492                 indtype = "ul"
  1493                 numtype = None
  1494                 numstart = None
  1495                 if indlen:
  1496                     match = self.ol_re.match(line)
  1497                     if match:
  1498                         numtype, numstart = match.group(0).strip().split('.')
  1499                         numtype = numtype[0]
  1500 
  1501                         if numstart and numstart[0] == "#":
  1502                             numstart = int(numstart[1:])
  1503                         else:
  1504                             numstart = None
  1505 
  1506                         indtype = "ol"
  1507                     else:
  1508                         match = self.dl_re.match(line)
  1509                         if match:
  1510                             indtype = "dl"
  1511 
  1512                 # output proper indentation tags
  1513                 self.request.write(self._indent_to(indlen, indtype, numtype, numstart))
  1514 
  1515                 # Table mode
  1516                 # TODO: move into function?
  1517                 if (not self.in_table and line[indlen:indlen + 2] == "||"
  1518                     and line.endswith("|| ") and len(line) >= 5 + indlen):
  1519                     # Start table
  1520                     if self.list_types and not self.in_li:
  1521                         self.request.write(self.formatter.listitem(1, style="list-style-type:none"))
  1522                         ## CHANGE: no automatic p on li
  1523                         ##self.request.write(self.formatter.paragraph(1))
  1524                         self.in_li = 1
  1525 
  1526                     # CHANGE: removed check for self.in_li
  1527                     # paragraph should end before table, always!
  1528                     if self.formatter.in_p:
  1529                         self.request.write(self.formatter.paragraph(0))
  1530                     attrs, attrerr = self._getTableAttrs(line[indlen+2:])
  1531                     self.request.write(self.formatter.table(1, attrs) + attrerr)
  1532                     self.in_table = True # self.lineno
  1533                 elif (self.in_table and not
  1534                       # intra-table comments should not break a table
  1535                       (line.startswith("##") or
  1536                        line[indlen:indlen + 2] == "||" and
  1537                        line.endswith("|| ") and
  1538                        len(line) >= 5 + indlen)):
  1539 
  1540                     # Close table
  1541                     self.request.write(self.formatter.table(0))
  1542                     self.request.write(self._line_anchordef())
  1543                     self.in_table = 0
  1544 
  1545             # Scan line, format and write
  1546             formatted_line = self.scan(line, inhibit_p=inhibit_p)
  1547             self.request.write(formatted_line)
  1548 
  1549 
  1550         # Close code displays, paragraphs, tables and open lists
  1551         self.request.write(self._undent())
  1552         if self.in_pre: self.request.write(self.formatter.preformatted(0))
  1553         if self.formatter.in_p: self.request.write(self.formatter.paragraph(0))
  1554         if self.in_table: self.request.write(self.formatter.table(0))
  1555 
  1556         if self.wrapping_div_class:
  1557             self.request.write(self.formatter.div(0))
  1558 
  1559 
  1560     # Private helpers ------------------------------------------------------------
  1561 
  1562     def setParser(self, name):
  1563         """ Set parser to parser named 'name' """
  1564         # XXX this is done by the formatter as well
  1565         try:
  1566             self.parser = wikiutil.searchAndImportPlugin(self.request.cfg, "parser", name)
  1567         except wikiutil.PluginMissingError:
  1568             self.parser = None
  1569 
# Remove the module-level dummy "_" again. It is defined near the top of this
# module as an identity function and wraps the quickhelp string of the Parser
# class (presumably just to mark that text for translation - TODO confirm).
del _