MoinMoin/script/migration/text_moin158_wiki.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Wed, 11 Feb 2009 02:34:33 +0100
changeset 4569 3caaa8c74c41
parent 4363 817d99d715fe
permissions -rw-r--r--
wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
     1 # -*- coding: iso-8859-1 -*-
     2 """
     3     MoinMoin - MoinMoin Wiki Markup Parser
     4 
     5     @copyright: 2000, 2001, 2002 by Jürgen Hermann <jh@web.de>
     6     @license: GNU GPL, see COPYING for details.
     7 """
     8 
     9 import os, re
    10 from MoinMoin import config, wikiutil
    11 from MoinMoin import macro as wikimacro
    12 from MoinMoin.Page import Page
    13 from MoinMoin.util import web
    14 
    15 Dependencies = []
    16 
    17 class Parser:
    18     """
    19         Object that turns Wiki markup into HTML.
    20 
    21         All formatting commands can be parsed one line at a time, though
    22         some state is carried over between lines.
    23 
    24         Methods named like _*_repl() are responsible to handle the named regex
    25         patterns defined in print_html().
    26     """
    27 
    # allow caching
    # (class-level flags; presumably consulted by the caching machinery
    # outside this file -- TODO confirm)
    caching = 1
    Dependencies = []

    # some common strings
    PARENT_PREFIX = wikiutil.PARENT_PREFIX
    # Punctuation characters, regex-escaped for use inside a character class.
    # url_rule and the interwiki rule accept one of them only when it is
    # followed by a character that is neither whitespace nor punctuation.
    punct_pattern = re.escape(u'''"\'}]|:,.)?!''')
    # schemes dispatched to self.attachment() instead of being plain URLs
    attachment_schemas = ["attachment", "inline", "drawing", ]
    url_schemas = ['http', 'https', 'ftp', 'wiki', 'mailto', 'nntp', 'news',
                   'telnet', 'file', 'irc', 'ircs',
                   'webcal', 'ed2k', 'xmpp', 'rootz',
                  ]
    # regex alternation of every recognized scheme (URL and attachment ones)
    url_pattern = u'|'.join(url_schemas + attachment_schemas)

    # some common rules
    # word_rule: a CamelCase WikiName -- an optional PARENT_PREFIX followed by
    # one or more segments, each optionally introduced by CHILD_PREFIX and
    # consisting of at least two "one uppercase + lowercase letters" humps.
    # The name must not be directly preceded or followed by a letter.
    word_rule = ur'(?:(?<![%(u)s%(l)s])|^)%(parent)s(?:%(subpages)s(?:[%(u)s][%(l)s]+){2,})+(?![%(u)s%(l)s]+)' % {
        'u': config.chars_upper,
        'l': config.chars_lower,
        'subpages': wikiutil.CHILD_PREFIX + '?',
        'parent': ur'(?:%s)?' % re.escape(PARENT_PREFIX),
    }
    # url_rule: "<scheme>:" at the start of the text or after a non-word
    # character, followed by the URL body (see punct_pattern for how
    # punctuation inside the body is treated).
    url_rule = ur'%(url_guard)s(%(url)s)\:([^\s\<%(punct)s]|([%(punct)s][^\s\<%(punct)s]))+' % {
        'url_guard': u'(^|(?<!\w))',
        'url': url_pattern,
        'punct': punct_pattern,
    }

    # ol_rule: indented "<digits>." or one of "a." "A." "i." "I.", with an
    # optional "#<start number>", followed by whitespace
    ol_rule = ur"^\s+(?:[0-9]+|[aAiI])\.(?:#\d+)?\s"
    # dl_rule: indented "term:: " of a definition list
    dl_rule = ur"^\s+.*?::\s"

    # smiley markup strings as dict keys (all values are None); only the keys
    # are used when the smiley alternation is built for formatting_rules
    config_smileys = dict([(key, None) for key in config.smileys])
    59 
    # the big, fat, ugly one ;)
    # One named group per line; per the class docstring, a group <name> is
    # handled by the method _<name>_repl().
    # The template is filled in two stages: the class-level "%" below
    # substitutes the shared sub-patterns, while "%%(macronames)s" survives
    # as "%(macronames)s" and is substituted per instance in __init__.
    # NOTE(review): the lines are presumably joined into one alternation
    # before compiling -- that code is not in this part of the file; confirm.
    formatting_rules = ur"""(?P<ent_numeric>&#(\d{1,5}|x[0-9a-fA-F]+);)
(?:(?P<emph_ibb>'''''(?=[^']+'''))
(?P<emph_ibi>'''''(?=[^']+''))
(?P<emph_ib_or_bi>'{5}(?=[^']))
(?P<emph>'{2,3})
(?P<u>__)
(?P<sup>\^.*?\^)
(?P<sub>,,[^,]{1,40},,)
(?P<tt>\{\{\{.*?\}\}\})
(?P<processor>(\{\{\{(#!.*|\s*$)))
(?P<pre>(\{\{\{ ?|\}\}\}))
(?P<small>(\~- ?|-\~))
(?P<big>(\~\+ ?|\+\~))
(?P<strike>(--\(|\)--))
(?P<rule>-{4,})
(?P<comment>^\#\#.*$)
(?P<macro>\[\[(%%(macronames)s)(?:\(.*?\))?\]\]))
(?P<ol>%(ol_rule)s)
(?P<dl>%(dl_rule)s)
(?P<li>^\s+\*\s*)
(?P<li_none>^\s+\.\s*)
(?P<indent>^\s+)
(?P<tableZ>\|\| $)
(?P<table>(?:\|\|)+(?:<[^>]*?>)?(?!\|? $))
(?P<heading>^\s*(?P<hmarker>=+)\s.*\s(?P=hmarker) $)
(?P<interwiki>[A-Z][a-zA-Z]+\:[^\s'\"\:\<\|]([^\s%(punct)s]|([%(punct)s][^\s%(punct)s]))+)
(?P<word>%(word_rule)s)
(?P<url_bracket>\[((%(url)s)\:|#|\:)[^\s\]]+(\s[^\]]+)?\])
(?P<url>%(url_rule)s)
(?P<email>[-\w._+]+\@[\w-]+(\.[\w-]+)+)
(?P<smiley>(?<=\s)(%(smiley)s)(?=\s))
(?P<smileyA>^(%(smiley)s)(?=\s))
(?P<ent_symbolic>&\w+;)
(?P<ent>[<>&])
(?P<wikiname_bracket>\[".*?"\])
(?P<tt_bt>`.*?`)"""  % {

        'url': url_pattern,
        'punct': punct_pattern,
        'ol_rule': ol_rule,
        'dl_rule': dl_rule,
        'url_rule': url_rule,
        'word_rule': word_rule,
        'smiley': u'|'.join(map(re.escape, config_smileys.keys()))}
   105 
   106     # Don't start p before these
   107     no_new_p_before = ("heading rule table tableZ tr td "
   108                        "ul ol dl dt dd li li_none indent "
   109                        "macro processor pre")
   110     no_new_p_before = no_new_p_before.split()
   111     no_new_p_before = dict(zip(no_new_p_before, [1] * len(no_new_p_before)))
   112 
   113     def __init__(self, raw, request, **kw):
   114         self.raw = raw
   115         self.request = request
   116         self.form = request.form
   117         self._ = request.getText
   118         self.cfg = request.cfg
   119         self.line_anchors = kw.get('line_anchors', True)
   120         self.macro = None
   121         self.start_line = kw.get('start_line', 0)
   122 
   123         self.is_em = 0
   124         self.is_b = 0
   125         self.is_u = 0
   126         self.is_strike = 0
   127         self.lineno = 0
   128         self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl>
   129         self.in_li = 0 # between <li> and </li>
   130         self.in_dd = 0 # between <dd> and </dd>
   131         self.in_pre = 0
   132         self.in_table = 0
   133         self.is_big = False
   134         self.is_small = False
   135         self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph
   136         self.titles = request._page_headings
   137 
   138         # holds the nesting level (in chars) of open lists
   139         self.list_indents = []
   140         self.list_types = []
   141 
   142         self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(wikimacro.getNames(self.cfg))}
   143 
   144     def _close_item(self, result):
   145         #result.append("<!-- close item begin -->\n")
   146         if self.in_table:
   147             result.append(self.formatter.table(0))
   148             self.in_table = 0
   149         if self.in_li:
   150             self.in_li = 0
   151             if self.formatter.in_p:
   152                 result.append(self.formatter.paragraph(0))
   153             result.append(self.formatter.listitem(0))
   154         if self.in_dd:
   155             self.in_dd = 0
   156             if self.formatter.in_p:
   157                 result.append(self.formatter.paragraph(0))
   158             result.append(self.formatter.definition_desc(0))
   159         #result.append("<!-- close item end -->\n")
   160 
   161 
   162     def interwiki(self, url_and_text, **kw):
   163         # TODO: maybe support [wiki:Page http://wherever/image.png] ?
   164         if len(url_and_text) == 1:
   165             url = url_and_text[0]
   166             text = None
   167         else:
   168             url, text = url_and_text
   169 
   170         # keep track of whether this is a self-reference, so links
   171         # are always shown even the page doesn't exist.
   172         is_self_reference = 0
   173         url2 = url.lower()
   174         if url2.startswith('wiki:self:'):
   175             url = url[10:] # remove "wiki:self:"
   176             is_self_reference = 1
   177         elif url2.startswith('wiki:'):
   178             url = url[5:] # remove "wiki:"
   179 
   180         tag, tail = wikiutil.split_wiki(url)
   181         if text is None:
   182             if tag:
   183                 text = tail
   184             else:
   185                 text = url
   186                 url = ""
   187         elif (url.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text]
   188               is_self_reference or # [wiki:Self:LocalPage text] or [:LocalPage:text]
   189               Page(self.request, url).exists()): # fancy link to local page [wiki:LocalPage text]
   190             return self._word_repl(url, text)
   191 
   192         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, url)
   193         href = wikiutil.join_wiki(wikiurl, wikitail)
   194 
   195         # check for image URL, and possibly return IMG tag
   196         if not kw.get('pretty_url', 0) and wikiutil.isPicture(wikitail):
   197             return self.formatter.image(src=href)
   198 
   199         # link to self?
   200         if wikitag is None:
   201             return self._word_repl(wikitail)
   202 
   203         return (self.formatter.interwikilink(1, tag, tail) +
   204                 self.formatter.text(text) +
   205                 self.formatter.interwikilink(0, tag, tail))
   206 
   207     def attachment(self, url_and_text, **kw):
   208         """ This gets called on attachment URLs.
   209         """
   210         _ = self._
   211         if len(url_and_text) == 1:
   212             url = url_and_text[0]
   213             text = None
   214         else:
   215             url, text = url_and_text
   216 
   217         inline = url[0] == 'i'
   218         drawing = url[0] == 'd'
   219         url = url.split(":", 1)[1]
   220         url = wikiutil.url_unquote(url)
   221         text = text or url
   222 
   223         from MoinMoin.action import AttachFile
   224         if drawing:
   225             return self.formatter.attachment_drawing(url, text)
   226 
   227         # check for image URL, and possibly return IMG tag
   228         # (images are always inlined, just like for other URLs)
   229         if not kw.get('pretty_url', 0) and wikiutil.isPicture(url):
   230             return self.formatter.attachment_image(url)
   231 
   232         # inline the attachment
   233         if inline:
   234             return self.formatter.attachment_inlined(url, text)
   235 
   236         return self.formatter.attachment_link(url, text)
   237 
   238     def _u_repl(self, word):
   239         """Handle underline."""
   240         self.is_u = not self.is_u
   241         return self.formatter.underline(self.is_u)
   242 
   243     def _strike_repl(self, word):
   244         """Handle strikethrough."""
   245         # XXX we don't really enforce the correct sequence --( ... )-- here
   246         self.is_strike = not self.is_strike
   247         return self.formatter.strike(self.is_strike)
   248 
   249     def _small_repl(self, word):
   250         """Handle small."""
   251         if word.strip() == '~-' and self.is_small:
   252             return self.formatter.text(word)
   253         if word.strip() == '-~' and not self.is_small:
   254             return self.formatter.text(word)
   255         self.is_small = not self.is_small
   256         return self.formatter.small(self.is_small)
   257 
   258     def _big_repl(self, word):
   259         """Handle big."""
   260         if word.strip() == '~+' and self.is_big:
   261             return self.formatter.text(word)
   262         if word.strip() == '+~' and not self.is_big:
   263             return self.formatter.text(word)
   264         self.is_big = not self.is_big
   265         return self.formatter.big(self.is_big)
   266 
   267     def _emph_repl(self, word):
   268         """Handle emphasis, i.e. '' and '''."""
   269         ##print "#", self.is_b, self.is_em, "#"
   270         if len(word) == 3:
   271             self.is_b = not self.is_b
   272             if self.is_em and self.is_b:
   273                 self.is_b = 2
   274             return self.formatter.strong(self.is_b)
   275         else:
   276             self.is_em = not self.is_em
   277             if self.is_em and self.is_b:
   278                 self.is_em = 2
   279             return self.formatter.emphasis(self.is_em)
   280 
   281     def _emph_ibb_repl(self, word):
   282         """Handle mixed emphasis, i.e. ''''' followed by '''."""
   283         self.is_b = not self.is_b
   284         self.is_em = not self.is_em
   285         if self.is_em and self.is_b:
   286             self.is_b = 2
   287         return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
   288 
   289     def _emph_ibi_repl(self, word):
   290         """Handle mixed emphasis, i.e. ''''' followed by ''."""
   291         self.is_b = not self.is_b
   292         self.is_em = not self.is_em
   293         if self.is_em and self.is_b:
   294             self.is_em = 2
   295         return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
   296 
   297     def _emph_ib_or_bi_repl(self, word):
   298         """Handle mixed emphasis, exactly five '''''."""
   299         ##print "*", self.is_b, self.is_em, "*"
   300         b_before_em = self.is_b > self.is_em > 0
   301         self.is_b = not self.is_b
   302         self.is_em = not self.is_em
   303         if b_before_em:
   304             return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
   305         else:
   306             return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)
   307 
   308 
   309     def _sup_repl(self, word):
   310         """Handle superscript."""
   311         return self.formatter.sup(1) + \
   312             self.formatter.text(word[1:-1]) + \
   313             self.formatter.sup(0)
   314 
   315     def _sub_repl(self, word):
   316         """Handle subscript."""
   317         return self.formatter.sub(1) + \
   318             self.formatter.text(word[2:-2]) + \
   319             self.formatter.sub(0)
   320 
   321 
   322     def _rule_repl(self, word):
   323         """Handle sequences of dashes."""
   324         result = self._undent() + self._closeP()
   325         if len(word) <= 4:
   326             result = result + self.formatter.rule()
   327         else:
   328             # Create variable rule size 1 - 6. Actual size defined in css.
   329             size = min(len(word), 10) - 4
   330             result = result + self.formatter.rule(size)
   331         return result
   332 
   333 
   334     def _word_repl(self, word, text=None):
   335         """Handle WikiNames."""
   336 
   337         # check for parent links
   338         # !!! should use wikiutil.AbsPageName here, but setting `text`
   339         # correctly prevents us from doing this for now
   340         if word.startswith(wikiutil.PARENT_PREFIX):
   341             if not text:
   342                 text = word
   343             word = '/'.join(filter(None, self.formatter.page.page_name.split('/')[:-1] + [word[wikiutil.PARENT_PREFIX_LEN:]]))
   344 
   345         if not text:
   346             # if a simple, self-referencing link, emit it as plain text
   347             if word == self.formatter.page.page_name:
   348                 return self.formatter.text(word)
   349             text = word
   350         if word.startswith(wikiutil.CHILD_PREFIX):
   351             word = self.formatter.page.page_name + '/' + word[wikiutil.CHILD_PREFIX_LEN:]
   352 
   353         # handle anchors
   354         parts = word.split("#", 1)
   355         anchor = ""
   356         if len(parts) == 2:
   357             word, anchor = parts
   358 
   359         return (self.formatter.pagelink(1, word, anchor=anchor) +
   360                 self.formatter.text(text) +
   361                 self.formatter.pagelink(0, word))
   362 
   363     def _notword_repl(self, word):
   364         """Handle !NotWikiNames."""
   365         return self.formatter.nowikiword(word[1:])
   366 
   367     def _interwiki_repl(self, word):
   368         """Handle InterWiki links."""
   369         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word)
   370         if wikitag_bad:
   371             return self.formatter.text(word)
   372         else:
   373             return self.interwiki(["wiki:" + word])
   374 
   375 
   376     def _url_repl(self, word):
   377         """Handle literal URLs including inline images."""
   378         scheme = word.split(":", 1)[0]
   379 
   380         if scheme == "wiki":
   381             return self.interwiki([word])
   382         if scheme in self.attachment_schemas:
   383             return self.attachment([word])
   384 
   385         if wikiutil.isPicture(word):
   386             word = wikiutil.mapURL(self.request, word)
   387             # Get image name http://here.com/dir/image.gif -> image
   388             name = word.split('/')[-1]
   389             name = ''.join(name.split('.')[:-1])
   390             return self.formatter.image(src=word, alt=name)
   391         else:
   392             return (self.formatter.url(1, word, css=scheme) +
   393                     self.formatter.text(word) +
   394                     self.formatter.url(0))
   395 
   396 
   397     def _wikiname_bracket_repl(self, word):
   398         """Handle special-char wikinames."""
   399         wikiname = word[2:-2]
   400         if wikiname:
   401             return self._word_repl(wikiname)
   402         else:
   403             return self.formatter.text(word)
   404 
   405 
   406     def _url_bracket_repl(self, word):
   407         """Handle bracketed URLs."""
   408 
   409         # Local extended link?
   410         if word[1] == ':':
   411             words = word[2:-1].split(':', 1)
   412             if len(words) == 1:
   413                 words = words * 2
   414             words[0] = 'wiki:Self:%s' % words[0]
   415             return self.interwiki(words, pretty_url=1)
   416             #return self._word_repl(words[0], words[1])
   417 
   418         # Traditional split on space
   419         words = word[1:-1].split(None, 1)
   420         if len(words) == 1:
   421             words = words * 2
   422 
   423         if words[0][0] == '#':
   424             # anchor link
   425             return (self.formatter.url(1, words[0]) +
   426                     self.formatter.text(words[1]) +
   427                     self.formatter.url(0))
   428 
   429         scheme = words[0].split(":", 1)[0]
   430         if scheme == "wiki":
   431             return self.interwiki(words, pretty_url=1)
   432         if scheme in self.attachment_schemas:
   433             return self.attachment(words, pretty_url=1)
   434 
   435         if wikiutil.isPicture(words[1]) and re.match(self.url_rule, words[1]):
   436             return (self.formatter.url(1, words[0], css='external') +
   437                     self.formatter.image(title=words[0], alt=words[0], src=words[1]) +
   438                     self.formatter.url(0))
   439         else:
   440             return (self.formatter.url(1, words[0], css=scheme) +
   441                     self.formatter.text(words[1]) +
   442                     self.formatter.url(0))
   443 
   444 
   445     def _email_repl(self, word):
   446         """Handle email addresses (without a leading mailto:)."""
   447         return (self.formatter.url(1, "mailto:" + word, css='mailto') +
   448                 self.formatter.text(word) +
   449                 self.formatter.url(0))
   450 
   451 
   452     def _ent_repl(self, word):
   453         """Handle SGML entities."""
   454         return self.formatter.text(word)
   455         #return {'&': '&amp;',
   456         #        '<': '&lt;',
   457         #        '>': '&gt;'}[word]
   458 
   459     def _ent_numeric_repl(self, word):
   460         """Handle numeric (decimal and hexadecimal) SGML entities."""
   461         return self.formatter.rawHTML(word)
   462 
   463     def _ent_symbolic_repl(self, word):
   464         """Handle symbolic SGML entities."""
   465         return self.formatter.rawHTML(word)
   466 
   467     def _indent_repl(self, match):
   468         """Handle pure indentation (no - * 1. markup)."""
   469         result = []
   470         if not (self.in_li or self.in_dd):
   471             self._close_item(result)
   472             self.in_li = 1
   473             css_class = None
   474             if self.line_was_empty and not self.first_list_item:
   475                 css_class = 'gap'
   476             result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
   477         return ''.join(result)
   478 
   479     def _li_none_repl(self, match):
   480         """Handle type=none (" .") lists."""
   481         result = []
   482         self._close_item(result)
   483         self.in_li = 1
   484         css_class = None
   485         if self.line_was_empty and not self.first_list_item:
   486             css_class = 'gap'
   487         result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none"))
   488         return ''.join(result)
   489 
   490     def _li_repl(self, match):
   491         """Handle bullet (" *") lists."""
   492         result = []
   493         self._close_item(result)
   494         self.in_li = 1
   495         css_class = None
   496         if self.line_was_empty and not self.first_list_item:
   497             css_class = 'gap'
   498         result.append(self.formatter.listitem(1, css_class=css_class))
   499         return ''.join(result)
   500 
   501     def _ol_repl(self, match):
   502         """Handle numbered lists."""
   503         return self._li_repl(match)
   504 
   505     def _dl_repl(self, match):
   506         """Handle definition lists."""
   507         result = []
   508         self._close_item(result)
   509         self.in_dd = 1
   510         result.extend([
   511             self.formatter.definition_term(1),
   512             self.formatter.text(match[1:-3].lstrip(' ')),
   513             self.formatter.definition_term(0),
   514             self.formatter.definition_desc(1),
   515         ])
   516         return ''.join(result)
   517 
   518 
   519     def _indent_level(self):
   520         """Return current char-wise indent level."""
   521         return len(self.list_indents) and self.list_indents[-1]
   522 
   523 
   524     def _indent_to(self, new_level, list_type, numtype, numstart):
   525         """Close and open lists."""
   526         open = []   # don't make one out of these two statements!
   527         close = []
   528 
   529         if self._indent_level() != new_level and self.in_table:
   530             close.append(self.formatter.table(0))
   531             self.in_table = 0
   532 
   533         while self._indent_level() > new_level:
   534             self._close_item(close)
   535             if self.list_types[-1] == 'ol':
   536                 tag = self.formatter.number_list(0)
   537             elif self.list_types[-1] == 'dl':
   538                 tag = self.formatter.definition_list(0)
   539             else:
   540                 tag = self.formatter.bullet_list(0)
   541             close.append(tag)
   542 
   543             del self.list_indents[-1]
   544             del self.list_types[-1]
   545 
   546             if self.list_types: # we are still in a list
   547                 if self.list_types[-1] == 'dl':
   548                     self.in_dd = 1
   549                 else:
   550                     self.in_li = 1
   551 
   552         # Open new list, if necessary
   553         if self._indent_level() < new_level:
   554             self.list_indents.append(new_level)
   555             self.list_types.append(list_type)
   556 
   557             if self.formatter.in_p:
   558                 close.append(self.formatter.paragraph(0))
   559 
   560             if list_type == 'ol':
   561                 tag = self.formatter.number_list(1, numtype, numstart)
   562             elif list_type == 'dl':
   563                 tag = self.formatter.definition_list(1)
   564             else:
   565                 tag = self.formatter.bullet_list(1)
   566             open.append(tag)
   567 
   568             self.first_list_item = 1
   569             self.in_li = 0
   570             self.in_dd = 0
   571 
   572         # If list level changes, close an open table
   573         if self.in_table and (open or close):
   574             close[0:0] = [self.formatter.table(0)]
   575             self.in_table = 0
   576 
   577         self.in_list = self.list_types != []
   578         return ''.join(close) + ''.join(open)
   579 
   580 
   581     def _undent(self):
   582         """Close all open lists."""
   583         result = []
   584         #result.append("<!-- _undent start -->\n")
   585         self._close_item(result)
   586         for type in self.list_types[::-1]:
   587             if type == 'ol':
   588                 result.append(self.formatter.number_list(0))
   589             elif type == 'dl':
   590                 result.append(self.formatter.definition_list(0))
   591             else:
   592                 result.append(self.formatter.bullet_list(0))
   593         #result.append("<!-- _undent end -->\n")
   594         self.list_indents = []
   595         self.list_types = []
   596         return ''.join(result)
   597 
   598 
   599     def _tt_repl(self, word):
   600         """Handle inline code."""
   601         return self.formatter.code(1) + \
   602             self.formatter.text(word[3:-3]) + \
   603             self.formatter.code(0)
   604 
   605 
   606     def _tt_bt_repl(self, word):
   607         """Handle backticked inline code."""
   608         # if len(word) == 2: return "" // removed for FCK editor
   609         return self.formatter.code(1, css="backtick") + \
   610             self.formatter.text(word[1:-1]) + \
   611             self.formatter.code(0)
   612 
   613 
   614     def _getTableAttrs(self, attrdef):
   615         # skip "|" and initial "<"
   616         while attrdef and attrdef[0] == "|":
   617             attrdef = attrdef[1:]
   618         if not attrdef or attrdef[0] != "<":
   619             return {}, ''
   620         attrdef = attrdef[1:]
   621 
   622         # extension for special table markup
   623         def table_extension(key, parser, attrs, wiki_parser=self):
   624             """ returns: tuple (found_flag, msg)
   625                 found_flag: whether we found something and were able to process it here
   626                   true for special stuff like 100% or - or #AABBCC
   627                   false for style xxx="yyy" attributes
   628                 msg: "" or an error msg
   629             """
   630             _ = wiki_parser._
   631             found = False
   632             msg = ''
   633             if key[0] in "0123456789":
   634                 token = parser.get_token()
   635                 if token != '%':
   636                     wanted = '%'
   637                     msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % {
   638                         'wanted': wanted, 'key': key, 'token': token}
   639                 else:
   640                     try:
   641                         dummy = int(key)
   642                     except ValueError:
   643                         msg = _('Expected an integer "%(key)s" before "%(token)s"') % {
   644                             'key': key, 'token': token}
   645                     else:
   646                         found = True
   647                         attrs['width'] = '"%s%%"' % key
   648             elif key == '-':
   649                 arg = parser.get_token()
   650                 try:
   651                     dummy = int(arg)
   652                 except ValueError:
   653                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
   654                         'arg': arg, 'key': key}
   655                 else:
   656                     found = True
   657                     attrs['colspan'] = '"%s"' % arg
   658             elif key == '|':
   659                 arg = parser.get_token()
   660                 try:
   661                     dummy = int(arg)
   662                 except ValueError:
   663                     msg = _('Expected an integer "%(arg)s" after "%(key)s"') % {
   664                         'arg': arg, 'key': key}
   665                 else:
   666                     found = True
   667                     attrs['rowspan'] = '"%s"' % arg
   668             elif key == '(':
   669                 found = True
   670                 attrs['align'] = '"left"'
   671             elif key == ':':
   672                 found = True
   673                 attrs['align'] = '"center"'
   674             elif key == ')':
   675                 found = True
   676                 attrs['align'] = '"right"'
   677             elif key == '^':
   678                 found = True
   679                 attrs['valign'] = '"top"'
   680             elif key == 'v':
   681                 found = True
   682                 attrs['valign'] = '"bottom"'
   683             elif key == '#':
   684                 arg = parser.get_token()
   685                 try:
   686                     if len(arg) != 6: raise ValueError
   687                     dummy = int(arg, 16)
   688                 except ValueError:
   689                     msg = _('Expected a color value "%(arg)s" after "%(key)s"') % {
   690                         'arg': arg, 'key': key}
   691                 else:
   692                     found = True
   693                     attrs['bgcolor'] = '"#%s"' % arg
   694             return found, self.formatter.rawHTML(msg)
   695 
   696         # scan attributes
   697         attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension)
   698         if msg:
   699             msg = '<strong class="highlight">%s</strong>' % msg
   700         return attr, msg
   701 
   702     def _tableZ_repl(self, word):
   703         """Handle table row end."""
   704         if self.in_table:
   705             result = ''
   706             # REMOVED: check for self.in_li, p should always close
   707             if self.formatter.in_p:
   708                 result = self.formatter.paragraph(0)
   709             result += self.formatter.table_cell(0) + self.formatter.table_row(0)
   710             return result
   711         else:
   712             return self.formatter.text(word)
   713 
   714     def _table_repl(self, word):
   715         """Handle table cell separator."""
   716         if self.in_table:
   717             result = []
   718             # check for attributes
   719             attrs, attrerr = self._getTableAttrs(word)
   720 
   721             # start the table row?
   722             if self.table_rowstart:
   723                 self.table_rowstart = 0
   724                 result.append(self.formatter.table_row(1, attrs))
   725             else:
   726                 # Close table cell, first closing open p
   727                 # REMOVED check for self.in_li, paragraph should close always!
   728                 if self.formatter.in_p:
   729                     result.append(self.formatter.paragraph(0))
   730                 result.append(self.formatter.table_cell(0))
   731 
   732             # check for adjacent cell markers
   733             if word.count("|") > 2:
   734                 if not attrs.has_key('align') and \
   735                    not (attrs.has_key('style') and 'text-align' in attrs['style'].lower()):
   736                     # add center alignment if we don't have some alignment already
   737                     attrs['align'] = '"center"'
   738                 if not attrs.has_key('colspan'):
   739                     attrs['colspan'] = '"%d"' % (word.count("|")/2)
   740 
   741             # return the complete cell markup
   742             result.append(self.formatter.table_cell(1, attrs) + attrerr)
   743             result.append(self._line_anchordef())
   744             return ''.join(result)
   745         else:
   746             return self.formatter.text(word)
   747 
   748 
   749     def _heading_repl(self, word):
   750         """Handle section headings."""
   751         from MoinMoin.support.python_compatibility import hash_new
   752 
   753         h = word.strip()
   754         level = 1
   755         while h[level:level+1] == '=':
   756             level += 1
   757         depth = min(5, level)
   758 
   759         # this is needed for Included pages
   760         # TODO but it might still result in unpredictable results
   761         # when included the same page multiple times
   762         title_text = h[level:-level].strip()
   763         pntt = self.formatter.page.page_name + title_text
   764         self.titles.setdefault(pntt, 0)
   765         self.titles[pntt] += 1
   766 
   767         unique_id = ''
   768         if self.titles[pntt] > 1:
   769             unique_id = '-%d' % self.titles[pntt]
   770         result = self._closeP()
   771         result += self.formatter.heading(1, depth, id="head-"+hash_new('sha1', pntt.encode(config.charset)).hexdigest()+unique_id)
   772 
   773         return (result + self.formatter.text(title_text) +
   774                 self.formatter.heading(0, depth))
   775 
   776     def _processor_repl(self, word):
   777         """Handle processed code displays."""
   778         if word[:3] == '{{{':
   779             word = word[3:]
   780 
   781         self.processor = None
   782         self.processor_name = None
   783         self.processor_is_parser = 0
   784         s_word = word.strip()
   785         if s_word == '#!':
   786             # empty bang paths lead to a normal code display
   787             # can be used to escape real, non-empty bang paths
   788             word = ''
   789             self.in_pre = 3
   790             return self._closeP() + self.formatter.preformatted(1)
   791         elif s_word[:2] == '#!':
   792             # First try to find a processor for this (will go away in 2.0)
   793             processor_name = s_word[2:].split()[0]
   794             self.setProcessor(processor_name)
   795 
   796         if self.processor:
   797             self.processor_name = processor_name
   798             self.in_pre = 2
   799             self.colorize_lines = [word]
   800             return ''
   801         elif s_word:
   802             self.in_pre = 3
   803             return self._closeP() + self.formatter.preformatted(1) + \
   804                    self.formatter.text(s_word + ' (-)')
   805         else:
   806             self.in_pre = 1
   807             return ''
   808 
   809     def _pre_repl(self, word):
   810         """Handle code displays."""
   811         word = word.strip()
   812         if word == '{{{' and not self.in_pre:
   813             self.in_pre = 3
   814             return self._closeP() + self.formatter.preformatted(self.in_pre)
   815         elif word == '}}}' and self.in_pre:
   816             self.in_pre = 0
   817             self.inhibit_p = 0
   818             return self.formatter.preformatted(self.in_pre)
   819         return self.formatter.text(word)
   820 
   821 
   822     def _smiley_repl(self, word):
   823         """Handle smileys."""
   824         return self.formatter.smiley(word)
   825 
   826     _smileyA_repl = _smiley_repl
   827 
   828 
   829     def _comment_repl(self, word):
   830         # if we are in a paragraph, we must close it so that normal text following
   831         # in the line below the comment will reopen a new paragraph.
   832         if self.formatter.in_p:
   833             self.formatter.paragraph(0)
   834         self.line_is_empty = 1 # markup following comment lines treats them as if they were empty
   835         return self.formatter.comment(word)
   836 
   837     def _closeP(self):
   838         if self.formatter.in_p:
   839             return self.formatter.paragraph(0)
   840         return ''
   841 
   842     def _macro_repl(self, word):
   843         """Handle macros ([[macroname]])."""
   844         macro_name = word[2:-2]
   845         self.inhibit_p = 0 # 1 fixes UserPreferences, 0 fixes paragraph formatting for macros
   846 
   847         # check for arguments
   848         args = None
   849         if macro_name.count("("):
   850             macro_name, args = macro_name.split('(', 1)
   851             args = args[:-1]
   852 
   853         # create macro instance
   854         if self.macro is None:
   855             self.macro = wikimacro.Macro(self)
   856         return self.formatter.macro(self.macro, macro_name, args)
   857 
   858     def scan(self, scan_re, line):
   859         """ Scans one line
   860 
   861         Append text before match, invoke replace() with match, and add text after match.
   862         """
   863         result = []
   864         lastpos = 0
   865 
   866         ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line)
   867 
   868         for match in scan_re.finditer(line):
   869             # Add text before the match
   870             if lastpos < match.start():
   871 
   872                 ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()])
   873 
   874                 if not (self.inhibit_p or self.in_pre or self.formatter.in_p):
   875                     result.append(self.formatter.paragraph(1, css_class="line862"))
   876                 result.append(self.formatter.text(line[lastpos:match.start()]))
   877 
   878             # Replace match with markup
   879             if not (self.inhibit_p or self.in_pre or self.formatter.in_p or
   880                     self.in_table or self.in_list):
   881                 result.append(self.formatter.paragraph(1, css_class="line867"))
   882             result.append(self.replace(match))
   883             lastpos = match.end()
   884 
   885         ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:])
   886 
   887         # Add paragraph with the remainder of the line
   888         if not (self.in_pre or self.in_li or self.in_dd or self.inhibit_p or
   889                 self.formatter.in_p) and lastpos < len(line):
   890             result.append(self.formatter.paragraph(1, css_class="line874"))
   891         result.append(self.formatter.text(line[lastpos:]))
   892         return u''.join(result)
   893 
   894     def replace(self, match):
   895         """ Replace match using type name """
   896         result = []
   897         for type, hit in match.groupdict().items():
   898             if hit is not None and type != "hmarker":
   899 
   900                 ###result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit))
   901                 if self.in_pre and type not in ['pre', 'ent']:
   902                     return self.formatter.text(hit)
   903                 else:
   904                     # Open p for certain types
   905                     if not (self.inhibit_p or self.formatter.in_p
   906                             or self.in_pre or (type in self.no_new_p_before)):
   907                         result.append(self.formatter.paragraph(1, css_class="line891"))
   908 
   909                     # Get replace method and replece hit
   910                     replace = getattr(self, '_' + type + '_repl')
   911                     result.append(replace(hit))
   912                     return ''.join(result)
   913         else:
   914             # We should never get here
   915             import pprint
   916             raise Exception("Can't handle match " + `match`
   917                 + "\n" + pprint.pformat(match.groupdict())
   918                 + "\n" + pprint.pformat(match.groups()) )
   919 
   920         return ""
   921 
   922     def _line_anchordef(self):
   923         if self.line_anchors and not self.line_anchor_printed:
   924             self.line_anchor_printed = 1
   925             return self.formatter.line_anchordef(self.lineno)
   926         else:
   927             return ''
   928 
    def format(self, formatter):
        """ Format self.raw line by line and write the result.

        For each line, scan through looking for magic strings, outputting
        verbatim any intervening text. All output goes through
        self.request.write(); nothing is returned.

        @param formatter: the formatter used to generate the output; it is
                          stored in self.formatter
        """
        self.formatter = formatter
        self.hilite_re = self.formatter.page.hilite_re

        # prepare regex patterns
        # (formatting_rules holds one rule per line; they are joined into a
        # single alternation)
        rules = self.formatting_rules.replace('\n', '|')
        if self.cfg.bang_meta:
            # put the "!WikiName" rule in front of all other rules
            rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % {
                'word_rule': self.word_rule,
                'rules': rules,
            }
        self.request.clock.start('compile_huge_and_ugly')
        scan_re = re.compile(rules, re.UNICODE)
        number_re = re.compile(self.ol_rule, re.UNICODE)
        term_re = re.compile(self.dl_rule, re.UNICODE)
        indent_re = re.compile("^\s*", re.UNICODE)
        eol_re = re.compile(r'\r?\n', re.UNICODE)
        self.request.clock.stop('compile_huge_and_ugly')

        # get text and replace TABs
        rawtext = self.raw.expandtabs()

        # go through the lines
        self.lineno = self.start_line
        self.lines = eol_re.split(rawtext)
        self.line_is_empty = 0

        # stays set until the first line that is not a processing instruction
        self.in_processing_instructions = 1

        # Main loop
        for line in self.lines:
            # reset the per-line state
            self.lineno += 1
            self.line_anchor_printed = 0
            if not self.in_table:
                self.request.write(self._line_anchordef())
            self.table_rowstart = 1
            self.line_was_empty = self.line_is_empty
            self.line_is_empty = 0
            self.first_list_item = 0
            self.inhibit_p = 0

            # processing instructions are not parsed, they are passed to
            # formatter.comment() instead
            if self.in_processing_instructions:
                found = False
                for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated",
                           "#pragma", "#form", "#acl", "#language"):
                    if line.lower().startswith(pi):
                        self.request.write(self.formatter.comment(line))
                        found = True
                        break
                if not found:
                    self.in_processing_instructions = 0
                else:
                    continue # do not parse this line
            if self.in_pre:
                # TODO: move this into function
                # still looking for processing instructions
                # TODO: use strings for pre state, not numbers
                # in_pre values as used below:
                #   1 - code display opened, bang path ("#!name") not seen yet
                #   2 - a processor was found, lines are collected for it
                #   3 - plain preformatted text
                if self.in_pre == 1:
                    self.processor = None
                    self.processor_is_parser = 0
                    processor_name = ''
                    if (line.strip()[:2] == "#!"):
                        processor_name = line.strip()[2:].split()[0]
                        self.setProcessor(processor_name)

                    if self.processor:
                        self.in_pre = 2
                        self.colorize_lines = [line]
                        self.processor_name = processor_name
                        continue
                    else:
                        # no processor: fall back to a plain code display; the
                        # line itself is scanned and written further below
                        self.request.write(self._closeP() +
                                           self.formatter.preformatted(1))
                        self.in_pre = 3
                if self.in_pre == 2:
                    # processing mode
                    endpos = line.find("}}}")
                    if endpos == -1:
                        # no end marker on this line, keep collecting
                        self.colorize_lines.append(line)
                        continue
                    if line[:endpos]:
                        self.colorize_lines.append(line[:endpos])

                    # Close p before calling processor
                    # TODO: do we really need this?
                    self.request.write(self._closeP())
                    res = self.formatter.processor(self.processor_name,
                                                   self.colorize_lines,
                                                   self.processor_is_parser)
                    self.request.write(res)
                    del self.colorize_lines
                    self.in_pre = 0
                    self.processor = None

                    # send rest of line through regex machinery
                    line = line[endpos+3:]
                    if not line.strip(): # just in the case "}}} " when we only have blanks left...
                        continue
            else:
                # we don't have \n as whitespace any more
                # This is the space between lines we join to one paragraph
                line += ' '

                # Paragraph break on empty lines
                if not line.strip():
                    # an empty line also ends an open table
                    if self.in_table:
                        self.request.write(self.formatter.table(0))
                        self.request.write(self._line_anchordef())
                        self.in_table = 0
                    # CHANGE: removed check for not self.list_types
                    # p should close on every empty line
                    if self.formatter.in_p:
                        self.request.write(self.formatter.paragraph(0))
                    self.line_is_empty = 1
                    continue

                # Check indent level
                indent = indent_re.match(line)
                indlen = len(indent.group(0))
                # an indented line is a bullet list ("ul") unless it matches
                # the numbered list ("ol") or definition list ("dl") rule
                indtype = "ul"
                numtype = None
                numstart = None
                if indlen:
                    match = number_re.match(line)
                    if match:
                        # the text before the '.' gives the numbering type
                        # (its first character), the text after it may be a
                        # "#<number>" start value
                        numtype, numstart = match.group(0).strip().split('.')
                        numtype = numtype[0]

                        if numstart and numstart[0] == "#":
                            numstart = int(numstart[1:])
                        else:
                            numstart = None

                        indtype = "ol"
                    else:
                        match = term_re.match(line)
                        if match:
                            indtype = "dl"

                # output proper indentation tags
                self.request.write(self._indent_to(indlen, indtype, numtype, numstart))

                # Table mode
                # TODO: move into function?
                # A table line starts with "||" (after the indent) and ends
                # with "||"; the trailing blank in "|| " is the one appended
                # to the line above.
                if (not self.in_table and line[indlen:indlen + 2] == "||"
                    and line[-3:] == "|| " and len(line) >= 5 + indlen):
                    # Start table
                    if self.list_types and not self.in_li:
                        self.request.write(self.formatter.listitem(1, style="list-style-type:none"))
                        ## CHANGE: no automatic p on li
                        ##self.request.write(self.formatter.paragraph(1))
                        self.in_li = 1

                    # CHANGE: removed check for self.in_li
                    # paragraph should end before table, always!
                    if self.formatter.in_p:
                        self.request.write(self.formatter.paragraph(0))
                    attrs, attrerr = self._getTableAttrs(line[indlen+2:])
                    self.request.write(self.formatter.table(1, attrs) + attrerr)
                    self.in_table = True # self.lineno
                elif (self.in_table and not
                      # intra-table comments should not break a table
                      (line[:2] == "##" or
                       line[indlen:indlen + 2] == "||" and
                       line[-3:] == "|| " and
                       len(line) >= 5 + indlen)):

                    # Close table
                    self.request.write(self.formatter.table(0))
                    self.request.write(self._line_anchordef())
                    self.in_table = 0

            # Scan line, format and write
            formatted_line = self.scan(scan_re, line)
            self.request.write(formatted_line)

            # in a plain code display every source line ends with a linebreak
            if self.in_pre == 3:
                self.request.write(self.formatter.linebreak())

        # Close code displays, paragraphs, tables and open lists
        self.request.write(self._undent())
        if self.in_pre: self.request.write(self.formatter.preformatted(0))
        if self.formatter.in_p: self.request.write(self.formatter.paragraph(0))
        if self.in_table: self.request.write(self.formatter.table(0))
  1117 
  1118     # --------------------------------------------------------------------
  1119     # Private helpers
  1120 
  1121     def setProcessor(self, name):
  1122         """ Set processer to either processor or parser named 'name' """
  1123         cfg = self.request.cfg
  1124         try:
  1125             self.processor = wikiutil.importPlugin(cfg, "processor", name,
  1126                                                    "process")
  1127             self.processor_is_parser = 0
  1128         except wikiutil.PluginMissingError:
  1129             try:
  1130                 self.processor = wikiutil.importPlugin(cfg, "parser", name,
  1131                                                    "Parser")
  1132                 self.processor_is_parser = 1
  1133             except wikiutil.PluginMissingError:
  1134                 self.processor = None
  1135 
  1136