MoinMoin/parser/text_creole.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Wed, 11 Feb 2009 02:34:33 +0100
changeset 4569 3caaa8c74c41
parent 4560 050428d1c044
child 5104 b631aca46a48
permissions -rw-r--r--
wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
     1 # -*- coding: iso-8859-1 -*-
     2 """
     3     MoinMoin - Creole wiki markup parser
     4 
     5     See http://wikicreole.org/ for latest specs.
     6 
     7     Notes:
     8     * No markup allowed in headings.
     9       Creole 1.0 does not require us to support this.
    10     * No markup allowed in table headings.
    11       Creole 1.0 does not require us to support this.
    12     * No (non-bracketed) generic url recognition: this is "mission impossible"
    13       except if you want to risk lots of false positives. Only known protocols
    14       are recognized.
    15     * We do not allow ":" before "//" italic markup to avoid urls with
    16       unrecognized schemes (like wtf://server/path) triggering italic rendering
    17       for the rest of the paragraph.
    18 
    19     @copyright: 2007 MoinMoin:RadomirDopieralski (creole 0.5 implementation),
    20                 2007 MoinMoin:ThomasWaldmann (updates)
    21     @license: GNU GPL, see COPYING for details.
    22 """
    23 
    24 import re
    25 import StringIO
    26 from MoinMoin import config, wikiutil
    27 from MoinMoin.macro import Macro
    28 from _creole import Parser as CreoleParser
    29 
    30 Dependencies = []
    31 
    32 _ = lambda x: x
    33 
    34 class Parser:
    35     """
    36     Glue the DocParser and DocEmitter with the
    37     MoinMoin current API.
    38     """
    39 
    40     # Enable caching
    41     caching = 1
    42     Dependencies = Dependencies
    43     quickhelp = _(u"""\
    44  Emphasis:: <<Verbatim(//)>>''italics''<<Verbatim(//)>>; <<Verbatim(**)>>'''bold'''<<Verbatim(**)>>; <<Verbatim(**//)>>'''''bold italics'''''<<Verbatim(//**)>>; <<Verbatim(//)>>''mixed ''<<Verbatim(**)>>'''''bold'''<<Verbatim(**)>> and italics''<<Verbatim(//)>>;
    45  Horizontal Rule:: <<Verbatim(----)>>
    46  Force Linebreak:: <<Verbatim(\\\\)>>
    47  Headings:: = Title 1 =; == Title 2 ==; === Title 3 ===; ==== Title 4 ====; ===== Title 5 =====.
    48  Lists:: * bullets; ** sub-bullets; # numbered items; ## numbered sub items.
    49  Links:: <<Verbatim([[target]])>>; <<Verbatim([[target|linktext]])>>.
    50  Tables:: |= header text | cell text | more cell text |;
    51 
    52 (!) For more help, see HelpOnEditing or HelpOnCreoleSyntax.
    53 """)
    54 
    55     def __init__(self, raw, request, **kw):
    56         """Create a minimal Parser object with required attributes."""
    57 
    58         self.request = request
    59         self.form = request.form
    60         self.raw = raw
    61 
    62     def format(self, formatter):
    63         """Create and call the true parser and emitter."""
    64 
    65         document = CreoleParser(self.raw).parse()
    66         result = Emitter(document, formatter, self.request, Macro(self)).emit()
    67         self.request.write(result)
    68 
    69 class Rules:
    70     # For the link targets:
    71     proto = r'http|https|ftp|nntp|news|mailto|telnet|file|irc'
    72     extern = r'(?P<extern_addr>(?P<extern_proto>%s):.*)' % proto
    73     attach = r'''
    74             (?P<attach_scheme> attachment | drawing | image ):
    75             (?P<attach_addr> .* )
    76         '''
    77     interwiki = r'''
    78             (?P<inter_wiki> [A-Z][a-zA-Z]+ ) :
    79             (?P<inter_page> .* )
    80         '''
    81     page = r'(?P<page_name> .* )'
    82 
    83 
    84 class Emitter:
    85     """
    86     Generate the output for the document
    87     tree consisting of DocNodes.
    88     """
    89 
    90     addr_re = re.compile('|'.join([
    91             Rules.extern,
    92             Rules.attach,
    93             Rules.interwiki,
    94             Rules.page
    95         ]), re.X | re.U) # for addresses
    96 
    97     def __init__(self, root, formatter, request, macro):
    98         self.root = root
    99         self.formatter = formatter
   100         self.request = request
   101         self.form = request.form
   102         self.macro = macro
   103 
   104     def get_text(self, node):
   105         """Try to emit whatever text is in the node."""
   106 
   107         try:
   108             return node.children[0].content or ''
   109         except:
   110             return node.content or ''
   111 
   112     # *_emit methods for emitting nodes of the document:
   113 
   114     def document_emit(self, node):
   115         return self.emit_children(node)
   116 
   117     def text_emit(self, node):
   118         return self.formatter.text(node.content or '')
   119 
   120     def separator_emit(self, node):
   121         return self.formatter.rule()
   122 
   123     def paragraph_emit(self, node):
   124         return ''.join([
   125             self.formatter.paragraph(1),
   126             self.emit_children(node),
   127             self.formatter.paragraph(0),
   128         ])
   129 
   130     def bullet_list_emit(self, node):
   131         return ''.join([
   132             self.formatter.bullet_list(1),
   133             self.emit_children(node),
   134             self.formatter.bullet_list(0),
   135         ])
   136 
   137     def number_list_emit(self, node):
   138         return ''.join([
   139             self.formatter.number_list(1),
   140             self.emit_children(node),
   141             self.formatter.number_list(0),
   142         ])
   143 
   144     def list_item_emit(self, node):
   145         return ''.join([
   146             self.formatter.listitem(1),
   147             self.emit_children(node),
   148             self.formatter.listitem(0),
   149         ])
   150 
   151 # Not used
   152 #    def definition_list_emit(self, node):
   153 #        return ''.join([
   154 #            self.formatter.definition_list(1),
   155 #            self.emit_children(node),
   156 #            self.formatter.definition_list(0),
   157 #        ])
   158 
   159 # Not used
   160 #    def term_emit(self, node):
   161 #        return ''.join([
   162 #            self.formatter.definition_term(1),
   163 #            self.emit_children(node),
   164 #            self.formatter.definition_term(0),
   165 #        ])
   166 
   167 # Not used
   168 #    def definition_emit(self, node):
   169 #        return ''.join([
   170 #            self.formatter.definition_desc(1),
   171 #            self.emit_children(node),
   172 #            self.formatter.definition_desc(0),
   173 #        ])
   174 
   175     def table_emit(self, node):
   176         return ''.join([
   177             self.formatter.table(1, attrs=getattr(node, 'attrs', '')),
   178             self.emit_children(node),
   179             self.formatter.table(0),
   180         ])
   181 
   182     def table_row_emit(self, node):
   183         return ''.join([
   184             self.formatter.table_row(1, attrs=getattr(node, 'attrs', '')),
   185             self.emit_children(node),
   186             self.formatter.table_row(0),
   187         ])
   188 
   189     def table_cell_emit(self, node):
   190         return ''.join([
   191             self.formatter.table_cell(1, attrs=getattr(node, 'attrs', '')),
   192             self.emit_children(node),
   193             self.formatter.table_cell(0),
   194         ])
   195 
   196     def table_head_emit(self, node):
   197         return ''.join([
   198             self.formatter.rawHTML('<th>'),
   199             self.emit_children(node),
   200             self.formatter.rawHTML('</th>'),
   201         ])
   202 
   203     def emphasis_emit(self, node):
   204         return ''.join([
   205             self.formatter.emphasis(1),
   206             self.emit_children(node),
   207             self.formatter.emphasis(0),
   208         ])
   209 
   210 # Not used
   211 #    def quote_emit(self, node):
   212 #        return ''.join([
   213 #            self.formatter.rawHTML('<q>'),
   214 #            self.emit_children(node),
   215 #            self.formatter.rawHTML('</q>'),
   216 #        ])
   217 
   218     def strong_emit(self, node):
   219         return ''.join([
   220             self.formatter.strong(1),
   221             self.emit_children(node),
   222             self.formatter.strong(0),
   223         ])
   224 
   225 # Not used
   226 #    def smiley_emit(self, node):
   227 #        return self.formatter.smiley(node.content)
   228 
   229     def header_emit(self, node):
   230         text = self.get_text(node)
   231         return ''.join([
   232             self.formatter.heading(1, node.level, id=text),
   233             self.formatter.text(text),
   234             self.formatter.heading(0, node.level),
   235         ])
   236 
   237     def code_emit(self, node):
   238 # XXX The current formatter will replace all spaces with &nbsp;, so we need
   239 # to use rawHTML instead, until that is fixed.
   240 #        return ''.join([
   241 #            self.formatter.code(1),
   242 #            self.formatter.text(node.content or ''),
   243 #            self.formatter.code(0),
   244 #        ])
   245         return ''.join([
   246             self.formatter.rawHTML('<tt>'),
   247             self.formatter.text(node.content or ''),
   248             self.formatter.rawHTML('</tt>'),
   249         ])
   250 
   251 # Not used
   252 #    def abbr_emit(self, node):
   253 #        return ''.join([
   254 #            self.formatter.rawHTML('<abbr title="%s">' % node.title),
   255 #            self.formatter.text(node.content or ''),
   256 #            self.formatter.rawHTML('</abbr>'),
   257 #        ])
   258 
   259     def link_emit(self, node):
   260         target = node.content
   261         m = self.addr_re.match(target)
   262         if m:
   263             if m.group('page_name'):
   264                 # link to a page
   265                 word = m.group('page_name')
   266                 if word.startswith(wikiutil.PARENT_PREFIX):
   267                     word = word[wikiutil.PARENT_PREFIX_LEN:]
   268                 elif word.startswith(wikiutil.CHILD_PREFIX):
   269                     word = "%s/%s" % (self.formatter.page.page_name,
   270                         word[wikiutil.CHILD_PREFIX_LEN:])
   271                 word, anchor = wikiutil.split_anchor(word)
   272                 return ''.join([
   273                     self.formatter.pagelink(1, word, anchor=anchor),
   274                     self.emit_children(node) or self.formatter.text(target),
   275                     self.formatter.pagelink(0, word),
   276                 ])
   277             elif m.group('extern_addr'):
   278                 # external link
   279                 address = m.group('extern_addr')
   280                 proto = m.group('extern_proto')
   281                 return ''.join([
   282                     self.formatter.url(1, address, css=proto),
   283                     self.emit_children(node) or self.formatter.text(target),
   284                     self.formatter.url(0),
   285                 ])
   286             elif m.group('inter_wiki'):
   287                 # interwiki link
   288                 wiki = m.group('inter_wiki')
   289                 page = m.group('inter_page')
   290                 page, anchor = wikiutil.split_anchor(page)
   291                 return ''.join([
   292                     self.formatter.interwikilink(1, wiki, page, anchor=anchor),
   293                     self.emit_children(node) or self.formatter.text(page),
   294                     self.formatter.interwikilink(0),
   295                 ])
   296             elif m.group('attach_scheme'):
   297                 # link to an attachment
   298                 scheme = m.group('attach_scheme')
   299                 attachment = m.group('attach_addr')
   300                 url = wikiutil.url_unquote(attachment)
   301                 text = self.get_text(node)
   302                 return ''.join([
   303                         self.formatter.attachment_link(1, url),
   304                         self.formatter.text(text),
   305                         self.formatter.attachment_link(0)
   306                     ])
   307         return "".join(["[[", self.formatter.text(target), "]]"])
   308 
   309 # Not used
   310 #    def anchor_link_emit(self, node):
   311 #        return ''.join([
   312 #            self.formatter.url(1, node.content, css='anchor'),
   313 #            self.emit_children(node),
   314 #            self.formatter.url(0),
   315 #        ])
   316 
   317     def image_emit(self, node):
   318         target = node.content
   319         text = self.get_text(node)
   320         m = self.addr_re.match(target)
   321         if m:
   322             if m.group('page_name'):
   323                 # inserted anchors
   324                 url = wikiutil.url_unquote(target)
   325                 if target.startswith('#'):
   326                     return self.formatter.anchordef(url[1:])
   327                 # default to images
   328                 return self.formatter.attachment_image(
   329                     url, alt=text, html_class='image')
   330             elif m.group('extern_addr'):
   331                 # external link
   332                 address = m.group('extern_addr')
   333                 proto = m.group('extern_proto')
   334                 url = wikiutil.url_unquote(address)
   335                 return self.formatter.image(
   336                     src=url, alt=text, html_class='external_image')
   337             elif m.group('attach_scheme'):
   338                 # link to an attachment
   339                 scheme = m.group('attach_scheme')
   340                 attachment = m.group('attach_addr')
   341                 url = wikiutil.url_unquote(attachment)
   342                 if scheme == 'image':
   343                     return self.formatter.attachment_image(
   344                         url, alt=text, html_class='image')
   345                 elif scheme == 'drawing':
   346                     return self.formatter.attachment_drawing(url, text, alt=text)
   347                 else:
   348                     pass
   349             elif m.group('inter_wiki'):
   350                 # interwiki link
   351                 pass
   352 #        return "".join(["{{", self.formatter.text(target), "}}"])
   353         url = wikiutil.url_unquote(node.content)
   354         return self.formatter.attachment_inlined(url, text)
   355 
   356 # Not used
   357 #    def drawing_emit(self, node):
   358 #        url = wikiutil.url_unquote(node.content)
   359 #        text = self.get_text(node)
   360 #        return self.formatter.attachment_drawing(url, text)
   361 
   362 # Not used
   363 #    def figure_emit(self, node):
   364 #        text = self.get_text(node)
   365 #        url = wikiutil.url_unquote(node.content)
   366 #        return ''.join([
   367 #            self.formatter.rawHTML('<div class="figure">'),
   368 #            self.get_image(url, text), self.emit_children(node),
   369 #            self.formatter.rawHTML('</div>'),
   370 #        ])
   371 
   372 # Not used
   373 #    def bad_link_emit(self, node):
   374 #        return self.formatter.text(''.join([
   375 #            '[[',
   376 #            node.content or '',
   377 #            ']]',
   378 #        ]))
   379 
   380     def macro_emit(self, node):
   381         macro_name = node.content
   382         args = node.args
   383         return self.formatter.macro(self.macro, macro_name, args)
   384 
   385 # Not used
   386 #    def section_emit(self, node):
   387 #        return ''.join([
   388 #            self.formatter.rawHTML(
   389 #                '<div class="%s" style="%s">' % (node.sect, node.style)),
   390 #            self.emit_children(node),
   391 #            self.formatter.rawHTML('</div>'),
   392 #        ])
   393 
   394     def break_emit(self, node):
   395         return self.formatter.linebreak(preformatted=0)
   396 
   397 # Not used
   398 #    def blockquote_emit(self, node):
   399 #        return ''.join([
   400 #            self.formatter.rawHTML('<blockquote>'),
   401 #            self.emit_children(node),
   402 #            self.formatter.rawHTML('</blockquote>'),
   403 #        ])
   404 
   405     def preformatted_emit(self, node):
   406         parser_name = getattr(node, 'sect', '')
   407         if parser_name:
   408             # The formatter.parser will *sometimes* just return the result
   409             # and *sometimes* try to write it directly. We need to take both
   410             # cases into account!
   411             lines = node.content.split(u'\n')
   412             buf = StringIO.StringIO()
   413             try:
   414                 try:
   415                     self.request.redirect(buf)
   416                     ret = self.formatter.parser(parser_name, lines)
   417                 finally:
   418                     self.request.redirect()
   419                 buf.flush()
   420                 writ = buf.getvalue()
   421                 buf.close()
   422                 return ret + writ
   423             except wikiutil.PluginMissingError:
   424                 pass
   425         return ''.join([
   426             self.formatter.preformatted(1),
   427             self.formatter.text(node.content),
   428             self.formatter.preformatted(0),
   429         ])
   430 
   431     def default_emit(self, node):
   432         """Fallback function for emitting unknown nodes."""
   433 
   434         return ''.join([
   435             self.formatter.preformatted(1),
   436             self.formatter.text('<%s>\n' % node.kind),
   437             self.emit_children(node),
   438             self.formatter.preformatted(0),
   439         ])
   440 
   441     def emit_children(self, node):
   442         """Emit all the children of a node."""
   443 
   444         return ''.join([self.emit_node(child) for child in node.children])
   445 
   446     def emit_node(self, node):
   447         """Emit a single node."""
   448 
   449         emit = getattr(self, '%s_emit' % node.kind, self.default_emit)
   450         return emit(node)
   451 
   452     def emit(self):
   453         """Emit the document represented by self.root DOM tree."""
   454 
   455         # Try to disable 'smart' formatting if possible
   456         magic_save = getattr(self.formatter, 'no_magic', False)
   457         self.formatter.no_magic = True
   458         output = '\n'.join([
   459             self.emit_node(self.root),
   460         ])
   461         # restore 'smart' formatting if it was set
   462         self.formatter.no_magic = magic_save
   463         return output
   464 
# Remove the module-level helper ``_`` again: it is only needed while the
# class bodies above are evaluated and should not stay in the module namespace.
del _