MoinMoin/parser/text_moin_wiki.py @ 3052:1d3e65884337
copy moin wiki parser and its tests from 1.6 branch (cs 2558:3e3a549b49f0), keep _heading_repl from 1.7, update interwiki map to make tests happy
author:   Thomas Waldmann <tw AT waldmann-edv DOT de>
date:     Wed, 20 Feb 2008 01:13:29 +0100
parents:  6dcc9617bab6
children: ed1a433803c6
# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - MoinMoin Wiki Markup Parser

    @copyright: 2000-2002 Juergen Hermann <jh@web.de>,
                2006-2007 MoinMoin:ThomasWaldmann,
                2007 by MoinMoin:ReimarBauer
    @license: GNU GPL, see COPYING for details.
"""

import re
import logging

from MoinMoin import config, wikiutil, macro
from MoinMoin.Page import Page
from MoinMoin.support.python_compatibility import rsplit, set

Dependencies = ['user'] # {{{#!wiki comment ... }}} has different output depending on the user's profile settings


class Parser:
    """
        Parse wiki format markup (and call the formatter to generate output).

        All formatting commands can be parsed one line at a time, though
        some state is carried over between lines.

        Methods named like _*_repl() are responsible to handle the named
        regex patterns.
    """

    # allow caching
    caching = 1
    Dependencies = Dependencies

    # some common strings
    CHILD_PREFIX = wikiutil.CHILD_PREFIX
    CHILD_PREFIX_LEN = wikiutil.CHILD_PREFIX_LEN
    PARENT_PREFIX = wikiutil.PARENT_PREFIX
    PARENT_PREFIX_LEN = wikiutil.PARENT_PREFIX_LEN

    punct_pattern = re.escape(u'''"\'}]|:,.)?!''')
    url_scheme = u'|'.join(config.url_schemas)

    # some common rules
    url_rule = ur'''
        (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
        (?P<url_target>  # capture whole url there
         (?P<url_scheme>%(url_scheme)s)  # some scheme
         \:
         \S+?  # anything non-whitespace
        )
        (?:$|(?=\s|[%(punct)s]+(\s|$)))  # require either end of line or some whitespace or some punctuation+blank/eol afterwards
    ''' % {
        'url_scheme': url_scheme,
        'punct': punct_pattern,
    }

    # this is for a free (non-bracketed) interwiki link - to avoid false positives,
    # we are rather restrictive here (same as in moin 1.5: require that the
    # interwiki_wiki name starts with an uppercase letter A-Z. Later, the code
    # also checks whether the wiki name is in the interwiki map (if not, it renders
    # normal text, no link):
    interwiki_rule = ur'''
        (?:^|(?<=\W))  # require either beginning of line or some non-alphanum char (whitespace, punctuation) to the left
        (?P<interwiki_wiki>[A-Z][a-zA-Z]+)  # interwiki wiki name
        \:
        (?P<interwiki_page>  # interwiki page name
         (?=[^ ]*[%(u)s%(l)s0..9][^ ]*\ )  # make sure there is something non-blank with at least one alphanum letter following
         [^\s%(punct)s]+  # we take all until we hit some blank or punctuation char ...
        )
    ''' % {
        'u': config.chars_upper,
        'l': config.chars_lower,
        'punct': punct_pattern,
    }

    word_rule = ur'''
        (?:
         (?<![%(u)s%(l)s/])  # require anything not upper/lower/slash before
         |
         ^  # ... or beginning of line
        )
        (?P<word_bang>\!)?  # configurable: avoid getting CamelCase rendered as link
        (?P<word_name>
         (?:
          (%(parent)s)*  # there might be either ../ parent prefix(es)
          |
          ((?<!%(child)s)%(child)s)?  # or maybe a single / child prefix (but not if we already had it before)
         )
         (
          ((?<!%(child)s)%(child)s)?  # there might be / child prefix (but not if we already had it before)
          (?:[%(u)s][%(l)s]+){2,}  # at least 2 upper>lower transitions make CamelCase
         )+  # we can have MainPage/SubPage/SubSubPage ...
         (?:
          \#  # anchor separator          TODO check if this does not make trouble at places where word_rule is used
          (?P<word_anchor>\S+)  # some anchor name
         )?
        )
        (?:
         (?![%(u)s%(l)s/])  # require anything not upper/lower/slash following
         |
         $  # ...
or end of line ) ''' % { 'u': config.chars_upper, 'l': config.chars_lower, 'child': re.escape(CHILD_PREFIX), 'parent': re.escape(PARENT_PREFIX), } # link targets: extern_rule = r'(?P<extern_addr>(?P<extern_scheme>%s)\:.*)' % url_scheme attach_rule = r'(?P<attach_scheme>attachment|drawing)\:(?P<attach_addr>.*)' page_rule = r'(?P<page_name>.*)' link_target_rules = r'|'.join([ extern_rule, attach_rule, page_rule, ]) link_target_re = re.compile(link_target_rules, re.VERBOSE|re.UNICODE) link_rule = r""" (?P<link> \[\[ # link target \s* # strip space (?P<link_target>[^|]+?) \s* # strip space ( \| # link description \s* # strip space (?P<link_desc> (?: # 1. we have either a transclusion here (usually a image) \{\{ \s*[^|]+?\s* # usually image target (strip space) (\|\s*[^|]*?\s* # usually image alt text (optional, strip space) (\|\s*[^|]*?\s* # transclusion parameters (usually key="value" format, optional, strip space) )? )? \}\} ) | (?: # 2. or we have simple text here. [^|]+? ) )? \s* # strip space ( \| # link parameters \s* # strip space (?P<link_params>[^|]+?)? \s* # strip space )? )? \]\] ) """ transclude_rule = r""" (?P<transclude> \{\{ \s*(?P<transclude_target>[^|]+?)\s* # usually image target (strip space) (\|\s*(?P<transclude_desc>[^|]+?)?\s* # usually image alt text (optional, strip space) (\|\s*(?P<transclude_params>[^|]+?)?\s* # transclusion parameters (usually key="value" format, optional, strip space) )? )? \}\} ) """ text_rule = r""" (?P<simple_text> [^|]+ # some text (not empty, does not contain separator) ) """ # link descriptions: link_desc_rules = r'|'.join([ transclude_rule, text_rule, ]) link_desc_re = re.compile(link_desc_rules, re.VERBOSE|re.UNICODE) # transclude descriptions: transclude_desc_rules = r'|'.join([ text_rule, ]) transclude_desc_re = re.compile(transclude_desc_rules, re.VERBOSE|re.UNICODE) # lists: ol_rule = ur""" ^\s+ # indentation (?:[0-9]+|[aAiI])\. # arabic, alpha, roman counting (?:\#\d+)? # optional start number \s # require one blank afterwards """ ol_re = re.compile(ol_rule, re.VERBOSE|re.UNICODE) dl_rule = ur""" ^\s+ # indentation .*?:: # definition term:: \s # require on blank afterwards """ dl_re = re.compile(dl_rule, re.VERBOSE|re.UNICODE) # others indent_re = re.compile(ur"^\s*", re.UNICODE) eol_re = re.compile(r'\r?\n', re.UNICODE) # this is used inside parser/pre sections (we just want to know when it's over): parser_unique = u'' parser_scan_rule = ur""" (?P<parser_end> %s\}\}\} # in parser/pre, we only look for the end of the parser/pre ) """ # the big, fat, less ugly one ;) # please be very careful: blanks and # must be escaped with \ ! scan_rules = ur""" (?P<emph_ibb> '''''(?=[^']+''') # italic on, bold on, ..., bold off )|(?P<emph_ibi> '''''(?=[^']+'') # italic on, bold on, ..., italic off )|(?P<emph_ib_or_bi> '{5}(?=[^']) # italic and bold or bold and italic )|(?P<emph> '{2,3} # italic or bold )|(?P<u> __ # underline )|(?P<small> ( (?P<small_on>\~-\ ?) # small on (we eat a trailing blank if it is there) | (?P<small_off>-\~) # small off ) )|(?P<big> ( (?P<big_on>\~\+\ ?) 
# big on (eat trailing blank) | (?P<big_off>\+\~) # big off ) )|(?P<strike> ( (?P<strike_on>--\() # strike-through on | (?P<strike_off>\)--) # strike-through off ) )|(?P<remark> ( (^|(?<=\s)) # we require either beginning of line or some whitespace before a remark begin (?P<remark_on>/\*\s) # inline remark on (require and eat whitespace after it) ) | ( (?P<remark_off>\s\*/) # off (require and eat whitespace before it) (?=\s) # we require some whitespace after a remark end ) )|(?P<sup> \^ # superscript on (?P<sup_text>.*?) # capture the text \^ # off )|(?P<sub> ,, # subscript on (?P<sub_text>[^,]{1,40}) # capture 1..40 chars of text ,, # off )|(?P<tt> \{\{\{ # teletype on (?P<tt_text>.*?) # capture the text \}\}\} # off )|(?P<tt_bt> ` # teletype (using a backtick) on (?P<tt_bt_text>.*?) # capture the text ` # off )|(?P<interwiki> %(interwiki_rule)s # OtherWiki:PageName )|(?P<word> # must come AFTER interwiki rule! %(word_rule)s # CamelCase wiki words )| %(link_rule)s | %(transclude_rule)s |(?P<url> %(url_rule)s )|(?P<email> [-\w._+]+ # name \@ # at [\w-]+(\.[\w-]+)+ # server/domain )|(?P<smiley> (^|(?<=\s)) # we require either beginning of line or some space before a smiley (%(smiley)s) # one of the smileys (?=\s) # we require some space after the smiley )|(?P<macro> << (?P<macro_name>\w+) # name of the macro (?:\((?P<macro_args>.*?)\))? # optionally macro arguments >> )|(?P<heading> ^(?P<hmarker>=+)\s+ # some === at beginning of line, eat trailing blanks (?P<heading_text>.*?) # capture heading text \s+(?P=hmarker)\s$ # some === at end of line (matching amount as we have seen), eat blanks )|(?P<parser> \{\{\{ # parser on (?P<parser_unique>(\{*|\w*)) # either some more {{{{ or some chars to solve the nesting problem (?P<parser_line> ( \#! # hash bang (?P<parser_name>\w*) # we have a parser name (can be empty) directly following the {{{ ( \s+ # some space ... (?P<parser_args>.+?) # followed by parser args )? # parser args are optional \s* # followed by whitespace (eat it) until EOL ) | (?P<parser_nothing>\s*) # no parser name, only whitespace up to EOL (eat it) )$ # "parser off" detection is done with parser_scan_rule! )|(?P<comment> ^\#\#.*$ # src code comment, rest of line )|(?P<ol> %(ol_rule)s # ordered list )|(?P<dl> %(dl_rule)s # definition list )|(?P<li> ^\s+\*\s* # unordered list )|(?P<li_none> ^\s+\.\s* # unordered list, no bullets )|(?P<indent> ^\s+ # indented by some spaces )|(?P<tableZ> \|\|\ $ # the right end of a table row )|(?P<table> (?:\|\|)+(?:<(?!<)[^>]*?>)?(?!\|?\s$) # a table )|(?P<rule> -{4,} # hor. rule, min. 4 - )|(?P<entity> &( ([a-zA-Z]+) # symbolic entity, like ü | (\#(\d{1,5}|x[0-9a-fA-F]+)) # numeric entities, like * or B ); )|(?P<sgml_entity> # must come AFTER entity rule! 
[<>&] # needs special treatment for html/xml )""" % { 'url_scheme': url_scheme, 'url_rule': url_rule, 'punct': punct_pattern, 'ol_rule': ol_rule, 'dl_rule': dl_rule, 'interwiki_rule': interwiki_rule, 'word_rule': word_rule, 'link_rule': link_rule, 'transclude_rule': transclude_rule, 'u': config.chars_upper, 'l': config.chars_lower, 'smiley': u'|'.join([re.escape(s) for s in config.smileys])} scan_re = re.compile(scan_rules, re.UNICODE|re.VERBOSE) # Don't start p before these no_new_p_before = ("heading rule table tableZ tr td " "ul ol dl dt dd li li_none indent " "macro parser") no_new_p_before = no_new_p_before.split() no_new_p_before = dict(zip(no_new_p_before, [1] * len(no_new_p_before))) def __init__(self, raw, request, **kw): self.raw = raw self.request = request self.form = request.form # Macro object uses this self._ = request.getText self.cfg = request.cfg self.line_anchors = kw.get('line_anchors', True) self.start_line = kw.get('start_line', 0) self.macro = None # currently, there is only a single, optional argument to this parser and # (when given), it is used as class(es) for a div wrapping the formatter output # either use a single class like "comment" or multiple like "comment/red/dotted" self.wrapping_div_class = kw.get('format_args', '').strip().replace('/', ' ') self.is_em = 0 # must be int self.is_b = 0 # must be int self.is_u = False self.is_strike = False self.is_big = False self.is_small = False self.is_remark = False self.lineno = 0 self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl> self.in_li = 0 # between <li> and </li> self.in_dd = 0 # between <dd> and </dd> # states of the parser concerning being inside/outside of some "pre" section: # None == we are not in any kind of pre section (was: 0) # 'search_parser' == we didn't get a parser yet, still searching for it (was: 1) # 'found_parser' == we found a valid parser (was: 2) self.in_pre = None self.no_862 = False self.in_table = 0 self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph # holds the nesting level (in chars) of open lists self.list_indents = [] self.list_types = [] def _close_item(self, result): #result.append("<!-- close item begin -->\n") if self.in_table: result.append(self.formatter.table(0)) self.in_table = 0 if self.in_li: self.in_li = 0 if self.formatter.in_p: result.append(self.formatter.paragraph(0)) result.append(self.formatter.listitem(0)) if self.in_dd: self.in_dd = 0 if self.formatter.in_p: result.append(self.formatter.paragraph(0)) result.append(self.formatter.definition_desc(0)) #result.append("<!-- close item end -->\n") def _u_repl(self, word, groups): """Handle underline.""" self.is_u = not self.is_u return self.formatter.underline(self.is_u) def _remark_repl(self, word, groups): """Handle remarks.""" on = groups.get('remark_on') if on and self.is_remark: return self.formatter.text(word) off = groups.get('remark_off') if off and not self.is_remark: return self.formatter.text(word) self.is_remark = not self.is_remark return self.formatter.span(self.is_remark, css_class='comment') _remark_on_repl = _remark_repl _remark_off_repl = _remark_repl def _strike_repl(self, word, groups): """Handle strikethrough.""" on = groups.get('strike_on') if on and self.is_strike: return self.formatter.text(word) off = groups.get('strike_off') if off and not self.is_strike: return self.formatter.text(word) self.is_strike = not self.is_strike return self.formatter.strike(self.is_strike) _strike_on_repl = _strike_repl _strike_off_repl = _strike_repl def _small_repl(self, word, groups): """Handle 
small."""
        on = groups.get('small_on')
        if on and self.is_small:
            return self.formatter.text(word)
        off = groups.get('small_off')
        if off and not self.is_small:
            return self.formatter.text(word)
        self.is_small = not self.is_small
        return self.formatter.small(self.is_small)
    _small_on_repl = _small_repl
    _small_off_repl = _small_repl

    def _big_repl(self, word, groups):
        """Handle big."""
        on = groups.get('big_on')
        if on and self.is_big:
            return self.formatter.text(word)
        off = groups.get('big_off')
        if off and not self.is_big:
            return self.formatter.text(word)
        self.is_big = not self.is_big
        return self.formatter.big(self.is_big)
    _big_on_repl = _big_repl
    _big_off_repl = _big_repl

    def _emph_repl(self, word, groups):
        """Handle emphasis, i.e. '' and '''."""
        if len(word) == 3:
            self.is_b = not self.is_b
            if self.is_em and self.is_b:
                self.is_b = 2
            return self.formatter.strong(self.is_b)
        else:
            self.is_em = not self.is_em
            if self.is_em and self.is_b:
                self.is_em = 2
            return self.formatter.emphasis(self.is_em)

    def _emph_ibb_repl(self, word, groups):
        """Handle mixed emphasis, i.e. ''''' followed by '''."""
        self.is_b = not self.is_b
        self.is_em = not self.is_em
        if self.is_em and self.is_b:
            self.is_b = 2
        return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)

    def _emph_ibi_repl(self, word, groups):
        """Handle mixed emphasis, i.e. ''''' followed by ''."""
        self.is_b = not self.is_b
        self.is_em = not self.is_em
        if self.is_em and self.is_b:
            self.is_em = 2
        return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)

    def _emph_ib_or_bi_repl(self, word, groups):
        """Handle mixed emphasis, exactly five '''''."""
        b_before_em = self.is_b > self.is_em > 0
        self.is_b = not self.is_b
        self.is_em = not self.is_em
        if b_before_em:
            return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em)
        else:
            return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b)

    def _sup_repl(self, word, groups):
        """Handle superscript."""
        text = groups.get('sup_text', '')
        return (self.formatter.sup(1) +
                self.formatter.text(text) +
                self.formatter.sup(0))
    _sup_text_repl = _sup_repl

    def _sub_repl(self, word, groups):
        """Handle subscript."""
        text = groups.get('sub_text', '')
        return (self.formatter.sub(1) +
                self.formatter.text(text) +
                self.formatter.sub(0))
    _sub_text_repl = _sub_repl

    def _tt_repl(self, word, groups):
        """Handle inline code."""
        tt_text = groups.get('tt_text', '')
        return (self.formatter.code(1) +
                self.formatter.text(tt_text) +
                self.formatter.code(0))
    _tt_text_repl = _tt_repl

    def _tt_bt_repl(self, word, groups):
        """Handle backticked inline code."""
        tt_bt_text = groups.get('tt_bt_text', '')
        return (self.formatter.code(1, css="backtick") +
                self.formatter.text(tt_bt_text) +
                self.formatter.code(0))
    _tt_bt_text_repl = _tt_bt_repl

    def _rule_repl(self, word, groups):
        """Handle sequences of dashes."""
        result = self._undent() + self._closeP()
        if len(word) <= 4:
            result += self.formatter.rule()
        else:
            # Create variable rule size 1 - 6. Actual size defined in css.
size = min(len(word), 10) - 4 result += self.formatter.rule(size) return result def _interwiki_repl(self, word, groups): """Handle InterWiki links.""" wiki = groups.get('interwiki_wiki') page = groups.get('interwiki_page') wikitag_bad = wikiutil.resolve_interwiki(self.request, wiki, page)[3] if wikitag_bad: text = groups.get('interwiki') return self.formatter.text(text) else: return (self.formatter.interwikilink(1, wiki, page) + self.formatter.text(page) + self.formatter.interwikilink(0, wiki, page)) _interwiki_wiki_repl = _interwiki_repl _interwiki_page_repl = _interwiki_repl def _word_repl(self, word, groups): """Handle WikiNames.""" bang = '' bang_present = groups.get('word_bang') if bang_present: if self.cfg.bang_meta: # handle !NotWikiNames return self.formatter.nowikiword(word) else: bang = self.formatter.text('!') name = groups.get('word_name') current_page = self.formatter.page.page_name abs_name = wikiutil.AbsPageName(current_page, name) # if a simple, self-referencing link, emit it as plain text if abs_name == current_page: return self.formatter.text(word) else: # handle anchors try: abs_name, anchor = rsplit(abs_name, "#", 1) except ValueError: anchor = "" return (bang + self.formatter.pagelink(1, abs_name, anchor=anchor) + self.formatter.text(word) + self.formatter.pagelink(0, abs_name)) _word_bang_repl = _word_repl _word_name_repl = _word_repl _word_anchor_repl = _word_repl def _url_repl(self, word, groups): """Handle literal URLs.""" scheme = groups.get('url_scheme', 'http') target = groups.get('url_target', '') return (self.formatter.url(1, target, css=scheme) + self.formatter.text(target) + self.formatter.url(0)) _url_target_repl = _url_repl _url_scheme_repl = _url_repl def _transclude_description(self, desc, default_text=''): """ parse a string <desc> valid as transclude description (text, ...) and return "formatted" content (we do not pass it through the text formatter, but only through wikiutil.escape as we only use it as alt/title text). @param desc: the transclude description to parse @param default_text: use this text if parsing desc returns nothing. """ m = self.transclude_desc_re.match(desc) if m: if m.group('simple_text'): desc = m.group('simple_text') else: desc = default_text desc = wikiutil.escape(desc) return desc def _get_params(self, params, tag_attrs=None, acceptable_attrs=None, query_args=None): """ parse the parameters of link/transclusion markup, defaults can be a dict with some default key/values that will be in the result as given, unless overriden by the params. 
""" if tag_attrs is None: tag_attrs = {} if query_args is None: query_args = {} if params: fixed, kw, trailing = wikiutil.parse_quoted_separated(params) # we ignore fixed and trailing args and only use kw args: if acceptable_attrs is None: acceptable_attrs = [] for key, val in kw.items(): key = str(key) # we can't use unicode as key if key in acceptable_attrs: key = wikiutil.escape(key) val = unicode(val) # but for the value val = wikiutil.escape(val) tag_attrs[key] = val elif key.startswith('&'): key = key[1:] val = unicode(val) query_args[key] = val return tag_attrs, query_args def _transclude_repl(self, word, groups): """Handles transcluding content, usually embedding images.""" target = groups.get('transclude_target', '') target = wikiutil.url_unquote(target, want_unicode=True) desc = groups.get('transclude_desc', '') or '' params = groups.get('transclude_params', u'') or u'' acceptable_attrs_img = ['class', 'title', 'longdesc', 'width', 'height', 'align', ] # no style because of JS acceptable_attrs_object = ['class', 'title', 'width', 'height', # no style because of JS 'type', 'standby', ] # we maybe need a hack for <PARAM> here m = self.link_target_re.match(target) if m: if m.group('extern_addr'): scheme = m.group('extern_scheme') target = m.group('extern_addr') desc = self._transclude_description(desc, target) if scheme.startswith('http'): # can also be https # currently only supports ext. image inclusion tag_attrs, query_args = self._get_params(params, tag_attrs={'class': 'external_image', 'alt': desc, 'title': desc, }, acceptable_attrs=acceptable_attrs_img) return self.formatter.image(src=target, **tag_attrs) # FF2 has a bug with target mimetype detection, it looks at the url path # and expects to find some "filename extension" there (like .png) and this # (not the response http headers) will set the default content-type of # the object. This will often work for staticly served files, but # fails for MoinMoin attachments (they don't have the filename.ext in the # path, but in the query string). FF3 seems to have this bug fixed, opera 9.2 # also works. 
#return (self.formatter.transclusion(1, data=target) + # desc + # self.formatter.transclusion(0)) elif m.group('attach_scheme'): scheme = m.group('attach_scheme') url = wikiutil.url_unquote(m.group('attach_addr'), want_unicode=True) if scheme == 'attachment': mt = wikiutil.MimeType(filename=url) if mt.major == 'text': desc = self._transclude_description(desc, url) return self.formatter.attachment_inlined(url, desc) elif mt.major == 'image': desc = self._transclude_description(desc, url) tag_attrs, query_args = self._get_params(params, tag_attrs={'alt': desc, 'title': desc, }, acceptable_attrs=acceptable_attrs_img) return self.formatter.attachment_image(url, **tag_attrs) else: from MoinMoin.action import AttachFile pagename = self.formatter.page.page_name if AttachFile.exists(self.request, pagename, url): href = AttachFile.getAttachUrl(pagename, url, self.request, escaped=0) tag_attrs, query_args = self._get_params(params, tag_attrs={'title': desc, }, acceptable_attrs=acceptable_attrs_object) return (self.formatter.transclusion(1, data=href, type=mt.spoil(), **tag_attrs) + self._transclude_description(desc, url) + self.formatter.transclusion(0)) else: return (self.formatter.attachment_link(1, url) + self._transclude_description(desc, url) + self.formatter.attachment_link(0)) #NOT USED CURRENTLY: # use EmbedObject for other mimetypes from MoinMoin.macro.EmbedObject import EmbedObject from MoinMoin.action import AttachFile if mt is not None: # reuse class tmp from Despam to define macro from MoinMoin.action.Despam import tmp macro = tmp() macro.request = self.request macro.formatter = self.request.html_formatter pagename = self.formatter.page.page_name href = AttachFile.getAttachUrl(pagename, url, self.request, escaped=1) return self.formatter.rawHTML(EmbedObject.embed(EmbedObject(macro, wikiutil.escape(url)), mt, href)) elif scheme == 'drawing': desc = self._transclude_description(desc, url) return self.formatter.attachment_drawing(url, desc) elif m.group('page_name'): # experimental client side transclusion page_name_all = m.group('page_name') if ':' in page_name_all: wiki_name, page_name = page_name_all.split(':', 1) wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name) else: err = True if err: # not a interwiki link / not in interwiki map tag_attrs, query_args = self._get_params(params, tag_attrs={'type': 'text/html', 'width': '100%', }, acceptable_attrs=acceptable_attrs_object) if 'action' not in query_args: query_args['action'] = 'content' url = Page(self.request, page_name_all).url(self.request, querystr=query_args, relative=False) return (self.formatter.transclusion(1, data=url, **tag_attrs) + self._transclude_description(desc, page_name_all) + self.formatter.transclusion(0)) #return u"Error: <<Include(%s,%s)>> emulation missing..." % (page_name, args) else: # looks like a valid interwiki link url = wikiutil.join_wiki(wikiurl, wikitail) tag_attrs, query_args = self._get_params(params, tag_attrs={'type': 'text/html', 'width': '100%', }, acceptable_attrs=acceptable_attrs_object) if 'action' not in query_args: query_args['action'] = 'content' # XXX moin specific url += '?%s' % wikiutil.makeQueryString(query_args) return (self.formatter.transclusion(1, data=url, **tag_attrs) + self._transclude_description(desc, page_name) + self.formatter.transclusion(0)) #return u"Error: <<RemoteInclude(%s:%s,%s)>> still missing." 
% (wiki_name, page_name, args) else: desc = self._transclude_description(desc, target) return self.formatter.text('{{%s|%s|%s}}' % (target, desc, params)) return word +'???' _transclude_target_repl = _transclude_repl _transclude_desc_repl = _transclude_repl _transclude_params_repl = _transclude_repl def _link_description(self, desc, target='', default_text=''): """ parse a string <desc> valid as link description (text, transclusion, ...) and return formatted content. @param desc: the link description to parse @param default_text: use this text (formatted as text) if parsing desc returns nothing. @param target: target of the link (as readable markup) - used for transcluded image's description """ m = self.link_desc_re.match(desc) if m: if m.group('simple_text'): desc = m.group('simple_text') desc = self.formatter.text(desc) elif m.group('transclude'): groupdict = m.groupdict() if groupdict.get('transclude_desc') is None: # if transcluded obj (image) has no description, use target for it groupdict['transclude_desc'] = target desc = m.group('transclude') desc = self._transclude_repl(desc, groupdict) else: desc = default_text if desc: desc = self.formatter.text(desc) return desc def _link_repl(self, word, groups): """Handle [[target|text]] links.""" target = groups.get('link_target', '') desc = groups.get('link_desc', '') or '' params = groups.get('link_params', u'') or u'' acceptable_attrs = ['class', 'title', 'target', 'accesskey', ] # no style because of JS mt = self.link_target_re.match(target) if mt: if mt.group('page_name'): page_name_and_anchor = mt.group('page_name') if ':' in page_name_and_anchor: wiki_name, page_name = page_name_and_anchor.split(':', 1) wikitag, wikiurl, wikitail, err = wikiutil.resolve_interwiki(self.request, wiki_name, page_name) else: err = True if err: # not a interwiki link / not in interwiki map # handle anchors try: page_name, anchor = rsplit(page_name_and_anchor, "#", 1) except ValueError: page_name, anchor = page_name_and_anchor, "" current_page = self.formatter.page.page_name if not page_name: page_name = current_page # handle relative links abs_page_name = wikiutil.AbsPageName(current_page, page_name) tag_attrs, query_args = self._get_params(params, tag_attrs={}, acceptable_attrs=acceptable_attrs) return (self.formatter.pagelink(1, abs_page_name, anchor=anchor, querystr=query_args, **tag_attrs) + self._link_description(desc, target, page_name_and_anchor) + self.formatter.pagelink(0, abs_page_name)) else: # interwiki link tag_attrs, query_args = self._get_params(params, tag_attrs={}, acceptable_attrs=acceptable_attrs) return (self.formatter.interwikilink(1, wiki_name, page_name, querystr=query_args, **tag_attrs) + self._link_description(desc, target, page_name) + self.formatter.interwikilink(0, wiki_name, page_name)) elif mt.group('extern_addr'): scheme = mt.group('extern_scheme') target = mt.group('extern_addr') tag_attrs, query_args = self._get_params(params, tag_attrs={'class': scheme, }, acceptable_attrs=acceptable_attrs) return (self.formatter.url(1, target, **tag_attrs) + self._link_description(desc, target, target) + self.formatter.url(0)) elif mt.group('attach_scheme'): scheme = mt.group('attach_scheme') url = wikiutil.url_unquote(mt.group('attach_addr'), want_unicode=True) tag_attrs, query_args = self._get_params(params, tag_attrs={'title': desc, }, acceptable_attrs=acceptable_attrs) if scheme == 'attachment': return (self.formatter.attachment_link(1, url, querystr=query_args, **tag_attrs) + self._link_description(desc, target, url) + 
self.formatter.attachment_link(0)) elif scheme == 'drawing': return self.formatter.attachment_drawing(url, desc, alt=desc, **tag_attrs) else: if desc: desc = '|' + desc return self.formatter.text('[[%s%s]]' % (target, desc)) _link_target_repl = _link_repl _link_desc_repl = _link_repl _link_params_repl = _link_repl def _email_repl(self, word, groups): """Handle email addresses (without a leading mailto:).""" return (self.formatter.url(1, "mailto:%s" % word, css='mailto') + self.formatter.text(word) + self.formatter.url(0)) def _sgml_entity_repl(self, word, groups): """Handle SGML entities.""" return self.formatter.text(word) def _entity_repl(self, word, groups): """Handle numeric (decimal and hexadecimal) and symbolic SGML entities.""" return self.formatter.rawHTML(word) def _indent_repl(self, match, groups): """Handle pure indentation (no - * 1. markup).""" result = [] if not (self.in_li or self.in_dd): self._close_item(result) self.in_li = 1 css_class = None if self.line_was_empty and not self.first_list_item: css_class = 'gap' result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none")) return ''.join(result) def _li_none_repl(self, match, groups): """Handle type=none (" .") lists.""" result = [] self._close_item(result) self.in_li = 1 css_class = None if self.line_was_empty and not self.first_list_item: css_class = 'gap' result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none")) return ''.join(result) def _li_repl(self, match, groups): """Handle bullet (" *") lists.""" result = [] self._close_item(result) self.in_li = 1 css_class = None if self.line_was_empty and not self.first_list_item: css_class = 'gap' result.append(self.formatter.listitem(1, css_class=css_class)) return ''.join(result) def _ol_repl(self, match, groups): """Handle numbered lists.""" return self._li_repl(match, groups) def _dl_repl(self, match, groups): """Handle definition lists.""" result = [] self._close_item(result) self.in_dd = 1 result.extend([ self.formatter.definition_term(1), self.formatter.text(match[1:-3].lstrip(' ')), self.formatter.definition_term(0), self.formatter.definition_desc(1), ]) return ''.join(result) def _indent_level(self): """Return current char-wise indent level.""" return len(self.list_indents) and self.list_indents[-1] def _indent_to(self, new_level, list_type, numtype, numstart): """Close and open lists.""" openlist = [] # don't make one out of these two statements! 
closelist = [] if self._indent_level() != new_level and self.in_table: closelist.append(self.formatter.table(0)) self.in_table = 0 while self._indent_level() > new_level: self._close_item(closelist) if self.list_types[-1] == 'ol': tag = self.formatter.number_list(0) elif self.list_types[-1] == 'dl': tag = self.formatter.definition_list(0) else: tag = self.formatter.bullet_list(0) closelist.append(tag) del self.list_indents[-1] del self.list_types[-1] if self.list_types: # we are still in a list if self.list_types[-1] == 'dl': self.in_dd = 1 else: self.in_li = 1 # Open new list, if necessary if self._indent_level() < new_level: self.list_indents.append(new_level) self.list_types.append(list_type) if self.formatter.in_p: closelist.append(self.formatter.paragraph(0)) if list_type == 'ol': tag = self.formatter.number_list(1, numtype, numstart) elif list_type == 'dl': tag = self.formatter.definition_list(1) else: tag = self.formatter.bullet_list(1) openlist.append(tag) self.first_list_item = 1 self.in_li = 0 self.in_dd = 0 # If list level changes, close an open table if self.in_table and (openlist or closelist): closelist[0:0] = [self.formatter.table(0)] self.in_table = 0 self.in_list = self.list_types != [] return ''.join(closelist) + ''.join(openlist) def _undent(self): """Close all open lists.""" result = [] #result.append("<!-- _undent start -->\n") self._close_item(result) for type in self.list_types[::-1]: if type == 'ol': result.append(self.formatter.number_list(0)) elif type == 'dl': result.append(self.formatter.definition_list(0)) else: result.append(self.formatter.bullet_list(0)) #result.append("<!-- _undent end -->\n") self.list_indents = [] self.list_types = [] return ''.join(result) def _getTableAttrs(self, attrdef): attr_rule = r'^(\|\|)*<(?!<)(?P<attrs>[^>]*?)>' m = re.match(attr_rule, attrdef, re.U) if not m: return {}, '' attrdef = m.group('attrs') # extension for special table markup def table_extension(key, parser, attrs, wiki_parser=self): """ returns: tuple (found_flag, msg) found_flag: whether we found something and were able to process it here true for special stuff like 100% or - or #AABBCC false for style xxx="yyy" attributes msg: "" or an error msg """ _ = wiki_parser._ found = False msg = '' if key[0] in "0123456789": token = parser.get_token() if token != '%': wanted = '%' msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"', formatted=False) % { 'wanted': wanted, 'key': key, 'token': token} else: try: dummy = int(key) except ValueError: msg = _('Expected an integer "%(key)s" before "%(token)s"', formatted=False) % { 'key': key, 'token': token} else: found = True attrs['width'] = '"%s%%"' % key elif key == '-': arg = parser.get_token() try: dummy = int(arg) except ValueError: msg = _('Expected an integer "%(arg)s" after "%(key)s"', formatted=False) % { 'arg': arg, 'key': key} else: found = True attrs['colspan'] = '"%s"' % arg elif key == '|': arg = parser.get_token() try: dummy = int(arg) except ValueError: msg = _('Expected an integer "%(arg)s" after "%(key)s"', formatted=False) % { 'arg': arg, 'key': key} else: found = True attrs['rowspan'] = '"%s"' % arg elif key == '(': found = True attrs['align'] = '"left"' elif key == ':': found = True attrs['align'] = '"center"' elif key == ')': found = True attrs['align'] = '"right"' elif key == '^': found = True attrs['valign'] = '"top"' elif key == 'v': found = True attrs['valign'] = '"bottom"' elif key == '#': arg = parser.get_token() try: if len(arg) != 6: raise ValueError dummy = int(arg, 16) except 
ValueError: msg = _('Expected a color value "%(arg)s" after "%(key)s"', formatted=False) % { 'arg': arg, 'key': key} else: found = True attrs['bgcolor'] = '"#%s"' % arg return found, self.formatter.rawHTML(msg) # scan attributes attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension) if msg: msg = '<strong class="highlight">%s</strong>' % msg #self.request.log("parseAttributes returned %r" % attr) return attr, msg def _tableZ_repl(self, word, groups): """Handle table row end.""" if self.in_table: result = '' # REMOVED: check for self.in_li, p should always close if self.formatter.in_p: result = self.formatter.paragraph(0) result += self.formatter.table_cell(0) + self.formatter.table_row(0) return result else: return self.formatter.text(word) def _table_repl(self, word, groups): """Handle table cell separator.""" if self.in_table: result = [] # check for attributes attrs, attrerr = self._getTableAttrs(word) # start the table row? if self.table_rowstart: self.table_rowstart = 0 result.append(self.formatter.table_row(1, attrs)) else: # Close table cell, first closing open p # REMOVED check for self.in_li, paragraph should close always! if self.formatter.in_p: result.append(self.formatter.paragraph(0)) result.append(self.formatter.table_cell(0)) # check for adjacent cell markers if word.count("|") > 2: if 'align' not in attrs and \ not ('style' in attrs and 'text-align' in attrs['style'].lower()): # add center alignment if we don't have some alignment already attrs['align'] = '"center"' if 'colspan' not in attrs: attrs['colspan'] = '"%d"' % (word.count("|")/2) # return the complete cell markup result.append(self.formatter.table_cell(1, attrs) + attrerr) result.append(self._line_anchordef()) return ''.join(result) else: return self.formatter.text(word) def _heading_repl(self, word, groups): """Handle section headings.""" heading_text = groups.get('heading_text', '') depth = min(len(groups.get('hmarker')), 5) return ''.join([ self._closeP(), self.formatter.heading(1, depth, id=heading_text), self.formatter.text(heading_text), self.formatter.heading(0, depth), ]) _heading_text_repl = _heading_repl def _parser_repl(self, word, groups): """Handle parsed code displays.""" self.parser = None self.parser_name = None self.parser_lines = [] parser_line = word = groups.get('parser_line', u'') parser_name = groups.get('parser_name', None) parser_args = groups.get('parser_args', None) parser_nothing = groups.get('parser_nothing', None) parser_unique = groups.get('parser_unique', u'') or u'' #logging.debug("_parser_repl: parser_name %r parser_args %r parser_unique %r" % (parser_name, parser_args, parser_unique)) if set(parser_unique) == set('{'): # just some more {{{{{{ parser_unique = u'}' * len(parser_unique) # for symmetry cosmetic reasons self.parser_unique = parser_unique if parser_name is not None: # First try to find a parser for this if parser_name == u'': # empty bang paths lead to a normal code display # can be used to escape real, non-empty bang paths #logging.debug("_parser_repl: empty bangpath") parser_name = 'text' word = '' elif parser_nothing is None: # there was something non-whitespace following the {{{ parser_name = 'text' self.setParser(parser_name) if not self.parser and parser_name: # loading the desired parser didn't work, retry a safe option: wanted_parser = parser_name parser_name = 'text' self.setParser(parser_name) word = '%s %s (-)' % (wanted_parser, parser_args) # indication that it did not work if self.parser: self.parser_name = parser_name self.in_pre 
= 'found_parser' if word: self.parser_lines.append(word) else: self.in_pre = 'search_parser' #logging.debug("_parser_repl: in_pre %r line %d" % (self.in_pre, self.lineno)) return '' _parser_unique_repl = _parser_repl _parser_line_repl = _parser_repl _parser_name_repl = _parser_repl _parser_args_repl = _parser_repl _parser_nothing_repl = _parser_repl def _parser_content(self, line): """ handle state and collecting lines for parser in pre/parser sections """ #logging.debug("parser_content: %r" % line) if self.in_pre == 'search_parser' and line.strip(): # try to find a parser specification if line.strip().startswith("#!"): parser_name = line.strip()[2:].split()[0] else: parser_name = 'text' self.setParser(parser_name) if not self.parser: parser_name = 'text' self.setParser(parser_name) if self.parser: self.in_pre = 'found_parser' self.parser_lines.append(line) self.parser_name = parser_name elif self.in_pre == 'found_parser': # collect the content lines self.parser_lines.append(line) return '' # we emit the content after reaching the end of the parser/pre section def _parser_end_repl(self, word, groups): """ when we reach the end of a parser/pre section, we call the parser with the lines we collected """ #if self.in_pre: self.in_pre = None self.inhibit_p = 0 #logging.debug("_parser_end_repl: in_pre %r line %d" % (self.in_pre, self.lineno)) self.request.write(self._closeP()) if self.parser_name is None: # we obviously did not find a parser specification self.parser_name = 'text' result = self.formatter.parser(self.parser_name, self.parser_lines) del self.parser_lines self.in_pre = None self.parser = None return result def _smiley_repl(self, word, groups): """Handle smileys.""" return self.formatter.smiley(word) def _comment_repl(self, word, groups): # if we are in a paragraph, we must close it so that normal text following # in the line below the comment will reopen a new paragraph. if self.formatter.in_p: self.formatter.paragraph(0) self.line_is_empty = 1 # markup following comment lines treats them as if they were empty return self.formatter.comment(word) def _closeP(self): if self.formatter.in_p: return self.formatter.paragraph(0) return '' def _macro_repl(self, word, groups): """Handle macros.""" macro_name = groups.get('macro_name') macro_args = groups.get('macro_args') self.inhibit_p = 0 # 1 fixes UserPreferences, 0 fixes paragraph formatting for macros # create macro instance if self.macro is None: self.macro = macro.Macro(self) return self.formatter.macro(self.macro, macro_name, macro_args, markup=groups.get('macro')) _macro_name_repl = _macro_repl _macro_args_repl = _macro_repl def scan(self, line, inhibit_p=False): """ Scans one line Append text before match, invoke replace() with match, and add text after match. 
""" result = [] lastpos = 0 # absolute position within line line_length = len(line) ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line) while lastpos <= line_length: # it is <=, not <, because we need to process the empty line also parser_scan_re = re.compile(self.parser_scan_rule % re.escape(self.parser_unique), re.VERBOSE|re.UNICODE) scan_re = self.in_pre and parser_scan_re or self.scan_re match = scan_re.search(line, lastpos) if match: start = match.start() if lastpos < start: if self.in_pre: self._parser_content(line[lastpos:start]) else: ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()]) # self.no_862 is added to solve the issue of macros called inline if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or self.no_862): result.append(self.formatter.paragraph(1, css_class="line862")) # add the simple text in between lastpos and beginning of current match result.append(self.formatter.text(line[lastpos:start])) # Replace match with markup if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or self.in_table or self.in_list): result.append(self.formatter.paragraph(1, css_class="line867")) result.append(self.replace(match, inhibit_p)) end = match.end() lastpos = end if start == end: # we matched an empty string lastpos += 1 # proceed, we don't want to match this again else: if self.in_pre: self._parser_content(line[lastpos:]) elif line[lastpos:]: ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:]) if not (inhibit_p or self.inhibit_p or self.in_pre or self.formatter.in_p or self.in_li or self.in_dd): result.append(self.formatter.paragraph(1, css_class="line874")) # add the simple text (no markup) after last match result.append(self.formatter.text(line[lastpos:])) break # nothing left to do! return u''.join(result) def _replace(self, match): """ Same as replace() but with no magic """ for name, text in match.groupdict().iteritems(): if text is not None: # Get replace method and replace text replace_func = getattr(self, '_%s_repl' % name) result = replace_func(text, match.groupdict()) return result def replace(self, match, inhibit_p=False): """ Replace match using type name """ result = [] for type, hit in match.groupdict().items(): if hit is not None and not type in ["hmarker", ]: ##result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit)) # Open p for certain types if not (inhibit_p or self.inhibit_p or self.formatter.in_p or self.in_pre or (type in self.no_new_p_before)): result.append(self.formatter.paragraph(1, css_class="line891")) # Get replace method and replace hit replace_func = getattr(self, '_%s_repl' % type) result.append(replace_func(hit, match.groupdict())) return ''.join(result) else: # We should never get here import pprint raise Exception("Can't handle match %r\n%s\n%s" % ( match, pprint.pformat(match.groupdict()), pprint.pformat(match.groups()), )) return "" def _line_anchordef(self): if self.line_anchors and not self.line_anchor_printed: self.line_anchor_printed = 1 return self.formatter.line_anchordef(self.lineno) else: return '' def format(self, formatter, inhibit_p=False): """ For each line, scan through looking for magic strings, outputting verbatim any intervening text. 
""" self.formatter = formatter self.hilite_re = self.formatter.page.hilite_re # get text and replace TABs rawtext = self.raw.expandtabs() # go through the lines self.lineno = self.start_line self.lines = self.eol_re.split(rawtext) self.line_is_empty = 0 self.in_processing_instructions = 1 if self.wrapping_div_class: self.request.write(self.formatter.div(1, css_class=self.wrapping_div_class)) # Main loop for line in self.lines: self.lineno += 1 if '>><<' in line.replace(' ', ''): self.no_862 = True self.line_anchor_printed = 0 if not self.in_table: self.request.write(self._line_anchordef()) self.table_rowstart = 1 self.line_was_empty = self.line_is_empty self.line_is_empty = 0 self.first_list_item = 0 self.inhibit_p = 0 # ignore processing instructions if self.in_processing_instructions: found = False for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated", "#pragma", "#form", "#acl", "#language"): if line.lower().startswith(pi): self.request.write(self.formatter.comment(line)) found = True break if not found: self.in_processing_instructions = 0 else: continue # do not parse this line if not self.in_pre: # we don't have \n as whitespace any more # This is the space between lines we join to one paragraph line += ' ' # Paragraph break on empty lines if not line.strip(): if self.in_table: self.request.write(self.formatter.table(0)) self.request.write(self._line_anchordef()) self.in_table = 0 # CHANGE: removed check for not self.list_types # p should close on every empty line if self.formatter.in_p: self.request.write(self.formatter.paragraph(0)) self.line_is_empty = 1 continue # Check indent level indent = self.indent_re.match(line) indlen = len(indent.group(0)) indtype = "ul" numtype = None numstart = None if indlen: match = self.ol_re.match(line) if match: numtype, numstart = match.group(0).strip().split('.') numtype = numtype[0] if numstart and numstart[0] == "#": numstart = int(numstart[1:]) else: numstart = None indtype = "ol" else: match = self.dl_re.match(line) if match: indtype = "dl" # output proper indentation tags self.request.write(self._indent_to(indlen, indtype, numtype, numstart)) # Table mode # TODO: move into function? if (not self.in_table and line[indlen:indlen + 2] == "||" and line.endswith("|| ") and len(line) >= 5 + indlen): # Start table if self.list_types and not self.in_li: self.request.write(self.formatter.listitem(1, style="list-style-type:none")) ## CHANGE: no automatic p on li ##self.request.write(self.formatter.paragraph(1)) self.in_li = 1 # CHANGE: removed check for self.in_li # paragraph should end before table, always! 
                    if self.formatter.in_p:
                        self.request.write(self.formatter.paragraph(0))
                    attrs, attrerr = self._getTableAttrs(line[indlen+2:])
                    self.request.write(self.formatter.table(1, attrs) + attrerr)
                    self.in_table = True # self.lineno

                elif (self.in_table and not
                      # intra-table comments should not break a table
                      (line.startswith("##") or
                       line[indlen:indlen + 2] == "||" and
                       line.endswith("|| ") and
                       len(line) >= 5 + indlen)):
                    # Close table
                    self.request.write(self.formatter.table(0))
                    self.request.write(self._line_anchordef())
                    self.in_table = 0

            # Scan line, format and write
            formatted_line = self.scan(line, inhibit_p=inhibit_p)
            self.request.write(formatted_line)

        # Close code displays, paragraphs, tables and open lists
        self.request.write(self._undent())
        if self.in_pre:
            self.request.write(self.formatter.preformatted(0))
        if self.formatter.in_p:
            self.request.write(self.formatter.paragraph(0))
        if self.in_table:
            self.request.write(self.formatter.table(0))

        if self.wrapping_div_class:
            self.request.write(self.formatter.div(0))

    # Private helpers ------------------------------------------------------------

    def setParser(self, name):
        """ Set parser to parser named 'name' """
        # XXX this is done by the formatter as well
        try:
            self.parser = wikiutil.searchAndImportPlugin(self.request.cfg, "parser", name)
        except wikiutil.PluginMissingError:
            self.parser = None
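
The file only defines the Parser class; how it is driven is left to the surrounding framework. A rough, minimal usage sketch follows, assuming a MoinMoin request object and the HTML formatter it carries as request.html_formatter (which this module also references); the helper name render_wiki_markup and the formatter.setPage() call are illustrative assumptions, not taken from this file:

from MoinMoin.Page import Page
from MoinMoin.parser.text_moin_wiki import Parser

def render_wiki_markup(request, pagename, raw_text):
    # hypothetical helper: bind a formatter to a page, then let the parser
    # stream rendered output through request.write()
    page = Page(request, pagename)
    formatter = request.html_formatter
    formatter.setPage(page)                                  # assumed formatter API for binding the page
    parser = Parser(raw_text, request, line_anchors=False)   # keyword seen in __init__ above
    parser.format(formatter)                                 # format() writes output, it does not return it

Note that format() emits its result via request.write() (as its main loop shows), so a caller collects output from the request, not from a return value.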
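
The class docstring and the replace() method describe the core dispatch trick: every alternative in scan_re is a named group, and a hit is routed to the method named _<groupname>_repl() via getattr. A stripped-down, self-contained illustration of that pattern, with toy rules only (nothing here is taken from the real scan_rules):

import re

class TinyScanner:
    # two toy named groups standing in for the big scan_rules alternation
    scan_re = re.compile(r"(?P<bold>'''.*?''')|(?P<url>https?://\S+)", re.UNICODE)

    def _bold_repl(self, hit):
        return "<b>%s</b>" % hit.strip("'")

    def _url_repl(self, hit):
        return '<a href="%s">%s</a>' % (hit, hit)

    def replace(self, match):
        # same lookup idea as Parser.replace(): pick the handler by group name
        for name, hit in match.groupdict().items():
            if hit is not None:
                return getattr(self, '_%s_repl' % name)(hit)
        return ''

    def scan(self, line):
        out, lastpos = [], 0
        for match in self.scan_re.finditer(line):
            out.append(line[lastpos:match.start()])   # verbatim text before the hit
            out.append(self.replace(match))           # formatted replacement
            lastpos = match.end()
        out.append(line[lastpos:])                    # verbatim tail
        return ''.join(out)

print(TinyScanner().scan("see '''this''' at http://example.org"))
# -> see <b>this</b> at <a href="http://example.org">http://example.org</a>

The real Parser additionally carries state between hits (paragraphs, lists, tables, pre/parser sections), which is why its scan() and replace() are considerably more involved.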