Mercurial > moin > 1.9
changeset 2731:a62313ef3473
added file with 1.5.8 wiki parser code (see last commit)
author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
---|---|
date | Sun, 19 Aug 2007 19:29:40 +0200 |
parents | a907a61c33b3 |
children | b185b5b80d1b |
files | MoinMoin/script/migration/text_moin158_wiki.py |
diffstat | 1 files changed, 1137 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MoinMoin/script/migration/text_moin158_wiki.py Sun Aug 19 19:29:40 2007 +0200 @@ -0,0 +1,1137 @@ +# -*- coding: iso-8859-1 -*- +""" + MoinMoin - MoinMoin Wiki Markup Parser + + @copyright: 2000, 2001, 2002 by Jürgen Hermann <jh@web.de> + @license: GNU GPL, see COPYING for details. +""" + +import os, re +from MoinMoin import config, wikiutil +from MoinMoin import macro as wikimacro +from MoinMoin.Page import Page +from MoinMoin.util import web + +Dependencies = [] + +class Parser: + """ + Object that turns Wiki markup into HTML. + + All formatting commands can be parsed one line at a time, though + some state is carried over between lines. + + Methods named like _*_repl() are responsible to handle the named regex + patterns defined in print_html(). + """ + + # allow caching + caching = 1 + Dependencies = [] + + # some common strings + PARENT_PREFIX = wikiutil.PARENT_PREFIX + punct_pattern = re.escape(u'''"\'}]|:,.)?!''') + attachment_schemas = ["attachment", "inline", "drawing", ] + url_schemas = ['http', 'https', 'ftp', 'wiki', 'mailto', 'nntp', 'news', + 'telnet', 'file', 'irc', 'ircs', + 'webcal', 'ed2k', 'xmpp', 'rootz', + ] + url_pattern = u'|'.join(url_schemas + attachment_schemas) + + # some common rules + word_rule = ur'(?:(?<![%(u)s%(l)s])|^)%(parent)s(?:%(subpages)s(?:[%(u)s][%(l)s]+){2,})+(?![%(u)s%(l)s]+)' % { + 'u': config.chars_upper, + 'l': config.chars_lower, + 'subpages': wikiutil.CHILD_PREFIX + '?', + 'parent': ur'(?:%s)?' % re.escape(PARENT_PREFIX), + } + url_rule = ur'%(url_guard)s(%(url)s)\:([^\s\<%(punct)s]|([%(punct)s][^\s\<%(punct)s]))+' % { + 'url_guard': u'(^|(?<!\w))', + 'url': url_pattern, + 'punct': punct_pattern, + } + + ol_rule = ur"^\s+(?:[0-9]+|[aAiI])\.(?:#\d+)?\s" + dl_rule = ur"^\s+.*?::\s" + + config_smileys = dict([(key, None) for key in config.smileys]) + + # the big, fat, ugly one ;) + formatting_rules = ur"""(?P<ent_numeric>&#(\d{1,5}|x[0-9a-fA-F]+);) +(?:(?P<emph_ibb>'''''(?=[^']+''')) +(?P<emph_ibi>'''''(?=[^']+'')) +(?P<emph_ib_or_bi>'{5}(?=[^'])) +(?P<emph>'{2,3}) +(?P<u>__) +(?P<sup>\^.*?\^) +(?P<sub>,,[^,]{1,40},,) +(?P<tt>\{\{\{.*?\}\}\}) +(?P<processor>(\{\{\{(#!.*|\s*$))) +(?P<pre>(\{\{\{ ?|\}\}\})) +(?P<small>(\~- ?|-\~)) +(?P<big>(\~\+ ?|\+\~)) +(?P<strike>(--\(|\)--)) +(?P<rule>-{4,}) +(?P<comment>^\#\#.*$) +(?P<macro>\[\[(%%(macronames)s)(?:\(.*?\))?\]\])) +(?P<ol>%(ol_rule)s) +(?P<dl>%(dl_rule)s) +(?P<li>^\s+\*\s*) +(?P<li_none>^\s+\.\s*) +(?P<indent>^\s+) +(?P<tableZ>\|\| $) +(?P<table>(?:\|\|)+(?:<[^>]*?>)?(?!\|? $)) +(?P<heading>^\s*(?P<hmarker>=+)\s.*\s(?P=hmarker) $) +(?P<interwiki>[A-Z][a-zA-Z]+\:[^\s'\"\:\<\|]([^\s%(punct)s]|([%(punct)s][^\s%(punct)s]))+) +(?P<word>%(word_rule)s) +(?P<url_bracket>\[((%(url)s)\:|#|\:)[^\s\]]+(\s[^\]]+)?\]) +(?P<url>%(url_rule)s) +(?P<email>[-\w._+]+\@[\w-]+(\.[\w-]+)+) +(?P<smiley>(?<=\s)(%(smiley)s)(?=\s)) +(?P<smileyA>^(%(smiley)s)(?=\s)) +(?P<ent_symbolic>&\w+;) +(?P<ent>[<>&]) +(?P<wikiname_bracket>\[".*?"\]) +(?P<tt_bt>`.*?`)""" % { + + 'url': url_pattern, + 'punct': punct_pattern, + 'ol_rule': ol_rule, + 'dl_rule': dl_rule, + 'url_rule': url_rule, + 'word_rule': word_rule, + 'smiley': u'|'.join(map(re.escape, config_smileys.keys()))} + + # Don't start p before these + no_new_p_before = ("heading rule table tableZ tr td " + "ul ol dl dt dd li li_none indent " + "macro processor pre") + no_new_p_before = no_new_p_before.split() + no_new_p_before = dict(zip(no_new_p_before, [1] * len(no_new_p_before))) + + def __init__(self, raw, request, **kw): + self.raw = raw + self.request = request + self.form = request.form + self._ = request.getText + self.cfg = request.cfg + self.line_anchors = kw.get('line_anchors', True) + self.macro = None + self.start_line = kw.get('start_line', 0) + + self.is_em = 0 + self.is_b = 0 + self.is_u = 0 + self.is_strike = 0 + self.lineno = 0 + self.in_list = 0 # between <ul/ol/dl> and </ul/ol/dl> + self.in_li = 0 # between <li> and </li> + self.in_dd = 0 # between <dd> and </dd> + self.in_pre = 0 + self.in_table = 0 + self.is_big = False + self.is_small = False + self.inhibit_p = 0 # if set, do not auto-create a <p>aragraph + self.titles = request._page_headings + + # holds the nesting level (in chars) of open lists + self.list_indents = [] + self.list_types = [] + + self.formatting_rules = self.formatting_rules % {'macronames': u'|'.join(wikimacro.getNames(self.cfg))} + + def _close_item(self, result): + #result.append("<!-- close item begin -->\n") + if self.in_table: + result.append(self.formatter.table(0)) + self.in_table = 0 + if self.in_li: + self.in_li = 0 + if self.formatter.in_p: + result.append(self.formatter.paragraph(0)) + result.append(self.formatter.listitem(0)) + if self.in_dd: + self.in_dd = 0 + if self.formatter.in_p: + result.append(self.formatter.paragraph(0)) + result.append(self.formatter.definition_desc(0)) + #result.append("<!-- close item end -->\n") + + + def interwiki(self, url_and_text, **kw): + # TODO: maybe support [wiki:Page http://wherever/image.png] ? + if len(url_and_text) == 1: + url = url_and_text[0] + text = None + else: + url, text = url_and_text + + # keep track of whether this is a self-reference, so links + # are always shown even the page doesn't exist. + is_self_reference = 0 + url2 = url.lower() + if url2.startswith('wiki:self:'): + url = url[10:] # remove "wiki:self:" + is_self_reference = 1 + elif url2.startswith('wiki:'): + url = url[5:] # remove "wiki: + + tag, tail = wikiutil.split_wiki(url) + if text is None: + if tag: + text = tail + else: + text = url + url = "" + elif (url.startswith(wikiutil.CHILD_PREFIX) or # fancy link to subpage [wiki:/SubPage text] + is_self_reference or # [wiki:Self:LocalPage text] or [:LocalPage:text] + Page(self.request, url).exists()): # fancy link to local page [wiki:LocalPage text] + return self._word_repl(url, text) + + wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, url) + href = wikiutil.join_wiki(wikiurl, wikitail) + + # check for image URL, and possibly return IMG tag + if not kw.get('pretty_url', 0) and wikiutil.isPicture(wikitail): + return self.formatter.image(src=href) + + # link to self? + if wikitag is None: + return self._word_repl(wikitail) + + return (self.formatter.interwikilink(1, tag, tail) + + self.formatter.text(text) + + self.formatter.interwikilink(0, tag, tail)) + + def attachment(self, url_and_text, **kw): + """ This gets called on attachment URLs. + """ + _ = self._ + if len(url_and_text) == 1: + url = url_and_text[0] + text = None + else: + url, text = url_and_text + + inline = url[0] == 'i' + drawing = url[0] == 'd' + url = url.split(":", 1)[1] + url = wikiutil.url_unquote(url, want_unicode=True) + text = text or url + + from MoinMoin.action import AttachFile + if drawing: + return self.formatter.attachment_drawing(url, text) + + # check for image URL, and possibly return IMG tag + # (images are always inlined, just like for other URLs) + if not kw.get('pretty_url', 0) and wikiutil.isPicture(url): + return self.formatter.attachment_image(url) + + # inline the attachment + if inline: + return self.formatter.attachment_inlined(url, text) + + return self.formatter.attachment_link(url, text) + + def _u_repl(self, word): + """Handle underline.""" + self.is_u = not self.is_u + return self.formatter.underline(self.is_u) + + def _strike_repl(self, word): + """Handle strikethrough.""" + # XXX we don't really enforce the correct sequence --( ... )-- here + self.is_strike = not self.is_strike + return self.formatter.strike(self.is_strike) + + def _small_repl(self, word): + """Handle small.""" + if word.strip() == '~-' and self.is_small: + return self.formatter.text(word) + if word.strip() == '-~' and not self.is_small: + return self.formatter.text(word) + self.is_small = not self.is_small + return self.formatter.small(self.is_small) + + def _big_repl(self, word): + """Handle big.""" + if word.strip() == '~+' and self.is_big: + return self.formatter.text(word) + if word.strip() == '+~' and not self.is_big: + return self.formatter.text(word) + self.is_big = not self.is_big + return self.formatter.big(self.is_big) + + def _emph_repl(self, word): + """Handle emphasis, i.e. '' and '''.""" + ##print "#", self.is_b, self.is_em, "#" + if len(word) == 3: + self.is_b = not self.is_b + if self.is_em and self.is_b: + self.is_b = 2 + return self.formatter.strong(self.is_b) + else: + self.is_em = not self.is_em + if self.is_em and self.is_b: + self.is_em = 2 + return self.formatter.emphasis(self.is_em) + + def _emph_ibb_repl(self, word): + """Handle mixed emphasis, i.e. ''''' followed by '''.""" + self.is_b = not self.is_b + self.is_em = not self.is_em + if self.is_em and self.is_b: + self.is_b = 2 + return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b) + + def _emph_ibi_repl(self, word): + """Handle mixed emphasis, i.e. ''''' followed by ''.""" + self.is_b = not self.is_b + self.is_em = not self.is_em + if self.is_em and self.is_b: + self.is_em = 2 + return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em) + + def _emph_ib_or_bi_repl(self, word): + """Handle mixed emphasis, exactly five '''''.""" + ##print "*", self.is_b, self.is_em, "*" + b_before_em = self.is_b > self.is_em > 0 + self.is_b = not self.is_b + self.is_em = not self.is_em + if b_before_em: + return self.formatter.strong(self.is_b) + self.formatter.emphasis(self.is_em) + else: + return self.formatter.emphasis(self.is_em) + self.formatter.strong(self.is_b) + + + def _sup_repl(self, word): + """Handle superscript.""" + return self.formatter.sup(1) + \ + self.formatter.text(word[1:-1]) + \ + self.formatter.sup(0) + + def _sub_repl(self, word): + """Handle subscript.""" + return self.formatter.sub(1) + \ + self.formatter.text(word[2:-2]) + \ + self.formatter.sub(0) + + + def _rule_repl(self, word): + """Handle sequences of dashes.""" + result = self._undent() + self._closeP() + if len(word) <= 4: + result = result + self.formatter.rule() + else: + # Create variable rule size 1 - 6. Actual size defined in css. + size = min(len(word), 10) - 4 + result = result + self.formatter.rule(size) + return result + + + def _word_repl(self, word, text=None): + """Handle WikiNames.""" + + # check for parent links + # !!! should use wikiutil.AbsPageName here, but setting `text` + # correctly prevents us from doing this for now + if word.startswith(wikiutil.PARENT_PREFIX): + if not text: + text = word + word = '/'.join(filter(None, self.formatter.page.page_name.split('/')[:-1] + [word[wikiutil.PARENT_PREFIX_LEN:]])) + + if not text: + # if a simple, self-referencing link, emit it as plain text + if word == self.formatter.page.page_name: + return self.formatter.text(word) + text = word + if word.startswith(wikiutil.CHILD_PREFIX): + word = self.formatter.page.page_name + '/' + word[wikiutil.CHILD_PREFIX_LEN:] + + # handle anchors + parts = word.split("#", 1) + anchor = "" + if len(parts) == 2: + word, anchor = parts + + return (self.formatter.pagelink(1, word, anchor=anchor) + + self.formatter.text(text) + + self.formatter.pagelink(0, word)) + + def _notword_repl(self, word): + """Handle !NotWikiNames.""" + return self.formatter.nowikiword(word[1:]) + + def _interwiki_repl(self, word): + """Handle InterWiki links.""" + wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, word) + if wikitag_bad: + return self.formatter.text(word) + else: + return self.interwiki(["wiki:" + word]) + + + def _url_repl(self, word): + """Handle literal URLs including inline images.""" + scheme = word.split(":", 1)[0] + + if scheme == "wiki": + return self.interwiki([word]) + if scheme in self.attachment_schemas: + return self.attachment([word]) + + if wikiutil.isPicture(word): + word = wikiutil.mapURL(self.request, word) + # Get image name http://here.com/dir/image.gif -> image + name = word.split('/')[-1] + name = ''.join(name.split('.')[:-1]) + return self.formatter.image(src=word, alt=name) + else: + return (self.formatter.url(1, word, css=scheme) + + self.formatter.text(word) + + self.formatter.url(0)) + + + def _wikiname_bracket_repl(self, word): + """Handle special-char wikinames.""" + wikiname = word[2:-2] + if wikiname: + return self._word_repl(wikiname) + else: + return self.formatter.text(word) + + + def _url_bracket_repl(self, word): + """Handle bracketed URLs.""" + + # Local extended link? + if word[1] == ':': + words = word[2:-1].split(':', 1) + if len(words) == 1: + words = words * 2 + words[0] = 'wiki:Self:%s' % words[0] + return self.interwiki(words, pretty_url=1) + #return self._word_repl(words[0], words[1]) + + # Traditional split on space + words = word[1:-1].split(None, 1) + if len(words) == 1: + words = words * 2 + + if words[0][0] == '#': + # anchor link + return (self.formatter.url(1, words[0]) + + self.formatter.text(words[1]) + + self.formatter.url(0)) + + scheme = words[0].split(":", 1)[0] + if scheme == "wiki": + return self.interwiki(words, pretty_url=1) + if scheme in self.attachment_schemas: + return self.attachment(words, pretty_url=1) + + if wikiutil.isPicture(words[1]) and re.match(self.url_rule, words[1]): + return (self.formatter.url(1, words[0], css='external', do_escape=0) + + self.formatter.image(title=words[0], alt=words[0], src=words[1]) + + self.formatter.url(0)) + else: + return (self.formatter.url(1, words[0], css=scheme, do_escape=0) + + self.formatter.text(words[1]) + + self.formatter.url(0)) + + + def _email_repl(self, word): + """Handle email addresses (without a leading mailto:).""" + return (self.formatter.url(1, "mailto:" + word, css='mailto') + + self.formatter.text(word) + + self.formatter.url(0)) + + + def _ent_repl(self, word): + """Handle SGML entities.""" + return self.formatter.text(word) + #return {'&': '&', + # '<': '<', + # '>': '>'}[word] + + def _ent_numeric_repl(self, word): + """Handle numeric (decimal and hexadecimal) SGML entities.""" + return self.formatter.rawHTML(word) + + def _ent_symbolic_repl(self, word): + """Handle symbolic SGML entities.""" + return self.formatter.rawHTML(word) + + def _indent_repl(self, match): + """Handle pure indentation (no - * 1. markup).""" + result = [] + if not (self.in_li or self.in_dd): + self._close_item(result) + self.in_li = 1 + css_class = None + if self.line_was_empty and not self.first_list_item: + css_class = 'gap' + result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none")) + return ''.join(result) + + def _li_none_repl(self, match): + """Handle type=none (" .") lists.""" + result = [] + self._close_item(result) + self.in_li = 1 + css_class = None + if self.line_was_empty and not self.first_list_item: + css_class = 'gap' + result.append(self.formatter.listitem(1, css_class=css_class, style="list-style-type:none")) + return ''.join(result) + + def _li_repl(self, match): + """Handle bullet (" *") lists.""" + result = [] + self._close_item(result) + self.in_li = 1 + css_class = None + if self.line_was_empty and not self.first_list_item: + css_class = 'gap' + result.append(self.formatter.listitem(1, css_class=css_class)) + return ''.join(result) + + def _ol_repl(self, match): + """Handle numbered lists.""" + return self._li_repl(match) + + def _dl_repl(self, match): + """Handle definition lists.""" + result = [] + self._close_item(result) + self.in_dd = 1 + result.extend([ + self.formatter.definition_term(1), + self.formatter.text(match[1:-3].lstrip(' ')), + self.formatter.definition_term(0), + self.formatter.definition_desc(1), + ]) + return ''.join(result) + + + def _indent_level(self): + """Return current char-wise indent level.""" + return len(self.list_indents) and self.list_indents[-1] + + + def _indent_to(self, new_level, list_type, numtype, numstart): + """Close and open lists.""" + open = [] # don't make one out of these two statements! + close = [] + + if self._indent_level() != new_level and self.in_table: + close.append(self.formatter.table(0)) + self.in_table = 0 + + while self._indent_level() > new_level: + self._close_item(close) + if self.list_types[-1] == 'ol': + tag = self.formatter.number_list(0) + elif self.list_types[-1] == 'dl': + tag = self.formatter.definition_list(0) + else: + tag = self.formatter.bullet_list(0) + close.append(tag) + + del self.list_indents[-1] + del self.list_types[-1] + + if self.list_types: # we are still in a list + if self.list_types[-1] == 'dl': + self.in_dd = 1 + else: + self.in_li = 1 + + # Open new list, if necessary + if self._indent_level() < new_level: + self.list_indents.append(new_level) + self.list_types.append(list_type) + + if self.formatter.in_p: + close.append(self.formatter.paragraph(0)) + + if list_type == 'ol': + tag = self.formatter.number_list(1, numtype, numstart) + elif list_type == 'dl': + tag = self.formatter.definition_list(1) + else: + tag = self.formatter.bullet_list(1) + open.append(tag) + + self.first_list_item = 1 + self.in_li = 0 + self.in_dd = 0 + + # If list level changes, close an open table + if self.in_table and (open or close): + close[0:0] = [self.formatter.table(0)] + self.in_table = 0 + + self.in_list = self.list_types != [] + return ''.join(close) + ''.join(open) + + + def _undent(self): + """Close all open lists.""" + result = [] + #result.append("<!-- _undent start -->\n") + self._close_item(result) + for type in self.list_types[::-1]: + if type == 'ol': + result.append(self.formatter.number_list(0)) + elif type == 'dl': + result.append(self.formatter.definition_list(0)) + else: + result.append(self.formatter.bullet_list(0)) + #result.append("<!-- _undent end -->\n") + self.list_indents = [] + self.list_types = [] + return ''.join(result) + + + def _tt_repl(self, word): + """Handle inline code.""" + return self.formatter.code(1) + \ + self.formatter.text(word[3:-3]) + \ + self.formatter.code(0) + + + def _tt_bt_repl(self, word): + """Handle backticked inline code.""" + # if len(word) == 2: return "" // removed for FCK editor + return self.formatter.code(1, css="backtick") + \ + self.formatter.text(word[1:-1]) + \ + self.formatter.code(0) + + + def _getTableAttrs(self, attrdef): + # skip "|" and initial "<" + while attrdef and attrdef[0] == "|": + attrdef = attrdef[1:] + if not attrdef or attrdef[0] != "<": + return {}, '' + attrdef = attrdef[1:] + + # extension for special table markup + def table_extension(key, parser, attrs, wiki_parser=self): + """ returns: tuple (found_flag, msg) + found_flag: whether we found something and were able to process it here + true for special stuff like 100% or - or #AABBCC + false for style xxx="yyy" attributes + msg: "" or an error msg + """ + _ = wiki_parser._ + found = False + msg = '' + if key[0] in "0123456789": + token = parser.get_token() + if token != '%': + wanted = '%' + msg = _('Expected "%(wanted)s" after "%(key)s", got "%(token)s"') % { + 'wanted': wanted, 'key': key, 'token': token} + else: + try: + dummy = int(key) + except ValueError: + msg = _('Expected an integer "%(key)s" before "%(token)s"') % { + 'key': key, 'token': token} + else: + found = True + attrs['width'] = '"%s%%"' % key + elif key == '-': + arg = parser.get_token() + try: + dummy = int(arg) + except ValueError: + msg = _('Expected an integer "%(arg)s" after "%(key)s"') % { + 'arg': arg, 'key': key} + else: + found = True + attrs['colspan'] = '"%s"' % arg + elif key == '|': + arg = parser.get_token() + try: + dummy = int(arg) + except ValueError: + msg = _('Expected an integer "%(arg)s" after "%(key)s"') % { + 'arg': arg, 'key': key} + else: + found = True + attrs['rowspan'] = '"%s"' % arg + elif key == '(': + found = True + attrs['align'] = '"left"' + elif key == ':': + found = True + attrs['align'] = '"center"' + elif key == ')': + found = True + attrs['align'] = '"right"' + elif key == '^': + found = True + attrs['valign'] = '"top"' + elif key == 'v': + found = True + attrs['valign'] = '"bottom"' + elif key == '#': + arg = parser.get_token() + try: + if len(arg) != 6: raise ValueError + dummy = int(arg, 16) + except ValueError: + msg = _('Expected a color value "%(arg)s" after "%(key)s"') % { + 'arg': arg, 'key': key} + else: + found = True + attrs['bgcolor'] = '"#%s"' % arg + return found, self.formatter.rawHTML(msg) + + # scan attributes + attr, msg = wikiutil.parseAttributes(self.request, attrdef, '>', table_extension) + if msg: + msg = '<strong class="highlight">%s</strong>' % msg + #self.request.log("parseAttributes returned %r" % attr) + return attr, msg + + def _tableZ_repl(self, word): + """Handle table row end.""" + if self.in_table: + result = '' + # REMOVED: check for self.in_li, p should always close + if self.formatter.in_p: + result = self.formatter.paragraph(0) + result += self.formatter.table_cell(0) + self.formatter.table_row(0) + return result + else: + return self.formatter.text(word) + + def _table_repl(self, word): + """Handle table cell separator.""" + if self.in_table: + result = [] + # check for attributes + attrs, attrerr = self._getTableAttrs(word) + + # start the table row? + if self.table_rowstart: + self.table_rowstart = 0 + result.append(self.formatter.table_row(1, attrs)) + else: + # Close table cell, first closing open p + # REMOVED check for self.in_li, paragraph should close always! + if self.formatter.in_p: + result.append(self.formatter.paragraph(0)) + result.append(self.formatter.table_cell(0)) + + # check for adjacent cell markers + if word.count("|") > 2: + if not attrs.has_key('align') and \ + not (attrs.has_key('style') and 'text-align' in attrs['style'].lower()): + # add center alignment if we don't have some alignment already + attrs['align'] = '"center"' + if not attrs.has_key('colspan'): + attrs['colspan'] = '"%d"' % (word.count("|")/2) + + # return the complete cell markup + result.append(self.formatter.table_cell(1, attrs) + attrerr) + result.append(self._line_anchordef()) + return ''.join(result) + else: + return self.formatter.text(word) + + + def _heading_repl(self, word): + """Handle section headings.""" + import sha + + h = word.strip() + level = 1 + while h[level:level+1] == '=': + level += 1 + depth = min(5, level) + + # this is needed for Included pages + # TODO but it might still result in unpredictable results + # when included the same page multiple times + title_text = h[level:-level].strip() + pntt = self.formatter.page.page_name + title_text + self.titles.setdefault(pntt, 0) + self.titles[pntt] += 1 + + unique_id = '' + if self.titles[pntt] > 1: + unique_id = '-%d' % self.titles[pntt] + result = self._closeP() + result += self.formatter.heading(1, depth, id="head-"+sha.new(pntt.encode(config.charset)).hexdigest()+unique_id) + + return (result + self.formatter.text(title_text) + + self.formatter.heading(0, depth)) + + def _processor_repl(self, word): + """Handle processed code displays.""" + if word[:3] == '{{{': + word = word[3:] + + self.processor = None + self.processor_name = None + self.processor_is_parser = 0 + s_word = word.strip() + if s_word == '#!': + # empty bang paths lead to a normal code display + # can be used to escape real, non-empty bang paths + word = '' + self.in_pre = 3 + return self._closeP() + self.formatter.preformatted(1) + elif s_word[:2] == '#!': + # First try to find a processor for this (will go away in 2.0) + processor_name = s_word[2:].split()[0] + self.setProcessor(processor_name) + + if self.processor: + self.processor_name = processor_name + self.in_pre = 2 + self.colorize_lines = [word] + return '' + elif s_word: + self.in_pre = 3 + return self._closeP() + self.formatter.preformatted(1) + \ + self.formatter.text(s_word + ' (-)') + else: + self.in_pre = 1 + return '' + + def _pre_repl(self, word): + """Handle code displays.""" + word = word.strip() + if word == '{{{' and not self.in_pre: + self.in_pre = 3 + return self._closeP() + self.formatter.preformatted(self.in_pre) + elif word == '}}}' and self.in_pre: + self.in_pre = 0 + self.inhibit_p = 0 + return self.formatter.preformatted(self.in_pre) + return self.formatter.text(word) + + + def _smiley_repl(self, word): + """Handle smileys.""" + return self.formatter.smiley(word) + + _smileyA_repl = _smiley_repl + + + def _comment_repl(self, word): + # if we are in a paragraph, we must close it so that normal text following + # in the line below the comment will reopen a new paragraph. + if self.formatter.in_p: + self.formatter.paragraph(0) + self.line_is_empty = 1 # markup following comment lines treats them as if they were empty + return self.formatter.comment(word) + + def _closeP(self): + if self.formatter.in_p: + return self.formatter.paragraph(0) + return '' + + def _macro_repl(self, word): + """Handle macros ([[macroname]]).""" + macro_name = word[2:-2] + self.inhibit_p = 0 # 1 fixes UserPreferences, 0 fixes paragraph formatting for macros + + # check for arguments + args = None + if macro_name.count("("): + macro_name, args = macro_name.split('(', 1) + args = args[:-1] + + # create macro instance + if self.macro is None: + self.macro = wikimacro.Macro(self) + return self.formatter.macro(self.macro, macro_name, args) + + def scan(self, scan_re, line): + """ Scans one line + + Append text before match, invoke replace() with match, and add text after match. + """ + result = [] + lastpos = 0 + + ###result.append(u'<span class="info">[scan: <tt>"%s"</tt>]</span>' % line) + + for match in scan_re.finditer(line): + # Add text before the match + if lastpos < match.start(): + + ###result.append(u'<span class="info">[add text before match: <tt>"%s"</tt>]</span>' % line[lastpos:match.start()]) + + if not (self.inhibit_p or self.in_pre or self.formatter.in_p): + result.append(self.formatter.paragraph(1, css_class="line862")) + result.append(self.formatter.text(line[lastpos:match.start()])) + + # Replace match with markup + if not (self.inhibit_p or self.in_pre or self.formatter.in_p or + self.in_table or self.in_list): + result.append(self.formatter.paragraph(1, css_class="line867")) + result.append(self.replace(match)) + lastpos = match.end() + + ###result.append('<span class="info">[no match, add rest: <tt>"%s"<tt>]</span>' % line[lastpos:]) + + # Add paragraph with the remainder of the line + if not (self.in_pre or self.in_li or self.in_dd or self.inhibit_p or + self.formatter.in_p) and lastpos < len(line): + result.append(self.formatter.paragraph(1, css_class="line874")) + result.append(self.formatter.text(line[lastpos:])) + return u''.join(result) + + def replace(self, match): + """ Replace match using type name """ + result = [] + for type, hit in match.groupdict().items(): + if hit is not None and type != "hmarker": + + ###result.append(u'<span class="info">[replace: %s: "%s"]</span>' % (type, hit)) + if self.in_pre and type not in ['pre', 'ent']: + return self.formatter.text(hit) + else: + # Open p for certain types + if not (self.inhibit_p or self.formatter.in_p + or self.in_pre or (type in self.no_new_p_before)): + result.append(self.formatter.paragraph(1, css_class="line891")) + + # Get replace method and replece hit + replace = getattr(self, '_' + type + '_repl') + result.append(replace(hit)) + return ''.join(result) + else: + # We should never get here + import pprint + raise Exception("Can't handle match " + `match` + + "\n" + pprint.pformat(match.groupdict()) + + "\n" + pprint.pformat(match.groups()) ) + + return "" + + def _line_anchordef(self): + if self.line_anchors and not self.line_anchor_printed: + self.line_anchor_printed = 1 + return self.formatter.line_anchordef(self.lineno) + else: + return '' + + def format(self, formatter): + """ For each line, scan through looking for magic + strings, outputting verbatim any intervening text. + """ + self.formatter = formatter + self.hilite_re = self.formatter.page.hilite_re + + # prepare regex patterns + rules = self.formatting_rules.replace('\n', '|') + if self.cfg.bang_meta: + rules = ur'(?P<notword>!%(word_rule)s)|%(rules)s' % { + 'word_rule': self.word_rule, + 'rules': rules, + } + self.request.clock.start('compile_huge_and_ugly') + scan_re = re.compile(rules, re.UNICODE) + number_re = re.compile(self.ol_rule, re.UNICODE) + term_re = re.compile(self.dl_rule, re.UNICODE) + indent_re = re.compile("^\s*", re.UNICODE) + eol_re = re.compile(r'\r?\n', re.UNICODE) + self.request.clock.stop('compile_huge_and_ugly') + + # get text and replace TABs + rawtext = self.raw.expandtabs() + + # go through the lines + self.lineno = self.start_line + self.lines = eol_re.split(rawtext) + self.line_is_empty = 0 + + self.in_processing_instructions = 1 + + # Main loop + for line in self.lines: + self.lineno += 1 + self.line_anchor_printed = 0 + if not self.in_table: + self.request.write(self._line_anchordef()) + self.table_rowstart = 1 + self.line_was_empty = self.line_is_empty + self.line_is_empty = 0 + self.first_list_item = 0 + self.inhibit_p = 0 + + # ignore processing instructions + if self.in_processing_instructions: + found = False + for pi in ("##", "#format", "#refresh", "#redirect", "#deprecated", + "#pragma", "#form", "#acl", "#language"): + if line.lower().startswith(pi): + self.request.write(self.formatter.comment(line)) + found = True + break + if not found: + self.in_processing_instructions = 0 + else: + continue # do not parse this line + if self.in_pre: + # TODO: move this into function + # still looking for processing instructions + # TODO: use strings for pre state, not numbers + if self.in_pre == 1: + self.processor = None + self.processor_is_parser = 0 + processor_name = '' + if (line.strip()[:2] == "#!"): + processor_name = line.strip()[2:].split()[0] + self.setProcessor(processor_name) + + if self.processor: + self.in_pre = 2 + self.colorize_lines = [line] + self.processor_name = processor_name + continue + else: + self.request.write(self._closeP() + + self.formatter.preformatted(1)) + self.in_pre = 3 + if self.in_pre == 2: + # processing mode + endpos = line.find("}}}") + if endpos == -1: + self.colorize_lines.append(line) + continue + if line[:endpos]: + self.colorize_lines.append(line[:endpos]) + + # Close p before calling processor + # TODO: do we really need this? + self.request.write(self._closeP()) + res = self.formatter.processor(self.processor_name, + self.colorize_lines, + self.processor_is_parser) + self.request.write(res) + del self.colorize_lines + self.in_pre = 0 + self.processor = None + + # send rest of line through regex machinery + line = line[endpos+3:] + if not line.strip(): # just in the case "}}} " when we only have blanks left... + continue + else: + # we don't have \n as whitespace any more + # This is the space between lines we join to one paragraph + line += ' ' + + # Paragraph break on empty lines + if not line.strip(): + if self.in_table: + self.request.write(self.formatter.table(0)) + self.request.write(self._line_anchordef()) + self.in_table = 0 + # CHANGE: removed check for not self.list_types + # p should close on every empty line + if self.formatter.in_p: + self.request.write(self.formatter.paragraph(0)) + self.line_is_empty = 1 + continue + + # Check indent level + indent = indent_re.match(line) + indlen = len(indent.group(0)) + indtype = "ul" + numtype = None + numstart = None + if indlen: + match = number_re.match(line) + if match: + numtype, numstart = match.group(0).strip().split('.') + numtype = numtype[0] + + if numstart and numstart[0] == "#": + numstart = int(numstart[1:]) + else: + numstart = None + + indtype = "ol" + else: + match = term_re.match(line) + if match: + indtype = "dl" + + # output proper indentation tags + self.request.write(self._indent_to(indlen, indtype, numtype, numstart)) + + # Table mode + # TODO: move into function? + if (not self.in_table and line[indlen:indlen + 2] == "||" + and line[-3:] == "|| " and len(line) >= 5 + indlen): + # Start table + if self.list_types and not self.in_li: + self.request.write(self.formatter.listitem(1, style="list-style-type:none")) + ## CHANGE: no automatic p on li + ##self.request.write(self.formatter.paragraph(1)) + self.in_li = 1 + + # CHANGE: removed check for self.in_li + # paragraph should end before table, always! + if self.formatter.in_p: + self.request.write(self.formatter.paragraph(0)) + attrs, attrerr = self._getTableAttrs(line[indlen+2:]) + self.request.write(self.formatter.table(1, attrs) + attrerr) + self.in_table = True # self.lineno + elif (self.in_table and not + # intra-table comments should not break a table + (line[:2] == "##" or + line[indlen:indlen + 2] == "||" and + line[-3:] == "|| " and + len(line) >= 5 + indlen)): + + # Close table + self.request.write(self.formatter.table(0)) + self.request.write(self._line_anchordef()) + self.in_table = 0 + + # Scan line, format and write + formatted_line = self.scan(scan_re, line) + self.request.write(formatted_line) + + if self.in_pre == 3: + self.request.write(self.formatter.linebreak()) + + # Close code displays, paragraphs, tables and open lists + self.request.write(self._undent()) + if self.in_pre: self.request.write(self.formatter.preformatted(0)) + if self.formatter.in_p: self.request.write(self.formatter.paragraph(0)) + if self.in_table: self.request.write(self.formatter.table(0)) + + # -------------------------------------------------------------------- + # Private helpers + + def setProcessor(self, name): + """ Set processer to either processor or parser named 'name' """ + cfg = self.request.cfg + try: + self.processor = wikiutil.importPlugin(cfg, "processor", name, + "process") + self.processor_is_parser = 0 + except wikiutil.PluginMissingError: + try: + self.processor = wikiutil.importPlugin(cfg, "parser", name, + "Parser") + self.processor_is_parser = 1 + except wikiutil.PluginMissingError: + self.processor = None + +