Mercurial > moin > 1.9
view MoinMoin/parser/_ParserBase.py @ 4363:817d99d715fe
remove direct usage of deprecated sha module - use hashlib, if possible
author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
---|---|
date | Fri, 03 Oct 2008 22:33:35 +0200 |
parents | b4bdfaaa79b0 |
children | 09328e4aab45 |
line wrap: on
line source
# -*- coding: iso-8859-1 -*- """ MoinMoin - Base Source Parser @copyright: 2002 by Taesu Pyo <bigflood@hitel.net>, 2005 by Oliver Graf <ograf@bitart.de>, 2005-2008 MoinMoin:ThomasWaldmann @license: GNU GPL, see COPYING for details. basic css: pre.codearea { font-style: sans-serif; color: #000000; } pre.codearea span.ID { color: #000000; } pre.codearea span.Char { color: #004080; } pre.codearea span.Comment { color: #808080; } pre.codearea span.Number { color: #008080; font-weight: bold; } pre.codearea span.String { color: #004080; } pre.codearea span.SPChar { color: #0000C0; } pre.codearea span.ResWord { color: #4040ff; font-weight: bold; } pre.codearea span.ConsWord { color: #008080; font-weight: bold; } """ import re from MoinMoin import config, wikiutil from MoinMoin.support.python_compatibility import hash_new class FormatTextBase: pass class FormatBeginLine(FormatTextBase): def formatString(self, formatter, word): return formatter.code_line(1) class FormatEndLine(FormatTextBase): def formatString(self, formatter, word): return formatter.code_line(0) class FormatText(FormatTextBase): def __init__(self, fmt): self.fmt = fmt def formatString(self, formatter, word): return (formatter.code_token(1, self.fmt) + formatter.text(word) + formatter.code_token(0, self.fmt)) class FormatTextID(FormatTextBase): def __init__(self, fmt, icase=False): if not isinstance(fmt, FormatText): fmt = FormatText(fmt) self.setDefaultFormat(fmt) self._ignore_case = icase self.fmt = {} def setDefaultFormat(self, fmt): self._def_fmt = fmt def addFormat(self, word, fmt): if self._ignore_case: word = word.lower() self.fmt[word] = fmt def formatString(self, formatter, word): if self._ignore_case: sword = word.lower() else: sword = word return self.fmt.get(sword, self._def_fmt).formatString(formatter, word) class FormattingRuleSingle: def __init__(self, name, str_re, icase=False): self.name = name self.str_re = str_re def getStartRe(self): return self.str_re def getText(self, parser, hit): return hit class FormattingRulePair: def __init__(self, name, str_begin, str_end, icase=False): self.name = name self.str_begin = str_begin self.str_end = str_end re_flags = re.M if icase: re_flags |= re.I self.end_re = re.compile(str_end, re_flags) def getStartRe(self): return self.str_begin def getText(self, parser, hit): match = self.end_re.search(parser.text, parser.lastpos) if not match: next_lastpos = parser.text_len else: next_lastpos = match.end() + (match.end() == parser.lastpos) r = parser.text[parser.lastpos:next_lastpos] parser.lastpos = next_lastpos return hit + r # ------------------------------------------------------------------------ def parse_start_step(request, args): """ Parses common Colorizer parameters start, step, numbers. Uses L{wikiutil.parseAttributes} and sanitizes the results. Start and step must be a non negative number and default to 1, numbers might be on, off, or none and defaults to on. On or off means that numbers are switchable via JavaScript (html formatter), disabled means that numbers are disabled completely. attrdict is returned as last element in the tuple, to enable the calling parser to extract further arguments. @param request: a request instance @param args: the argument string @returns: numbers, start, step, attrdict """ nums, start, step = 1, 1, 1 attrs, msg = wikiutil.parseAttributes(request, args) if not msg: try: start = int(attrs.get('start', '"1"')[1:-1]) except ValueError: pass try: step = int(attrs.get('step', '"1"')[1:-1]) except ValueError: pass if attrs.get('numbers', '"on"')[1:-1].lower() in ('off', 'false', 'no'): nums = 0 elif attrs.get('numbers', '"on"')[1:-1].lower() in ('none', 'disable'): nums = -1 return nums, start, step, attrs class ParserBase: parsername = 'ParserBase' tabwidth = 4 # for dirty tricks, see comment in format(): STARTL, STARTL_RE = u"^\n", ur"\^\n" ENDL, ENDL_RE = u"\n$", ur"\n\$" LINESEP = ENDL + STARTL def __init__(self, raw, request, **kw): self.raw = raw self.request = request self.show_nums, self.num_start, self.num_step, attrs = parse_start_step(request, kw.get('format_args', '')) self._ignore_case = False self._formatting_rules = [] self._formatting_rules_n2r = {} self._formatting_rule_index = 0 self.rule_fmt = {} #self.line_count = len(raw.split('\n')) + 1 def setupRules(self): self.addRuleFormat("BEGINLINE", FormatBeginLine()) self.addRuleFormat("ENDLINE", FormatEndLine()) # we need a little dirty trick here, see comment in format(): self.addRule("BEGINLINE", self.STARTL_RE) self.addRule("ENDLINE", self.ENDL_RE) self.def_format = FormatText('Default') self.reserved_word_format = FormatText('ResWord') self.constant_word_format = FormatText('ConsWord') self.ID_format = FormatTextID('ID', self._ignore_case) self.addRuleFormat("ID", self.ID_format) self.addRuleFormat("Operator") self.addRuleFormat("Char") self.addRuleFormat("Comment") self.addRuleFormat("Number") self.addRuleFormat("String") self.addRuleFormat("SPChar") self.addRuleFormat("ResWord") self.addRuleFormat("ResWord2") self.addRuleFormat("ConsWord") self.addRuleFormat("Special") self.addRuleFormat("Preprc") self.addRuleFormat("Error") def _addRule(self, name, fmt): self._formatting_rule_index += 1 name = "%s_%s" % (name, self._formatting_rule_index) # create unique name self._formatting_rules.append((name, fmt)) self._formatting_rules_n2r[name] = fmt def addRule(self, name, str_re): self._addRule(name, FormattingRuleSingle(name, str_re, self._ignore_case)) def addRulePair(self, name, start_re, end_re): self._addRule(name, FormattingRulePair(name, start_re, end_re, self._ignore_case)) def addWords(self, words, fmt): if not isinstance(fmt, FormatTextBase): fmt = FormatText(fmt) for w in words: self.ID_format.addFormat(w, fmt) def addReserved(self, words): self.addWords(words, self.reserved_word_format) def addConstant(self, words): self.addWords(words, self.constant_word_format) def addRuleFormat(self, name, fmt=None): if fmt is None: fmt = FormatText(name) self.rule_fmt[name] = fmt def format(self, formatter, form=None): """ Send the text. """ self.setupRules() formatting_regexes = ["(?P<%s>%s)" % (n, f.getStartRe()) for n, f in self._formatting_rules] re_flags = re.M if self._ignore_case: re_flags |= re.I scan_re = re.compile("|".join(formatting_regexes), re_flags) self.text = self.raw # dirty little trick to work around re lib's limitations (it can't have # zero length matches at line beginning for ^ and at the same time match # something else at the beginning of the line): self.text = self.LINESEP.join([line.replace('\r', '') for line in self.text.splitlines()]) self.text = self.STARTL + self.text + self.ENDL self.text_len = len(self.text) result = [] # collects output self._code_id = hash_new('sha1', self.raw.encode(config.charset)).hexdigest() result.append(formatter.code_area(1, self._code_id, self.parsername, self.show_nums, self.num_start, self.num_step)) self.lastpos = 0 match = scan_re.search(self.text) while match and self.lastpos < self.text_len: # add the rendering of the text left of the match we found text = self.text[self.lastpos:match.start()] if text: result.extend(self.format_normal_text(formatter, text)) self.lastpos = match.end() + (match.end() == self.lastpos) # add the rendering of the match we found result.extend(self.format_match(formatter, match)) # search for the next one match = scan_re.search(self.text, self.lastpos) # add the rendering of the text right of the last match we found text = self.text[self.lastpos:] if text: result.extend(self.format_normal_text(formatter, text)) result.append(formatter.code_area(0, self._code_id)) self.request.write(''.join(result)) def format_normal_text(self, formatter, text): return [formatter.text(text.expandtabs(self.tabwidth))] def format_match(self, formatter, match): result = [] for n, hit in match.groupdict().items(): if hit is None: continue r = self._formatting_rules_n2r[n] s = r.getText(self, hit) c = self.rule_fmt.get(r.name, None) if not c: c = self.def_format if s: lines = s.expandtabs(self.tabwidth).split(self.LINESEP) for line in lines[:-1]: result.append(c.formatString(formatter, line)) result.append(FormatEndLine().formatString(formatter, '')) result.append(FormatBeginLine().formatString(formatter, '')) result.append(c.formatString(formatter, lines[-1])) return result