changeset 3911:348c9b6e0033

ParserBase (and other parsers deriving from it): cleanup / refactorings: source code reformattings / rearrangements; PEP8 fixes; using booleans instead of 0/1; use raw strings for regexes; reduce code duplication; better attr names; introduce tabwidth constant as a class member
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sat, 02 Aug 2008 13:48:59 +0200
parents 6fbfffffc4d1
children 01ba9b2e96f6
files MoinMoin/parser/_ParserBase.py MoinMoin/parser/text_cplusplus.py MoinMoin/parser/text_java.py MoinMoin/parser/text_pascal.py MoinMoin/parser/text_python.py
diffstat 5 files changed, 128 insertions(+), 120 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/parser/_ParserBase.py	Thu Jul 31 14:17:19 2008 +0200
+++ b/MoinMoin/parser/_ParserBase.py	Sat Aug 02 13:48:59 2008 +0200
@@ -2,10 +2,12 @@
 """
     MoinMoin - Base Source Parser
 
-    @copyright: 2002 by Taesu Pyo <bigflood@hitel.net>
+    @copyright: 2002 by Taesu Pyo <bigflood@hitel.net>,
+                2005 by Oliver Graf <ograf@bitart.de>,
+                2005-2008 MoinMoin:ThomasWaldmann
+
     @license: GNU GPL, see COPYING for details.
 
-    Docstrings and some refactoring by Oliver Graf <ograf@bitart.de>
 
 basic css:
 
@@ -23,8 +25,87 @@
 """
 
 import re, sha
+
 from MoinMoin import config, wikiutil
 
+class FormatTextBase:
+    pass
+
+class FormatText(FormatTextBase):
+
+    def __init__(self, fmt):
+        self.fmt = fmt
+
+    def formatString(self, formatter, word):
+        return (formatter.code_token(1, self.fmt) +
+                formatter.text(word) +
+                formatter.code_token(0, self.fmt))
+
+class FormatTextID(FormatTextBase):
+
+    def __init__(self, fmt, icase=False):
+        if not isinstance(fmt, FormatText):
+            fmt = FormatText(fmt)
+        self.setDefaultFormat(fmt)
+        self._ignore_case = icase
+        self.fmt = {}
+
+    def setDefaultFormat(self, fmt):
+        self._def_fmt = fmt
+
+    def addFormat(self, word, fmt):
+        if self._ignore_case:
+            word = word.lower()
+        self.fmt[word] = fmt
+
+    def formatString(self, formatter, word):
+        if self._ignore_case:
+            sword = word.lower()
+        else:
+            sword = word
+        return self.fmt.get(sword, self._def_fmt).formatString(formatter, word)
+
+
+class FormattingRuleSingle:
+
+    def __init__(self, name, str_re, icase=False):
+        self.name = name
+        self.str_re = str_re
+
+    def getStartRe(self):
+        return self.str_re
+
+    def getText(self, parser, hit):
+        return hit
+
+
+class FormattingRulePair:
+
+    def __init__(self, name, str_begin, str_end, icase=False):
+        self.name = name
+        self.str_begin = str_begin
+        self.str_end = str_end
+        re_flags = re.M
+        if icase:
+            re_flags |= re.I
+        self.end_re = re.compile(str_end, re_flags)
+
+    def getStartRe(self):
+        return self.str_begin
+
+    def getText(self, parser, hit):
+        match = self.end_re.search(parser.line, parser.lastpos)
+        if not match:
+            next_lastpos = len(parser.line)
+        else:
+            next_lastpos = match.end() + (match.end() == parser.lastpos)
+        r = parser.line[parser.lastpos:next_lastpos]
+        parser.lastpos = next_lastpos
+        return hit + r
+
+
+# ------------------------------------------------------------------------
+
 def parse_start_step(request, args):
     """
     Parses common Colorizer parameters start, step, numbers.
@@ -60,98 +141,23 @@
             nums = -1
     return nums, start, step, attrs
 
-class FormatTextBase:
-    pass
-
-class FormatText(FormatTextBase):
-
-    def __init__(self, fmt):
-        self.fmt = fmt
-
-    def formatString(self, formatter, word):
-        return (formatter.code_token(1, self.fmt) +
-                formatter.text(word) +
-                formatter.code_token(0, self.fmt))
-
-class FormatTextID(FormatTextBase):
-
-    def __init__(self, fmt, icase=0):
-        if not isinstance(fmt, FormatText):
-            self.def_fmt = FormatText(fmt)
-        else:
-            self.def_fmt = fmt
-        self._ignore_case = icase
-        self.fmt = {}
-
-    def addFormat(self, word, fmt):
-        if self._ignore_case:
-            word = word.lower()
-        self.fmt[word] = fmt
-
-    def setDefaultFormat(self, fmt):
-        self.def_fmt = fmt
-
-    def formatString(self, formatter, word):
-        if self._ignore_case:
-            sword = word.lower()
-        else:
-            sword = word
-        return self.fmt.get(sword, self.def_fmt).formatString(formatter, word)
-
-class FormattingRuleSingle:
-
-    def __init__(self, name, str_re, icase=0):
-        self.name = name
-        self.str_re = str_re
-
-    def getStartRe(self):
-        return self.str_re
-
-    def getText(self, parser, hit):
-        return hit
-
-class FormattingRulePair:
-
-    def __init__(self, name, str_begin, str_end, icase=0):
-        self.name = name
-        self.str_begin = str_begin
-        self.str_end = str_end
-        if icase:
-            self.end_re = re.compile(str_end, re.M|re.I)
-        else:
-            self.end_re = re.compile(str_end, re.M)
-
-    def getStartRe(self):
-        return self.str_begin
-
-    def getText(self, parser, hit):
-        match = self.end_re.search(parser.line, parser.lastpos)
-        if not match:
-            next_lastpos = len(parser.line)
-        else:
-            next_lastpos = match.end() + (match.end() == parser.lastpos)
-        r = parser.line[parser.lastpos:next_lastpos]
-        parser.lastpos = next_lastpos
-        return hit + r
-
-
-# ------------------------------------------------------------------------
 
 class ParserBase:
 
     parsername = 'ParserBase'
+    tabwidth = 4
 
     def __init__(self, raw, request, **kw):
         self.raw = raw
         self.request = request
         self.show_nums, self.num_start, self.num_step, attrs = parse_start_step(request, kw.get('format_args', ''))
 
-        self._ignore_case = 0
+        self._ignore_case = False
         self._formatting_rules = []
         self._formatting_rules_n2r = {}
         self._formatting_rule_index = 0
         self.rule_fmt = {}
-        self.line_count = len(raw.split('\n'))+1
+        self.line_count = len(raw.split('\n')) + 1
 
     def setupRules(self):
         self.def_format = FormatText('Default')
@@ -209,14 +215,12 @@
 
         self.setupRules()
 
-        l = []
-        for n, f in self._formatting_rules:
-            l.append("(?P<%s>%s)" % (n, f.getStartRe()))
-
+        formatting_regexes = ["(?P<%s>%s)" % (n, f.getStartRe())
+                              for n, f in self._formatting_rules]
+        re_flags = re.M
         if self._ignore_case:
-            scan_re = re.compile("|".join(l), re.M|re.I)
-        else:
-            scan_re = re.compile("|".join(l), re.M)
+            re_flags |= re.I
+        scan_re = re.compile("|".join(formatting_regexes), re_flags)
 
         self.lastpos = 0
         self.line = self.raw
@@ -244,14 +248,13 @@
 
         self.request.write(formatter.code_area(0, self._code_id))
 
-
     def write_normal_text(self, formatter, text):
-        first = 1
-        for line in text.expandtabs(4).split('\n'):
+        first = True
+        for line in text.expandtabs(self.tabwidth).split('\n'):
             if not first:
                 self.request.write(formatter.code_line(1))
             else:
-                first = 0
+                first = False
             self.request.write(formatter.text(line))
 
     def write_match(self, formatter, match):
@@ -263,10 +266,11 @@
             c = self.rule_fmt.get(r.name, None)
             if not c:
                 c = self.def_format
-            first = 1
-            for line in s.expandtabs(4).split('\n'):
+            first = True
+            for line in s.expandtabs(self.tabwidth).split('\n'):
                 if not first:
                     self.request.write(formatter.code_line(1))
                 else:
-                    first = 0
+                    first = False
                 self.request.write(c.formatString(formatter, line))
+
--- a/MoinMoin/parser/text_cplusplus.py	Thu Jul 31 14:17:19 2008 +0200
+++ b/MoinMoin/parser/text_cplusplus.py	Sat Aug 02 13:48:59 2008 +0200
@@ -36,13 +36,13 @@
     def setupRules(self):
         ParserBase.setupRules(self)
 
-        self.addRulePair("Comment", "/[*]", "[*]/")
-        self.addRule("Comment", "//.*$")
-        self.addRulePair("String", 'L?"', r'$|[^\\](\\\\)*"')
+        self.addRulePair("Comment", r"/[*]", r"[*]/")
+        self.addRule("Comment", r"//.*$")
+        self.addRulePair("String", r'L?"', r'$|[^\\](\\\\)*"')
         self.addRule("Char", r"'\\.'|'[^\\]'")
         self.addRule("Number", r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?")
         self.addRule("Preprc", r"^\s*#(.*\\\n)*(.*(?!\\))$")
-        self.addRule("ID", "[a-zA-Z_][0-9a-zA-Z_]*")
+        self.addRule("ID", r"[a-zA-Z_][0-9a-zA-Z_]*")
         self.addRule("SPChar", r"[~!%^&*()+=|\[\]:;,.<>/?{}-]")
 
         reserved_words = ['struct', 'class', 'union', 'enum',
--- a/MoinMoin/parser/text_java.py	Thu Jul 31 14:17:19 2008 +0200
+++ b/MoinMoin/parser/text_java.py	Sat Aug 02 13:48:59 2008 +0200
@@ -20,12 +20,12 @@
     def setupRules(self):
         ParserBase.setupRules(self)
 
-        self.addRulePair("Comment", "/[*]", "[*]/")
-        self.addRule("Comment", "//.*$")
-        self.addRulePair("String", '"', r'$|[^\\](\\\\)*"')
+        self.addRulePair("Comment", r"/[*]", r"[*]/")
+        self.addRule("Comment", r"//.*$")
+        self.addRulePair("String", r'"', r'$|[^\\](\\\\)*"')
         self.addRule("Char", r"'\\.'|'[^\\]'")
         self.addRule("Number", r"[0-9](\.[0-9]*)?(eE[+-][0-9])?[flFLdD]?|0[xX][0-9a-fA-F]+[Ll]?")
-        self.addRule("ID", "[a-zA-Z_][0-9a-zA-Z_]*")
+        self.addRule("ID", r"[a-zA-Z_][0-9a-zA-Z_]*")
         self.addRule("SPChar", r"[~!%^&*()+=|\[\]:;,.<>/?{}-]")
 
         reserved_words = ['class', 'interface', 'enum', 'import', 'package',
@@ -40,3 +40,4 @@
         constant_words = ['true', 'false', 'null']
 
         self.addConstant(constant_words)
+
--- a/MoinMoin/parser/text_pascal.py	Thu Jul 31 14:17:19 2008 +0200
+++ b/MoinMoin/parser/text_pascal.py	Sat Aug 02 13:48:59 2008 +0200
@@ -18,18 +18,18 @@
 
     def __init__(self, raw, request, **kw):
         ParserBase.__init__(self, raw, request, **kw)
-        self._ignore_case = 1
+        self._ignore_case = True
 
     def setupRules(self):
         ParserBase.setupRules(self)
 
-        self.addRulePair("Comment", "\(\*", "\*\)")
-        self.addRulePair("Comment", "\{", "\}")
-        self.addRule("Comment", "//.*$")
-        self.addRulePair("String", '\'', '\'')
+        self.addRulePair("Comment", r"\(\*", r"\*\)")
+        self.addRulePair("Comment", r"\{", r"\}")
+        self.addRule("Comment", r"//.*$")
+        self.addRulePair("String", r"'", r"'")
         self.addRule("Char", r"'\\.'|#[a-f0-9][a-f0-9]")
         self.addRule("Number", r"[0-9](\.[0-9]*)?(eE[+-][0-9])?|\$[0-9a-fA-F]+")
-        self.addRule("ID", "[a-zA-Z_][0-9a-zA-Z_]*")
+        self.addRule("ID", r"[a-zA-Z_][0-9a-zA-Z_]*")
         self.addRule("SPChar", r"[~!%^&*()+=|\[\]:;,.<>/?{}-]")
 
         reserved_words = ['class', 'interface', 'set', 'uses', 'unit',
@@ -48,3 +48,4 @@
         constant_words = ['true', 'false', 'nil']
 
         self.addConstant(constant_words)
+
--- a/MoinMoin/parser/text_python.py	Thu Jul 31 14:17:19 2008 +0200
+++ b/MoinMoin/parser/text_python.py	Sat Aug 02 13:48:59 2008 +0200
@@ -2,27 +2,29 @@
 """
     MoinMoin - highlighting Python Source Parser
 
-    @copyright: 2001 Juergen Hermann <jh@web.de>
+    @copyright: 2001 Juergen Hermann <jh@web.de>,
+                2006-2008 MoinMoin:ThomasWaldmann
     @license: GNU GPL, see COPYING for details.
 """
 
 import StringIO
 import keyword, token, tokenize, sha
+
 from MoinMoin import config, wikiutil
 from MoinMoin.parser._ParserBase import parse_start_step
 
 _KEYWORD = token.NT_OFFSET + 1
-_TEXT    = token.NT_OFFSET + 2
+_TEXT = token.NT_OFFSET + 2
 
 _tokens = {
-    token.NUMBER:       'Number',
-    token.OP:           'Operator',
-    token.STRING:       'String',
-    tokenize.COMMENT:   'Comment',
-    token.NAME:         'ID',
-    token.ERRORTOKEN:   'Error',
-    _KEYWORD:           'ResWord',
-    _TEXT:              'Text',
+    token.NUMBER: 'Number',
+    token.OP: 'Operator',
+    token.STRING: 'String',
+    tokenize.COMMENT: 'Comment',
+    token.NAME: 'ID',
+    token.ERRORTOKEN: 'Error',
+    _KEYWORD: 'ResWord',
+    _TEXT: 'Text',
 }
 
 Dependencies = ['user'] # the "Toggle line numbers link" depends on user's language
@@ -117,13 +119,13 @@
         tokid = _tokens.get(toktype, _tokens[_TEXT])
 
         # send text
-        first = 1
+        first = True
         for part in toktext.split('\n'):
             if not first:
                 self.request.write(self.formatter.code_line(0))
                 self.request.write(self.formatter.code_line(1))
             else:
-                first = 0
+                first = False
             self.request.write(self.formatter.code_token(1, tokid) +
                                self.formatter.text(part) +
                                self.formatter.code_token(0, tokid))