changeset 4378:e1877a60f46d

New parser, highlight.py, using Pygments to highlight a number of different markups
author Radomir Dopieralski <moindev@sheep.art.pl>
date Sat, 11 Oct 2008 21:28:23 +0200
parents 00b1307bd9c2
children 9ba5c65b18c0
files MoinMoin/parser/highlight.py
diffstat 1 files changed, 136 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/parser/highlight.py	Sat Oct 11 21:28:23 2008 +0200
@@ -0,0 +1,136 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+"""
+    MoinMoin - Pygments Parser
+
+    @copyright: 2008 Radomir Dopieralski <moindev@sheep.art.pl>
+
+    @license: GNU GPL, see COPYING for details.
+"""
+
+import re
+
+from MoinMoin.parser._ParserBase import parse_start_step
+from MoinMoin.support.python_compatibility import hash_new
+from MoinMoin import config, wikiutil
+
+import pygments
+import pygments.util
+import pygments.lexers
+import pygments.formatter
+from pygments.token import Token
+
+Dependencies = []
+extensions = ['.py']  # plus every "*.ext" glob of a Pygments lexer, listed once
+extension_re = re.compile(r'^\*(\..*$)')
+for name, short, patterns, mime in pygments.lexers.get_all_lexers():
+    for pattern in patterns:
+        m = extension_re.match(pattern)
+        if m and m.group(1) not in extensions:
+            extensions.append(m.group(1))
+
+class PygmentsFormatter(pygments.formatter.Formatter):
+    """Pygments formatter that renders tokens through a MoinMoin formatter.
+
+    Nothing is written to a file: the markup fragments returned by the
+    wrapped formatter are collected in the list ``self.result``.
+    """
+    line_re = re.compile(r'(\n)')
+    # (token type, MoinMoin CSS class); first match wins, so subtypes come first
+    token_classes = (
+        (Token.Keyword.Constant, 'ConsWord'),
+        (Token.Keyword, 'ResWord'),
+        (Token.Name.Builtin, 'ResWord'),
+        (Token.Name.Constant, 'ConsWord'),
+        (Token.String.Char, 'Char'),
+        (Token.String.Escape, 'SPChar'),
+        (Token.String, 'String'),
+        (Token.Number, 'Number'),
+        (Token.Name, 'ID'),
+        (Token.Comment, 'Comment'),
+    )
+
+    def __init__(self, formatter):
+        """Wrap ``formatter``; its code_line/code_token/text build the output."""
+        pygments.formatter.Formatter.__init__(self)
+        self.result = []
+        self.formatter = formatter
+
+    def get_class(self, ttype):
+        """Return the CSS class for ``ttype``, or None if it has no class."""
+        for parent, css_class in self.token_classes:
+            if ttype in parent:
+                return css_class
+        return None
+
+    def format(self, tokensource, outfile):
+        """Append the markup for ``tokensource`` to ``self.result``.
+
+        ``tokensource`` yields (token type, text) pairs.  Each source line is
+        wrapped in code_line(1)/code_line(0) and each classified piece of text
+        in code_token(1, cls)/code_token(0, cls).  ``outfile`` is not used.
+        """
+        line_ready = False
+        fmt = self.formatter
+        result = self.result
+        for ttype, value in tokensource:
+            class_ = self.get_class(ttype)
+            # split() keeps the newlines and pads them with '' - drop those
+            for line in filter(None, self.line_re.split(value)):
+                if not line_ready:
+                    result.append(fmt.code_line(1))
+                    line_ready = True
+                if line == '\n':
+                    result.append(fmt.code_line(0))
+                    line_ready = False
+                else:
+                    if class_:
+                        result.append(fmt.code_token(1, class_))
+                    result.append(fmt.text(line))
+                    if class_:
+                        result.append(fmt.code_token(0, class_))
+        if line_ready:  # the last line did not end with a newline
+            result.append(fmt.code_line(0))
+
+class Parser:
+    """MoinMoin parser that highlights source text with a Pygments lexer."""
+    parsername = "highlight"
+    Dependencies = Dependencies
+    extensions = extensions
+
+    def __init__(self, raw, request, filename=None, **kw):
+        """Keep the text to highlight and split kw['format_args'] into the
+        lexer name (first word) and the line numbering arguments (the rest),
+        from which parse_start_step() derives show_nums/num_start/num_step."""
+        self.request = request
+        self.raw = raw.strip('\n')
+        self.filename = filename
+        parts = (kw.get('format_args') or '').split()
+        self.syntax = (parts or [''])[0]
+        params = ' '.join(parts[1:])  # keep the words apart, do not glue them
+        self.show_nums, self.num_start, self.num_step, attrs = parse_start_step(request, params)
+
+    def format(self, formatter):
+        """Highlight self.raw and write the resulting markup to the request.
+
+        The lexer is looked up by file name if one was given, else by
+        self.syntax; an unknown one is named in the output and TextLexer used."""
+        fmt = PygmentsFormatter(formatter)
+        fmt.result.append(formatter.div(1, css_class="highlight %s" % self.syntax))
+        self._code_id = hash_new('sha1', self.raw.encode(config.charset)).hexdigest()
+        fmt.result.append(formatter.code_area(1, self._code_id, self.parsername, self.show_nums, self.num_start, self.num_step))
+        try:
+            if self.filename is not None:
+                notice = self.filename
+                lexer = pygments.lexers.get_lexer_for_filename(self.filename)
+            else:
+                notice = '#!%s\n' % self.syntax
+                lexer = pygments.lexers.get_lexer_by_name(self.syntax)
+        except pygments.util.ClassNotFound:
+            fmt.result.append(formatter.text(notice))
+            lexer = pygments.lexers.TextLexer()
+        pygments.highlight(self.raw, lexer, fmt)
+        fmt.result.append(formatter.code_area(0, self._code_id))
+        fmt.result.append(formatter.div(0))
+        self.request.write("".join(fmt.result))