diff MoinMoin/parser/rst.py @ 0:77665d8e2254

tag of nonpublic@localhost--archive/moin--enterprise--1.5--base-0 (automatically generated log message) imported from: moin--main--1.5--base-0
author Thomas Waldmann <tw-public@gmx.de>
date Thu, 22 Sep 2005 15:09:50 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/parser/rst.py	Thu Sep 22 15:09:50 2005 +0000
@@ -0,0 +1,497 @@
+# -*- coding: iso-8859-1 -*-
+"""
+    MoinMoin - ReStructured Text Parser
+
+    @copyright: 2004 by Matthew Gilbert <gilbert AT voxmea DOT net>
+        and by Alexander Schremmer <alex AT alexanderweb DOT de>
+    @license: GNU GPL, see COPYING for details.
+
+    REQUIRES docutils 0.3.3 or later
+"""
+
+import re
+import new
+import StringIO
+import __builtin__
+import sys
+import copy
+
+# docutils imports are below
+import MoinMoin.parser.wiki
+from MoinMoin.Page import Page
+
+Dependencies = [] # this parser just depends on the raw text
+
+# --- make docutils safe by overriding all module-scoped names related to IO ---
+
+# TODO: Add an error message to dummyOpen so that the user knows that they
+# requested a docutils feature that is not supported in MoinMoin.
+def dummyOpen(x, y=None, z=None): return  # no-op: ignores its args, gives None
+
+class dummyIO(StringIO.StringIO):  # replaces the docutils.io file classes
+    def __init__(self, destination=None, destination_path=None,
+                 encoding=None, error_handler='', autoclose=1,
+                 handle_io_errors=1, source_path=None):
+        StringIO.StringIO.__init__(self)  # every argument is ignored
+
+class dummyUrllib2:  # bound to the name urllib2 inside docutils modules
+    def urlopen(a):  # ignores its argument; returns an empty in-memory file
+        return StringIO.StringIO()
+    urlopen = staticmethod(urlopen)
+
+# # # All docutils imports must be contained below here
+import docutils
+from docutils.core import publish_parts
+from docutils.writers import html4css1
+from docutils.nodes import fully_normalize_name, reference
+from docutils.parsers import rst
+from docutils.parsers.rst import directives, roles
+# # # All docutils imports must be contained above here
+
+def safe_import(name, globals = None, locals = None, fromlist = None):
+    mod = __builtin__.__import__(name, globals, locals, fromlist)  # real import
+    if mod:
+        mod.open = dummyOpen  # give the imported module the dummy IO names
+        mod.urllib2 = dummyUrllib2
+    return mod
+
+# Go through all docutils modules that are already loaded and bind their
+# module-level open, urllib2 and __import__ names to the dummy versions, so
+# that modules imported through safe_import get the dummies as well.
+for i in sys.modules.keys():
+    if i.startswith('docutils') and sys.modules[i]:  # skip falsy entries
+        sys.modules[i].open = dummyOpen
+        sys.modules[i].urllib2 = dummyUrllib2
+        sys.modules[i].__import__ = safe_import
+
+docutils.io.FileOutput = docutils.io.FileInput = dummyIO
+
+# --- End of dummy-code --------------------------------------------------------
+
+def html_escape_unicode(node):  # node is the text to escape (a string)
+    # TODO: use a builtin for this; string.encode('ascii', 'xmlcharrefreplace')
+    # does the same job but is only available in Python 2.3 and above.
+    for i in node:
+        if ord(i) > 127:  # non-ASCII: swap in a decimal character reference
+            node = node.replace(i, '&#%d;' % (ord(i)))
+    return node
+
+class MoinWriter(html4css1.Writer):
+
+    config_section = 'MoinMoin writer'
+    config_section_dependencies = ('writers',)
+
+    #"""Final translated form of `document`."""
+    output = None
+
+    def wiki_resolver(self, node):
+        """
+            Normally an unknown reference would be an error in an reST document.
+            However, this is how new documents are created in the wiki. This
+            passes on unknown references to eventually be handled by the
+            MoinMoin formatter.
+        """
+        # TODO: Need to better document the attributes here.
+        if getattr(node, 'indirect_reference_name', None):
+            node['refuri'] = node.indirect_reference_name
+            return 1
+        elif 'id' in node.attributes:
+            # I'm pretty sure the first test should catch any targets or
+            # references with the "id" attribute. Therefore, if we get to here
+            # its probably an internal link that didn't work so we let it go
+            # through as an error.
+            return 0
+        node['refuri'] = node['refname']
+        del node['refname']
+        self.nodes.append(node)
+        return 1
+
+    wiki_resolver.priority = 001
+
+    def __init__(self, formatter, request):
+        html4css1.Writer.__init__(self)
+        self.formatter = formatter
+        self.request = request
+        # Add our wiki unknown_reference_resolver to our list of functions to
+        # run when a target isn't found
+        self.unknown_reference_resolvers = [self.wiki_resolver]
+        # We create a new parser to process MoinMoin wiki style links in the
+        # reST.
+        self.wikiparser = MoinMoin.parser.wiki.Parser('', self.request)
+        self.wikiparser.formatter = self.formatter
+        self.wikiparser.hilite_re = None
+        self.nodes = []
+
+
+    def translate(self):
+        visitor = MoinTranslator(self.document,
+                                 self.formatter,
+                                 self.request,
+                                 self.wikiparser,
+                                 self)
+        self.document.walkabout(visitor)
+        self.visitor = visitor
+        self.output = html_escape_unicode(visitor.astext())
+
+
+class Parser:  # renders raw reStructuredText as HTML via request.write
+    caching = 1
+    Dependencies = Dependencies # copy dependencies from module-scope
+
+    def __init__(self, raw, request, **kw):  # extra keyword args are ignored
+        self.raw = raw
+        self.request = request
+        self.form = request.form
+
+    def format(self, formatter):  # publish self.raw and write out the HTML
+        # Registers the MoinMoin directives with docutils as a side effect
+        parser = MoinDirectives(self.request)
+
+        parts =  publish_parts(source = self.raw,
+                               writer = MoinWriter(formatter, self.request),
+                               settings_overrides = {'halt_level': 5,
+                                                     'traceback': True,
+                                                     'file_insertion_enabled': 0,
+                                                     'raw_enabled': 0,
+                                                     }
+                              )
+        # Assemble title, subtitle, docinfo and body fragment, in that order.
+        text = ''
+        if parts['title']:
+            text += '<h2>' + parts['title'] + '</h2>'
+        # If there is only one subtitle then it is held in parts['subtitle'].
+        # However, if there is more than one subtitle then this is empty and
+        # fragment contains all of the subtitles.
+        if parts['subtitle']:
+            text += '<h3>' + parts['subtitle'] + '</h3>'
+        if parts['docinfo']:
+            text += parts['docinfo']
+        text += parts['fragment']
+        self.request.write(html_escape_unicode(text))
+
+
+class MoinTranslator(html4css1.HTMLTranslator):  # visitor used by MoinWriter
+
+    def __init__(self, document, formatter, request, parser, writer):
+        html4css1.HTMLTranslator.__init__(self, document)
+        self.formatter = formatter
+        self.request = request
+        # MMG: Hook request.write so that wiki output can be captured. Save the
+        # old one; astext() restores it after the page has been processed.
+        self.original_write, self.request.write = self.request.write, self.capture_wiki_formatting
+        self.wikiparser = parser
+        self.wikiparser.request = request
+        # MoinMoin likes to start the initial headers at level 3 and the title
+        # gets level 2, so to comply with their styles, we do here also.
+        # TODO: Could this be fixed by passing this value in settings_overrides?
+        self.initial_header_level = 3
+        # Temporary place for wiki returned markup. This will be filled when
+        # replacing the default writer with the capture_wiki_formatting
+        # function (see visit_image for an example).
+        self.wiki_text = ''
+        self.setup_wiki_handlers()
+
+    def capture_wiki_formatting(self, text):
+        """
+            Captures MoinMoin generated markup to the instance variable
+            wiki_text.
+        """
+        # Append rather than assign: this is called multiple times per
+        # MoinMoin format call (sometimes with empty strings), and assigning
+        # would overwrite the markup we really want.
+        self.wiki_text += text
+
+    def process_wiki_text(self, text):
+        """
+            This sequence is repeated numerous times, so it's captured as a
+            single call here. It's important that wiki_text is blanked before we
+            make the format call. format will call request.write which we've
+            hooked to capture_wiki_formatting. If wiki_text is not blanked
+            before a call to request.write we will get the old markup as well as
+            the newly generated markup.
+
+            TODO: Could implement this as a list so that it acts as a stack. I
+            don't like having to remember to blank wiki_text.
+        """
+        self.wiki_text = ''
+        self.wikiparser.raw = text
+        self.wikiparser.format(self.formatter)
+
+    def add_wiki_markup(self):
+        """
+            Place holder in case this becomes more elaborate someday. For now it
+            only appends the MoinMoin generated markup to the html body and
+            raises SkipNode.
+        """
+        self.body.append(self.wiki_text)
+        self.wiki_text = ''
+        raise docutils.nodes.SkipNode
+
+    def astext(self):
+        self.request.write = self.original_write  # undo the hook from __init__
+        return html4css1.HTMLTranslator.astext(self)
+
+    def process_inline(self, node, uri_string):
+        """
+            Process the "inline:" link scheme. This can either come from
+            visit_reference or from visit_image. The uri_string changes
+            depending on the caller. The uri is passed to MoinMoin to handle the
+            inline link. If it is an image, the src line is extracted and passed
+            to the html4css1 writer to allow the reST image attributes.
+            Otherwise, the html from MoinMoin is inserted into the reST document
+            and SkipNode is raised.
+        """
+        self.process_wiki_text(node[uri_string])
+        # Only pass the src and alt parts to the writer. The reST writer
+        # inserts its own tags so we don't need the MoinMoin html markup.
+        src = re.search('src="([^"]+)"', self.wiki_text)
+        if src:
+            node['uri'] = src.groups()[0]
+            if not 'alt' in node.attributes:
+                alt = re.search('alt="([^"]*)"', self.wiki_text)
+                if alt:
+                    node['alt'] = alt.groups()[0]
+        else:
+            # Image doesn't exist yet for the page so just use what's
+            # returned from MoinMoin verbatim
+            self.add_wiki_markup()
+
+    def process_wiki_target(self, target):
+        self.process_wiki_text(target)
+        # MMG: May need a call to fixup_wiki_formatting here but I
+        # don't think so.
+        self.add_wiki_markup()  # raises SkipNode
+
+    def fixup_wiki_formatting(self, text):
+        replacement = {'<p>': '', '</p>': '', '\n': '', '> ': '>'}
+        for src, dst in replacement.items():
+            text = text.replace(src, dst)
+        # Everything seems to have a space ending the text block. We want to
+        # get rid of this
+        if text and text[-1] == ' ':
+            text = text[:-1]
+        return text
+
+    def visit_reference(self, node):
+        """
+            Pass links to MoinMoin to get the correct wiki space url. Extract
+            the url and pass it on to the html4css1 writer to handle. Inline
+            images are also handled by visit_image. Not sure what the "drawing:"
+            link scheme is used for, so for now it is handled here.
+
+            Also included here is a hack to allow MoinMoin macros. This routine
+            checks for a link which starts with "[[". This link is passed to the
+            MoinMoin formatter and the resulting markup is inserted into the
+            document in the place of the original link reference.
+        """
+        moin_link_schemes = ('wiki:', 'attachment:', 'drawing:', '[[',
+                             'inline:')
+
+        if 'refuri' in node.attributes:
+            target = None
+            refuri = node['refuri']
+
+            # MMG: Fix this line
+            if [scheme for scheme in moin_link_schemes if
+                    refuri.lstrip().startswith(scheme)]:
+                # For a macro, we want the actual text from the user in target,
+                # not the fully normalized version that is contained in refuri.
+                if refuri.startswith('[['):
+                    target = node['name']
+                else:
+                    target = refuri
+            # TODO: Figure out the following two elif's and comment
+            # appropriately.
+            # The node should have a whitespace normalized name if the docutils
+            # reStructuredText parser would normally fully normalize the name.
+            elif ('name' in node.attributes and
+                  fully_normalize_name(node['name']) == refuri):
+                target = ':%s:' % (node['name'])
+            # If it's not a uri containing a ':' then it's probably destined
+            # for wiki space.
+            elif ':' not in refuri:
+                target = ':%s:' % (refuri)
+
+            if target:
+                if target.startswith('inline:'):
+                    self.process_inline(node, 'refuri')
+                elif target.startswith('[[') and target.endswith(']]'):
+                    self.process_wiki_target(target)
+                else:
+                    # Not a macro or inline so hopefully it's a link. Put the target in
+                    # brackets so that MoinMoin knows it's a link. Extract the
+                    # href, if it exists, and let docutils handle it from there.
+                    # If there is no href just add whatever MoinMoin returned.
+                    node_text = node.astext().replace('\n', ' ')
+                    self.process_wiki_text('[%s %s]' % (target, node_text))
+                    href = re.search('href="([^"]+)"', self.wiki_text)
+                    if href:
+                        # dirty hack in order to undo the HTML entity quoting
+                        node['refuri'] = href.groups()[0].replace("&amp;", "&")
+                    else:
+                        self.wiki_text = self.fixup_wiki_formatting(self.wiki_text)
+                        self.add_wiki_markup()
+        html4css1.HTMLTranslator.visit_reference(self, node)
+
+    def visit_image(self, node):
+        """
+            Need to intervene in the case of inline images. We need MoinMoin to
+            give us the actual src line to the image and then we can feed this
+            to the default html4css1 writer. NOTE: Since the writer can't "open"
+            this image the scale attribute doesn't work without directly
+            specifying the height or width (or both).
+
+            TODO: Need to handle figures similarly.
+        """
+        uri = node['uri'].lstrip()
+        prefix = ''       # assume no prefix
+        if ':' in uri:
+            prefix = uri.split(':',1)[0]
+        # if prefix isn't URL, try to display in page
+        if not prefix.lower() in ('file', 'http', 'https', 'ftp'):
+            # no prefix given, so fake "inline:"
+            if not prefix:
+                node['uri'] = 'inline:' + uri
+            self.process_inline(node, 'uri')
+        html4css1.HTMLTranslator.visit_image(self, node)
+
+    def create_wiki_functor(self, moin_func):
+        moin_callable = getattr(self.formatter, moin_func)
+        def visit_func(self, node):  # emit the formatter's opening markup
+            self.wiki_text = ''
+            self.request.write(moin_callable(1))
+            self.body.append(self.wiki_text)
+        def depart_func(self, node):  # emit the formatter's closing markup
+            self.wiki_text = ''
+            self.request.write(moin_callable(0))
+            self.body.append(self.wiki_text)
+        return visit_func, depart_func
+
+    def setup_wiki_handlers(self):
+        """
+            Have the MoinMoin formatter handle markup when it makes sense. These
+            are portions of the document that do not contain reST specific
+            markup. This allows these portions of the document to look
+            consistent with other wiki pages.
+
+            Setup dispatch routines to handle basic document markup. The
+            handlers dict is the html4css1 handler name followed by the wiki
+            handler name.
+        """
+        handlers = {
+            # Text Markup
+            'emphasis': 'emphasis',
+            'strong': 'strong',
+            'literal': 'code',
+            # Blocks
+            'literal_block': 'preformatted',
+            # Simple Lists
+            'bullet_list': 'bullet_list',
+            'list_item': 'listitem',
+            # Definition List
+            'definition_list': 'definition_list',
+            # Admonitions
+            'warning': 'highlight'}
+        for rest_func, moin_func in handlers.items():
+            visit_func, depart_func = self.create_wiki_functor(moin_func)
+            visit_func = new.instancemethod(visit_func, self, MoinTranslator)
+            depart_func = new.instancemethod(depart_func, self, MoinTranslator)
+            setattr(self, 'visit_%s' % (rest_func), visit_func)
+            setattr(self, 'depart_%s' % (rest_func), depart_func)
+
+    # Enumerated list takes an extra parameter so we handle this differently
+    def visit_enumerated_list(self, node):
+        self.wiki_text = ''
+        self.request.write(self.formatter.number_list(1, start=node.get('start', None)))
+        self.body.append(self.wiki_text)
+
+    def depart_enumerated_list(self, node):
+        self.wiki_text = ''
+        self.request.write(self.formatter.number_list(0))
+        self.body.append(self.wiki_text)
+
+
+class MoinDirectives:
+    """
+        Class to handle all custom directive handling. This code is called as
+        part of the parsing stage.
+    """
+
+    def __init__(self, request):
+        self.request = request
+
+        # include MoinMoin pages
+        directives.register_directive('include', self.include)
+
+        # used for MoinMoin macros
+        directives.register_directive('macro', self.macro)
+
+        # disallow a few directives in order to prevent XSS
+        # for directive in ('meta', 'include', 'raw'):
+        for directive in ('meta', 'raw'):
+            directives.register_directive(directive, None)
+
+        # disable the raw role
+        roles._roles['raw'] = None
+
+        # As a quick fix for infinite includes we only allow a fixed number of
+        # includes per page
+        self.num_includes = 0
+        self.max_includes = 10
+
+    # Handle the include directive rather than letting the default docutils
+    # parser handle it. This allows the inclusion of MoinMoin pages instead of
+    # something from the filesystem.
+    def include(self, name, arguments, options, content, lineno,
+                content_offset, block_text, state, state_machine):
+        # content contains the included file name
+
+        _ = self.request.getText
+
+        # Limit the number of documents that can be included
+        if self.num_includes < self.max_includes:
+            self.num_includes += 1
+        else:
+            lines = [_("**Maximum number of allowed includes exceeded**")]
+            state_machine.insert_input(lines, 'MoinDirectives')
+            return
+
+        if len(content):
+            page = Page(page_name = content[0], request = self.request)
+            if page.exists():
+                text = page.get_raw_body()
+                lines = text.split('\n')
+                # Remove the "#format rst" line
+                if lines[0].startswith("#format"):
+                    del lines[0]
+            else:
+                lines = [_("**Could not find the referenced page: %s**") % (content[0],)]
+            # Insert the text from the included document and then continue
+            # parsing
+            state_machine.insert_input(lines, 'MoinDirectives')
+        return
+
+    include.content = True
+
+    # Add additional macro directive.
+    # This allows MoinMoin macros to be used either by using the directive
+    # directly or by using the substitution syntax. Much cleaner than using the
+    # reference hack (`[[SomeMacro]]`_). This however simply adds a node to the
+    # document tree which is a reference, but through a much better user
+    # interface.
+    def macro(self, name, arguments, options, content, lineno,
+                content_offset, block_text, state, state_machine):
+        # content contains macro to be called
+        if len(content):
+            # Allow either with or without brackets
+            if content[0].startswith('[['):
+                macro = content[0]
+            else:
+                macro = '[[%s]]' % content[0]
+            ref = reference(macro, refuri = macro)
+            ref['name'] = macro
+            return [ref]
+        return
+
+    macro.content = True
+