comparison MoinMoin/parser/rst.py @ 0:77665d8e2254

tag of nonpublic@localhost--archive/moin--enterprise--1.5--base-0 (automatically generated log message) imported from: moin--main--1.5--base-0
author Thomas Waldmann <tw-public@gmx.de>
date Thu, 22 Sep 2005 15:09:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:77665d8e2254
1 # -*- coding: iso-8859-1 -*-
2 """
3 MoinMoin - ReStructured Text Parser
4
5 @copyright: 2004 by Matthew Gilbert <gilbert AT voxmea DOT net>
6 and by Alexander Schremmer <alex AT alexanderweb DOT de>
7 @license: GNU GPL, see COPYING for details.
8
9 REQUIRES docutils 0.3.3 or later
10 """
11
12 import re
13 import new
14 import StringIO
15 import __builtin__
16 import sys
17 import copy
18
19 # docutils imports are below
20 import MoinMoin.parser.wiki
21 from MoinMoin.Page import Page
22
23 Dependencies = [] # this parser just depends on the raw text
24
25 # --- make docutils safe by overriding all module-scoped names related to IO ---
26
# TODO: Add an error message to dummyOpen so that users know they requested
# a docutils feature that is not supported in MoinMoin.
def dummyOpen(x, y=None, z=None):
    """
    Stand-in for open() that is installed into the docutils modules.

    Takes up to three arguments, ignores all of them and returns None, so
    nothing is ever opened.
    """
    return None
30
31 class dummyIO(StringIO.StringIO):
32 def __init__(self, destination=None, destination_path=None,
33 encoding=None, error_handler='', autoclose=1,
34 handle_io_errors=1, source_path=None):
35 StringIO.StringIO.__init__(self)
36
37 class dummyUrllib2:
38 def urlopen(a):
39 return StringIO.StringIO()
40 urlopen = staticmethod(urlopen)
41
42 # # # All docutils imports must be contained below here
43 import docutils
44 from docutils.core import publish_parts
45 from docutils.writers import html4css1
46 from docutils.nodes import fully_normalize_name, reference
47 from docutils.parsers import rst
48 from docutils.parsers.rst import directives, roles
49 # # # All docutils imports must be contained above here
50
def safe_import(name, globals = None, locals = None, fromlist = None):
    """
    Import `name` through the builtin __import__ and neutralise its IO.

    If the import yields a module, its module-level ``open`` and ``urllib2``
    names are replaced with dummyOpen and dummyUrllib2 before the module is
    returned. Whatever __import__ returned is handed back to the caller.
    """
    module = __builtin__.__import__(name, globals, locals, fromlist)
    if not module:
        return module
    module.open = dummyOpen
    module.urllib2 = dummyUrllib2
    return module
57
# Walk over every docutils module that has been imported so far and replace
# its open, urllib2 and __import__ names with the dummy implementations, so
# that modules imported by docutils later on get the dummies as well.
for i in sys.modules.keys():
    # Skip foreign modules and empty (None) entries in sys.modules.
    if not i.startswith('docutils') or not sys.modules[i]:
        continue
    sys.modules[i].open = dummyOpen
    sys.modules[i].urllib2 = dummyUrllib2
    sys.modules[i].__import__ = safe_import

# The file based IO classes of docutils become in-memory buffers.
docutils.io.FileOutput = dummyIO
docutils.io.FileInput = dummyIO
68
69 # --- End of dummy-code --------------------------------------------------------
70
# Matches any single character whose ordinal is above 127.
_NON_ASCII_RE = re.compile(r'[^\x00-\x7f]')


def _char_reference(match):
    """Return the decimal character reference for a one-character match."""
    return '&#%d;' % ord(match.group())


def html_escape_unicode(node):
    """
    Replace every character with an ordinal above 127 by its decimal
    character reference (``&#NNN;``) and return the resulting string.

    ASCII characters, including markup such as ``<`` and ``&``, are left
    untouched. The substitution is done in a single pass; the previous
    implementation ran a full string replace for each non-ASCII character
    it met, which was quadratic for texts with many such characters.

    string.encode('ascii', 'xmlcharrefreplace') is not used because it only
    works on unicode objects, whereas this function takes any string.
    """
    return _NON_ASCII_RE.sub(_char_reference, node)
78
class MoinWriter(html4css1.Writer):
    """
    html4css1 writer that translates the document with MoinTranslator and
    lets unknown references through so that they can become wiki links.
    """

    config_section = 'MoinMoin writer'
    config_section_dependencies = ('writers',)

    # Final translated form of `document`.
    output = None

    def wiki_resolver(self, node):
        """
        Normally an unknown reference would be an error in a reST document.
        However, this is how new documents are created in the wiki. This
        passes on unknown references to eventually be handled by the
        MoinMoin formatter.

        Returns 1 if the reference was resolved here and 0 if it is left
        unresolved.
        """
        # TODO: Need to better document the attributes here.
        if getattr(node, 'indirect_reference_name', None):
            node['refuri'] = node.indirect_reference_name
            return 1
        elif 'id' in node.attributes:
            # I'm pretty sure the first test should catch any targets or
            # references with the "id" attribute. Therefore, if we get to
            # here it's probably an internal link that didn't work so we let
            # it go through as an error.
            return 0
        # Turn the unresolved name into the link target and remember the node.
        node['refuri'] = node['refname']
        del node['refname']
        self.nodes.append(node)
        return 1

    # Same value as the former octal literal 001.
    wiki_resolver.priority = 1

    def __init__(self, formatter, request):
        """
        Store the MoinMoin formatter and request and set up the wiki parser
        that is later handed to the translator.
        """
        html4css1.Writer.__init__(self)
        self.formatter = formatter
        self.request = request
        # Add our wiki unknown_reference_resolver to our list of functions to
        # run when a target isn't found
        self.unknown_reference_resolvers = [self.wiki_resolver]
        # We create a new parser to process MoinMoin wiki style links in the
        # reST.
        self.wikiparser = MoinMoin.parser.wiki.Parser('', self.request)
        self.wikiparser.formatter = self.formatter
        self.wikiparser.hilite_re = None
        # References that wiki_resolver has resolved.
        self.nodes = []

    def translate(self):
        """
        Walk the document with a MoinTranslator and store the escaped
        result in self.output.

        MoinTranslator replaces request.write in its constructor and only
        puts the original back in astext(). If anything raises in between,
        the request would stay hooked, so the original is restored here
        unconditionally.
        """
        original_write = self.request.write
        try:
            visitor = MoinTranslator(self.document,
                                     self.formatter,
                                     self.request,
                                     self.wikiparser,
                                     self)
            self.document.walkabout(visitor)
            self.visitor = visitor
            self.output = html_escape_unicode(visitor.astext())
        finally:
            # No-op on success (astext() already restored it); required when
            # the translation failed half way.
            self.request.write = original_write
135
136
class Parser:
    """
    Parser for reStructuredText pages: runs docutils over the raw text and
    writes the resulting markup to the request.
    """

    caching = 1
    Dependencies = Dependencies # copy dependencies from module-scope

    def __init__(self, raw, request, **kw):
        """Keep the raw page text, the request and the request's form."""
        self.raw = raw
        self.request = request
        self.form = request.form

    def format(self, formatter):
        """
        Publish self.raw with MoinWriter and write title, subtitle, docinfo
        and body fragment, in that order, to the request.
        """
        # Instantiating MoinDirectives registers our directive handlers with
        # docutils; the instance itself is not needed afterwards.
        MoinDirectives(self.request)

        overrides = {'halt_level': 5,
                     'traceback': True,
                     'file_insertion_enabled': 0,
                     'raw_enabled': 0,
                     }
        parts = publish_parts(source = self.raw,
                              writer = MoinWriter(formatter, self.request),
                              settings_overrides = overrides)

        pieces = []
        if parts['title']:
            pieces.append('<h2>' + parts['title'] + '</h2>')
        # A single subtitle is delivered in parts['subtitle']. With more than
        # one subtitle that entry is empty and all of them are part of the
        # fragment instead.
        if parts['subtitle']:
            pieces.append('<h3>' + parts['subtitle'] + '</h3>')
        if parts['docinfo']:
            pieces.append(parts['docinfo'])
        pieces.append(parts['fragment'])
        self.request.write(html_escape_unicode(''.join(pieces)))
171
172
class MoinTranslator(html4css1.HTMLTranslator):
    """
    html4css1 translator that hands wiki specific pieces of the document
    (links, inline images, macros and some basic markup) to the MoinMoin
    wiki parser / formatter and splices the markup they produce into the
    HTML body.
    """

    def __init__(self, document, formatter, request, parser, writer):
        html4css1.HTMLTranslator.__init__(self, document)
        self.formatter = formatter
        self.request = request
        # MMG: Using our own writer when needed. The original one is saved
        # so that astext() can restore it once the page has been processed.
        self.original_write = self.request.write
        self.request.write = self.capture_wiki_formatting
        self.wikiparser = parser
        self.wikiparser.request = request
        # MoinMoin likes to start the initial headers at level 3 and the title
        # gets level 2, so to comply with their styles, we do here also.
        # TODO: Could this be fixed by passing this value in settings_overrides?
        self.initial_header_level = 3
        # Temporary place for wiki returned markup. It is filled through
        # capture_wiki_formatting, which stands in for request.write (see
        # visit_image for an example).
        self.wiki_text = ''
        self.setup_wiki_handlers()

    def capture_wiki_formatting(self, text):
        """
        Replacement for request.write: appends the MoinMoin generated markup
        to the instance variable wiki_text.
        """
        # This is called several times per MoinMoin format call, sometimes
        # with empty strings, so the text has to be accumulated rather than
        # assigned.
        self.wiki_text = self.wiki_text + text

    def process_wiki_text(self, text):
        """
        Run `text` through the wiki parser and leave the result in
        wiki_text.

        wiki_text has to be blanked before the format call: format writes
        through request.write, which is hooked to capture_wiki_formatting,
        so anything left over would be prepended to the new markup.

        TODO: Could implement this as a list so that it acts as a stack. I
        don't like having to remember to blank wiki_text.
        """
        self.wiki_text = ''
        self.wikiparser.raw = text
        self.wikiparser.format(self.formatter)

    def add_wiki_markup(self):
        """
        Append the captured MoinMoin markup to the html body, blank
        wiki_text and raise SkipNode. Place holder in case this becomes more
        elaborate someday.
        """
        self.body.append(self.wiki_text)
        self.wiki_text = ''
        raise docutils.nodes.SkipNode

    def astext(self):
        """Restore the original request.write and return the document text."""
        self.request.write = self.original_write
        return html4css1.HTMLTranslator.astext(self)

    def process_inline(self, node, uri_string):
        """
        Process the "inline:" link scheme. Called from visit_reference and
        from visit_image; uri_string names the node attribute that holds
        the uri for the respective caller. The uri is passed to MoinMoin. If
        the markup that comes back contains a src attribute, src (and alt,
        unless the node already has one) are copied to the node so that the
        html4css1 writer can apply the reST image attributes. Otherwise the
        MoinMoin markup is inserted as is and SkipNode is raised.
        """
        self.process_wiki_text(node[uri_string])
        # Only src and alt are taken over. The reST writer inserts its own
        # tags so the MoinMoin html markup is not needed.
        src_match = re.search('src="([^"]+)"', self.wiki_text)
        if not src_match:
            # Image doesn't exist yet for the page so just use what's
            # returned from MoinMoin verbatim (this raises SkipNode).
            self.add_wiki_markup()
            return
        node['uri'] = src_match.group(1)
        if 'alt' not in node.attributes:
            alt_match = re.search('alt="([^"]*)"', self.wiki_text)
            if alt_match:
                node['alt'] = alt_match.group(1)

    def process_wiki_target(self, target):
        """Let MoinMoin render `target` and insert the result verbatim."""
        self.process_wiki_text(target)
        # MMG: May need a call to fixup_wiki_formatting here but I
        # don't think so.
        self.add_wiki_markup()

    def fixup_wiki_formatting(self, text):
        """
        Strip paragraph tags and newlines from `text`, collapse "> " to ">"
        and drop one trailing space.
        """
        replacement = {'<p>': '', '</p>': '', '\n': '', '> ': '>'}
        for needle, substitute in replacement.items():
            text = text.replace(needle, substitute)
        # Everything seems to have a space ending the text block. We want to
        # get rid of this
        if text.endswith(' '):
            text = text[:-1]
        return text

    def visit_reference(self, node):
        """
        Pass links to MoinMoin to get the correct wiki space url. Extract
        the url and pass it on to the html4css1 writer to handle. Inline
        images are also handled by visit_image. Not sure what the "drawing:"
        link scheme is used for, so for now it is handled here.

        Also included here is a hack to allow MoinMoin macros. This routine
        checks for a link which starts with "[[". This link is passed to the
        MoinMoin formatter and the resulting markup is inserted into the
        document in the place of the original link reference.
        """
        moin_link_schemes = ('wiki:', 'attachment:', 'drawing:', '[[',
                             'inline:')

        if 'refuri' in node.attributes:
            refuri = node['refuri']
            target = None

            # MMG: Fix this line
            # Does the uri, leading whitespace aside, use a MoinMoin scheme?
            stripped = refuri.lstrip()
            uses_moin_scheme = 0
            for scheme in moin_link_schemes:
                if stripped.startswith(scheme):
                    uses_moin_scheme = 1
                    break

            if uses_moin_scheme:
                # For a macro, we want the actual text from the user in
                # target, not the fully normalized version that is contained
                # in refuri.
                if refuri.startswith('[['):
                    target = node['name']
                else:
                    target = refuri
            # TODO: Figure out the following two elif's and comment
            # appropriately.
            # The node should have a whitespace normalized name if the
            # docutils reStructuredText parser would normally fully
            # normalize the name.
            elif ('name' in node.attributes and
                  fully_normalize_name(node['name']) == refuri):
                target = ':%s:' % (node['name'])
            # If it's not a uri containing a ':' then it's probably destined
            # for wiki space.
            elif ':' not in refuri:
                target = ':%s:' % (refuri)

            if target:
                if target.startswith('inline:'):
                    self.process_inline(node, 'refuri')
                elif target.startswith('[[') and target.endswith(']]'):
                    self.process_wiki_target(target)
                else:
                    # Not a macro or inline so hopefully it's a link. Put the
                    # target in brackets so that MoinMoin knows it's a link.
                    # Extract the href, if it exists, and let docutils handle
                    # it from there. If there is no href just add whatever
                    # MoinMoin returned.
                    link_text = node.astext().replace('\n', ' ')
                    self.process_wiki_text('[%s %s]' % (target, link_text))
                    href_match = re.search('href="([^"]+)"', self.wiki_text)
                    if href_match:
                        # dirty hack in order to undo the HTML entity quoting
                        node['refuri'] = href_match.group(1).replace("&amp;", "&")
                    else:
                        self.wiki_text = self.fixup_wiki_formatting(self.wiki_text)
                        self.add_wiki_markup()
        html4css1.HTMLTranslator.visit_reference(self, node)

    def visit_image(self, node):
        """
        Need to intervene in the case of inline images. We need MoinMoin to
        give us the actual src line to the image and then we can feed this
        to the default html4css1 writer. NOTE: Since the writer can't "open"
        this image the scale attribute doesn't work without directly
        specifying the height or width (or both).

        TODO: Need to handle figures similarly.
        """
        uri = node['uri'].lstrip()
        # Everything in front of the first colon is the prefix; without a
        # colon there is no prefix.
        colon = uri.find(':')
        if colon >= 0:
            prefix = uri[:colon]
        else:
            prefix = ''
        # if prefix isn't URL, try to display in page
        if prefix.lower() not in ('file', 'http', 'https', 'ftp'):
            # no prefix given, so fake "inline:"
            if not prefix:
                node['uri'] = 'inline:' + uri
            self.process_inline(node, 'uri')
        html4css1.HTMLTranslator.visit_image(self, node)

    def create_wiki_functor(self, moin_func):
        """
        Build a (visit, depart) pair of functions that emit the markup of
        the formatter method named `moin_func`, called with 1 on visit and
        with 0 on depart.
        """
        moin_callable = getattr(self.formatter, moin_func)

        def emit(translator, on):
            # request.write is hooked, so the formatter output ends up in
            # wiki_text and is moved to the body from there.
            translator.wiki_text = ''
            translator.request.write(moin_callable(on))
            translator.body.append(translator.wiki_text)

        def visit_func(self, node):
            emit(self, 1)

        def depart_func(self, node):
            emit(self, 0)

        return visit_func, depart_func

    def setup_wiki_handlers(self):
        """
        Have the MoinMoin formatter handle markup when it makes sense. These
        are portions of the document that do not contain reST specific
        markup. This allows these portions of the document to look
        consistent with other wiki pages.

        Setup dispatch routines to handle basic document markup. The
        handlers dict maps the html4css1 handler name to the wiki handler
        name.
        """
        handlers = {
            # Text Markup
            'emphasis': 'emphasis',
            'strong': 'strong',
            'literal': 'code',
            # Blocks
            'literal_block': 'preformatted',
            # Simple Lists
            'bullet_list': 'bullet_list',
            'list_item': 'listitem',
            # Definition List
            'definition_list': 'definition_list',
            # Admonitions
            'warning': 'highlight'}
        for rest_func, moin_func in handlers.items():
            visit_func, depart_func = self.create_wiki_functor(moin_func)
            # Bind both functions to this instance and install them under
            # the names the node visitor dispatches to.
            bound_visit = new.instancemethod(visit_func, self, MoinTranslator)
            bound_depart = new.instancemethod(depart_func, self, MoinTranslator)
            setattr(self, 'visit_%s' % (rest_func), bound_visit)
            setattr(self, 'depart_%s' % (rest_func), bound_depart)

    # Enumerated list takes an extra parameter so we handle this differently
    def visit_enumerated_list(self, node):
        """Open a numbered list, passing on the node's start value if any."""
        self.wiki_text = ''
        list_start = node.get('start', None)
        self.request.write(self.formatter.number_list(1, start=list_start))
        self.body.append(self.wiki_text)

    def depart_enumerated_list(self, node):
        """Close the numbered list opened by visit_enumerated_list."""
        self.wiki_text = ''
        self.request.write(self.formatter.number_list(0))
        self.body.append(self.wiki_text)
412
413
class MoinDirectives:
    """
    Class to handle all custom directive handling. This code is called as
    part of the parsing stage.

    Creating an instance registers the directives with docutils; the
    registration is global to the docutils ``directives`` module.
    """

    def __init__(self, request):
        self.request = request

        # include MoinMoin pages
        directives.register_directive('include', self.include)

        # used for MoinMoin macros
        directives.register_directive('macro', self.macro)

        # disallow a few directives in order to prevent XSS
        # for directive in ('meta', 'include', 'raw'):
        for directive in ('meta', 'raw'):
            directives.register_directive(directive, None)

        # disable the raw role
        roles._roles['raw'] = None

        # As a quick fix for infinite includes we only allow a fixed number of
        # includes per page
        self.num_includes = 0
        self.max_includes = 10

    # Handle the include directive rather than letting the default docutils
    # parser handle it. This allows the inclusion of MoinMoin pages instead of
    # something from the filesystem.
    def include(self, name, arguments, options, content, lineno,
                content_offset, block_text, state, state_machine):
        """
        Directive function for ``include``: feed the raw text of the wiki
        page named in the first content line back into the parser.

        The text is inserted through state_machine.insert_input, so the
        directive itself contributes no nodes and always returns an empty
        list (docutils expects a list of nodes from a directive function).
        At most max_includes includes are honoured per MoinDirectives
        instance; further ones insert an error message instead.
        """
        # content contains the included file name

        _ = self.request.getText

        # Limit the number of documents that can be included
        if self.num_includes < self.max_includes:
            self.num_includes += 1
        else:
            lines = [_("**Maximum number of allowed includes exceeded**")]
            state_machine.insert_input(lines, 'MoinDirectives')
            return []

        if len(content):
            # NOTE(review): no read permission check on the included page is
            # visible here - confirm that ACLs are enforced elsewhere.
            page = Page(page_name = content[0], request = self.request)
            if page.exists():
                text = page.get_raw_body()
                lines = text.split('\n')
                # Remove the "#format rst" line
                if lines[0].startswith("#format"):
                    del lines[0]
            else:
                lines = [_("**Could not find the referenced page: %s**") % (content[0],)]
            # Insert the text from the included document and then continue
            # parsing
            state_machine.insert_input(lines, 'MoinDirectives')
        return []

    include.content = True

    # Add additional macro directive.
    # This allows MoinMoin macros to be used either by using the directive
    # directly or by using the substitution syntax. Much cleaner than using the
    # reference hack (`[[SomeMacro]]`_). This however simply adds a node to the
    # document tree which is a reference, but through a much better user
    # interface.
    def macro(self, name, arguments, options, content, lineno,
              content_offset, block_text, state, state_machine):
        """
        Directive function for ``macro``: return a one-element list holding
        a reference node whose refuri and name are the macro call in
        ``[[...]]`` form, or an empty list if the directive has no content.
        """
        # content contains macro to be called
        if not len(content):
            return []
        # Allow either with or without brackets
        if content[0].startswith('[['):
            macro = content[0]
        else:
            macro = '[[%s]]' % content[0]
        ref = reference(macro, refuri = macro)
        ref['name'] = macro
        return [ref]

    macro.content = True
497