MoinMoin/formatter/text_docbook.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Sat, 13 Mar 2010 22:45:07 +0100
changeset 4499 61d77b5506a5
parent 3291 954300d424eb
permissions -rw-r--r--
Fix docbook formatter crashing, see MoinMoinPatch/IncludeMacroWithDocBookFormatter

If a page has an Include macro call with an editlink argument, the formatter
crashes. This patch removes the editlink argument before the call is processed.
     1 # -*- coding: iso-8859-1 -*-
     2 """
     3     MoinMoin - DocBook Formatter
     4 
     5     @copyright: 2005,2008 by Mikko Virkkilä <mvirkkil@cc.hut.fi>
     6     @copyright: 2005 by MoinMoin:AlexanderSchremmer (small modifications)
     7     @copyright: 2005 by MoinMoin:Petr Pytelka <pyta@lightcomp.com> (small modifications)
     8 
     9     @license: GNU GPL, see COPYING for details.
    10 """
    11 
    12 import os,re
    13 
    14 from xml.dom import getDOMImplementation
    15 from xml.dom.ext.reader import Sax
    16 from xml.dom.ext import Node
    17 
    18 from MoinMoin.formatter import FormatterBase
    19 from MoinMoin import wikiutil
    20 from MoinMoin.error import CompositeError
    21 from MoinMoin.action import AttachFile
    22 
    23 #For revision history
    24 from MoinMoin.logfile import editlog
    25 from MoinMoin import user
    26 
    27 
    28 class InternalError(CompositeError):
    29     pass
    30 
    31 try:
    32     dom = getDOMImplementation("4DOM")
    33 except ImportError:
    34     raise InternalError("You need to install 4suite to use the DocBook formatter.")
    35 
    36 
    37 class Formatter(FormatterBase):
    #TODO: How to handle revision history and other meta-info from included files?
    #      The problem is that we don't know what the original page is, since
    #      the Include macro doesn't pass us the information.
    41 
    42     # this list is extended as the page is parsed. Could be optimized by adding them here?
    43     section_should_break = ['abstract', 'para', 'emphasis']
    44 
    45     blacklisted_macros = ('TableOfContents', 'ShowSmileys', 'Navigation')
    46 
    # If the current node is one of the following and we are about to emit
    # text, the text should be wrapped in a paragraph
    49     wrap_text_in_para = ('listitem', 'glossdef', 'article', 'chapter', 'tip', 'warning', 'note', 'caution', 'important')
    50 
    51     # from dtd
    52     _can_contain_section = ("section", "appendix", "article", "chapter", "patintro", "preface")
    53 
    def __init__(self, request, doctype="article", **kw):
        """Prepare an empty formatter; the DOM document is created later.

        request     -- the current request object
        doctype     -- name used for the DocBook root element and doctype
        is_included -- (keyword) true when the Include macro created us
        """
        FormatterBase.__init__(self, request, **kw)
        self.request = request

        # If the formatter is used by the Include macro, it will set
        # is_included=True.  The Include macro never calls startDocument and
        # endDocument, and the formatter does not work properly unless they
        # are called, so startContent/endContent call them in that case.
        self.include_kludge = bool(kw.get("is_included"))

        self.doctype = doctype
        self.curdepth = 0
        self.cur = None
    73 
    def startDocument(self, pagename):
        """Create the DOM document, add its title and make the root current.

        Always returns "" - the output is produced by endDocument.
        """
        doctype_node = dom.createDocumentType(
            self.doctype,
            "-//OASIS//DTD DocBook XML V4.4//EN",
            "http://www.docbook.org/xml/4.4/docbookx.dtd")
        self.doc = dom.createDocument(None, self.doctype, doctype_node)

        self.title = pagename
        self.root = self.doc.documentElement

        standalone_article = not self.include_kludge and self.doctype == "article"
        if standalone_article:
            # title and revision history live in <articleinfo>
            info = self.doc.createElement("articleinfo")
            self.root.appendChild(info)
            self._addTitleElement(self.title, targetNode=info)
            self._addRevisionHistory(targetNode=info)
        else:
            self._addTitleElement(self.title, targetNode=self.root)

        self.cur = self.root
        return ""
    92 
    93     def startContent(self, content_id="content", **kw):
    94         if self.include_kludge and not self.cur:
    95             return self.startDocument("OnlyAnIdiotWouldCreateSuchaPage")
    96         return ""
    97 
    98     def endContent(self):
    99         if self.include_kludge:
   100             return self.endDocument()
   101         return ""
   102 
   103     def endDocument(self):
   104         from xml.dom.ext import PrettyPrint, Print
   105         import StringIO
   106 
   107         f = StringIO.StringIO()
   108         Print(self.doc, f)
   109         txt = f.getvalue()
   110         f.close()
   111 
   112         self.cur = None
   113         return txt
   114 
   115     def text(self, text, **kw):
   116         if text == "\\n":
   117             srcText = "\n"
   118         else:
   119             srcText = text
   120 
   121         if srcText and self._isInsidePreformatted():
   122 
   123             if self.cur.lastChild is not None and self.cur.lastChild.nodeType == Node.CDATA_SECTION_NODE:
   124                 # We can add it to a previous CDATA section
   125                 self.cur.lastChild.nodeValue = self.cur.lastChild.nodeValue + srcText
   126             else:
   127                 # We create a new cdata section
   128                 self.cur.appendChild(self.doc.createCDATASection(srcText))
   129 
   130         elif self.cur.nodeName in self.wrap_text_in_para:
   131             """
   132             If we already wrapped one text item in a para, we should add to that para
   133             and not create a new one. Another question is if we should add a space?
   134             """
   135             if self.cur.lastChild is not None and self.cur.lastChild.nodeName == 'para':
   136                 self.cur.lastChild.appendChild(self.doc.createTextNode(srcText))
   137             else:
   138                 self.paragraph(1)
   139                 self.text(text)
   140                 self.paragraph(0)
   141         else:
   142             self.cur.appendChild(self.doc.createTextNode(srcText))
   143         return ""
   144 
   145     def heading(self, on, depth, **kw):
   146         while self.cur.nodeName in self.section_should_break:
   147             self.cur = self.cur.parentNode
   148 
   149         if on:
   150             # try to go to higher level if needed
   151             if depth <= self.curdepth:
   152                 # number of levels we want to go higher
   153                 numberOfLevels = self.curdepth - depth + 1
   154                 for dummy in range(numberOfLevels):
   155                     # find first non section node
   156                     while not self.cur.nodeName in self._can_contain_section:
   157                         self.cur = self.cur.parentNode
   158 
   159                     if self.cur.nodeName == "section":
   160                         self.cur = self.cur.parentNode
   161 
   162             section = self.doc.createElement("section")
   163             self.cur.appendChild(section)
   164             self.cur = section
   165 
   166             title = self.doc.createElement("title")
   167             self.cur.appendChild(title)
   168             self.cur = title
   169             self.curdepth = depth
   170         else:
   171             self.cur = self.cur.parentNode
   172 
   173         return ""
   174 
   175     def paragraph(self, on, **kw):
   176         FormatterBase.paragraph(self, on)
   177 
   178         # Let's prevent empty paras
   179         if not on:
   180             if not self._hasContent(self.cur):
   181                 oldnode = self.cur
   182                 self.cur = oldnode.parentNode
   183                 self.cur.removeChild(oldnode)
   184                 return ""
   185 
   186         # Let's prevent para inside para
   187         if on and self.cur.nodeName == "para":
   188             return ""
   189         return self._handleNode("para", on)
   190 
   191     def linebreak(self, preformatted=1):
   192         """
   193         If preformatted, it will simply output a linebreak.
   194         If we are in a paragraph, we will close it, and open another one.
   195         """
   196         if preformatted:
   197             self.text('\\n')
   198         elif self.cur.nodeName == "para":
   199             self.paragraph(0)
   200             self.paragraph(1)
   201         else:
   202             self._emitComment("Warning: Probably not emitting right sort of linebreak")
   203             self.text('\n')
   204         return ""
   205 
   206 ### Inline ##########################################################
   207 
   208     def strong(self, on, **kw):
   209         return self._handleFormatting("emphasis", on, (('role', 'strong'), ))
   210 
   211     def emphasis(self, on, **kw):
   212         return self._handleFormatting("emphasis", on)
   213 
   214     def underline(self, on, **kw):
   215         return self._handleFormatting("emphasis", on, (('role', 'underline'), ))
   216 
   217     def highlight(self, on, **kw):
   218         return self._handleFormatting("emphasis", on, (('role', 'highlight'), ))
   219 
   220     def sup(self, on, **kw):
   221         return self._handleFormatting("superscript", on)
   222 
   223     def sub(self, on, **kw):
   224         return self._handleFormatting("subscript", on)
   225 
   226     def strike(self, on, **kw):
   227         # does not yield <strike> using the HTML XSLT files here ...
   228         # but seems to be correct
   229         return self._handleFormatting("emphasis", on,
   230                                       (('role', 'strikethrough'), ))
   231 
   232     def code(self, on, **kw):
   233         # Let's prevent empty code
   234         if not on:
   235             if not self._hasContent(self.cur):
   236                 oldnode = self.cur
   237                 self.cur = oldnode.parentNode
   238                 self.cur.removeChild(oldnode)
   239                 return ""
   240         return self._handleFormatting("code", on)
   241 
   242     def preformatted(self, on, **kw):
   243         return self._handleFormatting("screen", on)
   244 
   245 
   246 ### Lists ###########################################################
   247 
   248     def number_list(self, on, type=None, start=None, **kw):
   249         docbook_ol_types = {'1': "arabic",
   250                             'a': "loweralpha",
   251                             'A': "upperalpha",
   252                             'i': "lowerroman",
   253                             'I': "upperroman"}
   254 
   255         if type and docbook_ol_types.has_key(type):
   256             attrs = [("numeration", docbook_ol_types[type])]
   257         else:
   258             attrs = []
   259 
   260         return self._handleNode('orderedlist', on, attrs)
   261 
   262     def bullet_list(self, on, **kw):
   263         return self._handleNode("itemizedlist", on)
   264 
   265     def listitem(self, on, style=None, **kw):
   266         if self.cur.nodeName == "glosslist" or self.cur.nodeName == "glossentry":
   267             return self.definition_desc(on)
   268         if on and self.cur.nodeName == "listitem":
   269             """If we are inside a listitem, and someone wants to create a new one, it
   270             means they forgot to close the old one, and we need to do it for them."""
   271             self.listitem(0)
   272 
   273         args = []
   274         if on and style:
   275             styles = self._convertStylesToDict(style)
   276             if styles.has_key('list-style-type'):
   277                 args.append(('override', styles['list-style-type']))
   278 
   279         return self._handleNode("listitem", on, attributes=args)
   280 
   281     def definition_list(self, on, **kw):
   282         return self._handleNode("glosslist", on)
   283 
   284     def definition_term(self, on, compact=0, **kw):
   285         if on:
   286             self._handleNode("glossentry", on)
   287             self._handleNode("glossterm", on)
   288         else:
   289             if self._hasContent(self.cur):
   290                 self._handleNode("glossterm", on)
   291                 self._handleNode("glossentry", on)
   292             else:
   293                 # No term info :(
   294                 term = self.cur
   295                 entry = term.parentNode
   296                 self.cur = entry.parentNode
   297                 self.cur.removeChild(entry)
   298         return ""
   299 
   300     def definition_desc(self, on, **kw):
   301         if on:
   302             if self.cur.nodeName == "glossentry":
   303                 # Good, we can add it here.
   304                 self._handleNode("glossdef", on)
   305                 return ""
   306 
   307             # We are somewhere else, let's see...
   308             if self.cur.nodeName != "glosslist":
   309                 self._emitComment("Trying to add a definition, but we arent in a glosslist")
   310                 return ""
   311             if not self.cur.lastChild or self.cur.lastChild.nodeName != "glossentry":
   312                 self._emitComment("Trying to add a definition, but there is no entry")
   313                 return ""
   314 
   315             # Found it, calling again
   316             self.cur = self.cur.lastChild
   317             return self.definition_desc(on)
   318         else:
   319             if not self._hasContent(self.cur):
   320                 # Seems no valuable info was added
   321                 assert(self.cur.nodeName == "glossdef")
   322                 toRemove = self.cur
   323                 self.cur = toRemove.parentNode
   324                 self.cur.removeChild(toRemove)
   325 
   326             while self.cur.nodeName != "glosslist":
   327                 self.cur = self.cur.parentNode
   328         return ""
   329 
   330 ### Links ###########################################################
    # TODO: Fix anchors to documents which are included. This probably needs
    #       to be a postprocessing rule. It could be done by having the
    #       anchors use PageName#anchor as their "linkend" value. Then, at
    #       post-processing, the following would be done for all urls:
    #        - get all ulinks with an anchor part in their url
    #        - get the ulink's PageName#anchor part by removing the baseurl part
    #        - if any of our <anchor> elements has the same PageName#anchor
    #          value as our <ulink>, then replace the ulink with a link
    #          element.
    #       Note: This would make it impossible to link to a section on the
    #             original webpage. The link would instead point within the
    #             docbook page and not to the webpage.
   343 
   344 
   345     def pagelink(self, on, pagename='', page=None, **kw):
   346         FormatterBase.pagelink(self, on, pagename, page, **kw)
   347         return self.interwikilink(on, 'Self', pagename, **kw)
   348 
   349     def interwikilink(self, on, interwiki='', pagename='', **kw):
   350         if not on:
   351             return self.url(on, **kw)
   352 
   353         wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_interwiki(self.request, interwiki, pagename)
   354         wikiurl = wikiutil.mapURL(self.request, wikiurl)
   355         href = wikiutil.join_wiki(wikiurl, wikitail)
   356         if kw.has_key("anchor"):
   357             href="%s#%s"%(href, kw['anchor'])
   358 
   359         if pagename == self.page.page_name:
   360             kw['is_self']=True
   361 
   362         return self.url(on, href, **kw)
   363 
   364     def url(self, on, url=None, css=None, **kw):
   365         if url and url.startswith("/"):
   366             # convert to absolute path:
   367             url = "%s%s"%(self.request.getBaseURL(), url)
   368 
   369         if not on:
   370             self._cleanupUlinkNode()
   371 
   372         if kw.has_key("anchor") and kw.has_key("is_self") and kw["is_self"]:
   373             #handle the case where we are pointing to somewhere insidee our own document
   374             return self._handleNode("link", on, attributes=(('linkend', kw["anchor"]), ))
   375         else:
   376             return self._handleNode("ulink", on, attributes=(('url', url), ))
   377 
   378     def anchordef(self, name):
   379         self._handleNode("anchor", True, attributes=(('id', name), ))
   380         self._handleNode("anchor", False)
   381         return ""
   382 
   383     def anchorlink(self, on, name='', **kw):
   384         linkid = kw.get('id', None)
   385         attrs = []
   386         if name != '':
   387             attrs.append(('endterm', name))
   388         if id is not None:
   389             attrs.append(('linkend', linkid))
   390         elif name != '':
   391             attrs.append(('linkend', name))
   392 
   393         return self._handleNode("link", on, attrs)
   394 
   395 ### Attachments ######################################################
   396 
   397     def attachment_link(self, on, url=None, **kw):
   398         assert on in (0, 1, False, True) # make sure we get called the new way, not like the 1.5 api was
   399         # we do not output a "upload link" when outputting docbook
   400         if on:
   401             pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
   402             fname = wikiutil.taintfilename(filename)
   403             target = AttachFile.getAttachUrl(pagename, filename, self.request)
   404             return self.url(1, target, title="attachment:%s" % url)
   405         else:
   406             return self.url(0)
   407 
   408     def attachment_image(self, url, **kw):
   409         """
   410         Figures out the absolute path to the image and then hands over to
   411         the image function. Any title is also handed over, and an additional
   412         title suggestion is made based on filename. The image function will
   413         use the suggestion if no other text alternative is found.
   414 
   415         If the file is not found, then a simple text will replace it.
   416         """
   417         _ = self.request.getText
   418         pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
   419         fname = wikiutil.taintfilename(filename)
   420         fpath = AttachFile.getFilename(self.request, pagename, fname)
   421         if not os.path.exists(fpath):
   422             return self.text("[attachment:%s]" % url)
   423         else:
   424             return self.image(
   425                 src=AttachFile.getAttachUrl(pagename, filename,
   426                                             self.request, addts=1),
   427                 attachment_title=url,
   428                 **kw)
   429 
   430 
   431     def attachment_drawing(self, url, text, **kw):
   432         _ = self.request.getText
   433         pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
   434         fname = wikiutil.taintfilename(filename)
   435         drawing = fname
   436         fname = fname + ".png"
   437         filename = filename + ".png"
   438         fpath = AttachFile.getFilename(self.request, pagename, fname)
   439         if not os.path.exists(fpath):
   440             return self.text("[drawing:%s]" % url)
   441         else:
   442             src = AttachFile.getAttachUrl(pagename, filename, self.request, addts=1)
   443             return self.image(alt=drawing, src=src, html_class="drawing")
   444 
   445 ### Images and Smileys ##############################################
   446 
   447     def image(self, src=None, **kw):
   448         if src:
   449             kw['src'] = src
   450         media = self.doc.createElement('inlinemediaobject')
   451 
   452         imagewrap = self.doc.createElement('imageobject')
   453         media.appendChild(imagewrap)
   454 
   455         image = self.doc.createElement('imagedata')
   456         if kw.has_key('src'):
   457             src = kw['src']
   458             if src.startswith("/"):
   459                 # convert to absolute path:
   460                 src = self.request.getBaseURL()+src
   461             image.setAttribute('fileref', src)
   462         if kw.has_key('width'):
   463             image.setAttribute('width', str(kw['width']))
   464         if kw.has_key('height'):
   465             image.setAttribute('depth', str(kw['height']))
   466         imagewrap.appendChild(image)
   467 
   468         # Look for any suitable title, order is important.
   469         title = ''
   470         for a in ('title', 'html_title', 'alt', 'html_alt', 'attachment_title'):
   471             if kw.has_key(a):
   472                 title = kw[a]
   473                 break
   474         if title:
   475             txtcontainer = self.doc.createElement('textobject')
   476             self._addTextElem(txtcontainer, "phrase", title)
   477             media.appendChild(txtcontainer)
   478 
   479         self.cur.appendChild(media)
   480         return ""
   481 
   482     def transclusion(self, on, **kw):
   483         # TODO, see text_html formatter
   484         self._emitComment('transclusion is not implemented in DocBook formatter')
   485         return ""
   486 
   487     def transclusion_param(self, **kw):
   488         # TODO, see text_html formatter
   489         self._emitComment('transclusion parameters are not implemented in DocBook formatter')
   490         return ""
   491 
   492     def smiley(self, text):
   493         return self.request.theme.make_icon(text)
   494 
   495     def icon(self, type):
   496         return '' # self.request.theme.make_icon(type)
   497 
   498 
   499 ### Code area #######################################################
   500 
   501     def code_area(self, on, code_id, code_type=None, show=0, start=-1, step=-1):
   502         """Creates a formatted code region using screen or programlisting,
   503         depending on if a programming language was defined (code_type).
   504 
   505         The code_id is not used for anything in this formatter, but is just
   506         there to remain compatible with the HTML formatter's function.
   507 
   508         Line numbering is supported natively by DocBook so if linenumbering
   509         is requested the relevant attribute will be set.
   510 
   511         Call once with on=1 to start the region, and a second time
   512         with on=0 to end it.
   513         """
   514 
   515         if not on:
   516             return self._handleNode(None, on)
   517 
   518         show = show and 'numbered' or 'unnumbered'
   519         if start < 1:
   520             start = 1
   521 
   522         programming_languages = {"ColorizedJava": "java",
   523                                  "ColorizedPython": "python",
   524                                  "ColorizedCPlusPlus": "c++",
   525                                  "ColorizedPascal": "pascal",
   526                                 }
   527 
   528         if code_type is None:
   529             attrs = (('linenumbering', show),
   530                      ('startinglinenumber', str(start)),
   531                      ('format', 'linespecific'),
   532                      )
   533             return self._handleNode("screen", on, attributes=attrs)
   534         else:
   535             if programming_languages.has_key(code_type):
   536                 code_type = programming_languages[code_type]
   537 
   538             attrs = (('linenumbering', show),
   539                      ('startinglinenumber', str(start)),
   540                      ('language', code_type),
   541                      ('format', 'linespecific'),
   542                      )
   543             return self._handleNode("programlisting", on, attributes=attrs)
   544 
   545     def code_line(self, on):
   546         if on:
   547             self.cur.appendChild(self.doc.createTextNode('\n'))
   548         return ''
   549 
   550     def code_token(self, on, tok_type):
   551         """
   552         DocBook has some support for semantic annotation of code so the
   553         known tokens will be mapped to DocBook entities.
   554         """
   555         toks_map = {'ID': 'methodname',
   556                     'Operator': '',
   557                     'Char': '',
   558                     'Comment': 'lineannotation',
   559                     'Number': '',
   560                     'String': 'phrase',
   561                     'SPChar': '',
   562                     'ResWord': 'token',
   563                     'ConsWord': 'symbol',
   564                     'Error': 'errortext',
   565                     'ResWord2': 'type',
   566                     'Special': '',
   567                     'Preprc': '',
   568                     'Text': '',
   569                    }
   570         if toks_map.has_key(tok_type) and toks_map[tok_type]:
   571             return self._handleFormatting(toks_map[tok_type], on)
   572         else:
   573             return ""
   574 ### Macro ###########################################################
   575 
   576     def macro(self, macro_obj, name, args, markup=None):
   577         """As far as the DocBook formatter is conserned there are three
   578         kinds of macros: Bad, Handled and Unknown.
   579 
   580         The Bad ones are the ones that are known not to work, and are on its
   581         blacklist. They will be ignored and an XML comment will be written
   582         noting that the macro is not supported.
   583 
   584         Handled macros are such macros that code is written to handle them.
   585         For example for the FootNote macro it means that instead of executing
   586         the macro, a DocBook footnote entity is created, with the relevant
   587         pieces of information filles in.
   588 
   589         The Unknown are handled by executing the macro and capturing any
   590         textual output. There shouldn't be any textual output since macros
   591         should call formatter methods. This is unfortunately not always true,
   592         so the output it is then fed in to an xml parser and the
   593         resulting nodes copied to the DocBook-dom tree. If the output is not
   594         valid xml then a comment is written in the DocBook that the macro
   595         should be fixed.
   596 
   597         """
   598         # Another alternative would be to feed the output to rawHTML or even
   599         # combining these two approaches. The _best_ alternative would be to
   600         # fix the macros.
   601         excludes=("articleinfo", "title")
   602 
   603         if name in self.blacklisted_macros:
   604             self._emitComment("The macro %s doesn't work with the DocBook formatter." % name)
   605 
   606         elif name == "FootNote":
   607             footnote = self.doc.createElement('footnote')
   608             self._addTextElem(footnote, "para", str(args))
   609             self.cur.appendChild(footnote)
   610 
   611         elif name == "Include":
   612             was_in_para = self.cur.nodeName == "para"
   613             if was_in_para:
   614                 self.paragraph(0)
   615             
   616             # Regular Expression to match editlink arg, remove it because it causes trouble.
   617             _arg_editlink = r'(,\s*(?P<editlink>editlink))?'
   618             macro_args = re.sub(_arg_editlink, '', args)
   619         
   620             text = FormatterBase.macro(self, macro_obj, name, macro_args)
   621             if text.strip():
   622                 self._copyExternalNodes(Sax.FromXml(text).documentElement.childNodes, exclude=excludes)
   623             if was_in_para:
   624                 self.paragraph(1)
   625 
   626         else:
   627             text = FormatterBase.macro(self, macro_obj, name, args)
   628             if text:
   629                 from xml.parsers.expat import ExpatError
   630                 try:
   631                     xml_dom = Sax.FromXml(text).documentElement.childNodes
   632                     self._copyExternalNodes(xml_dom, exclude=excludes)
   633                 except ExpatError:
   634                     self._emitComment("The macro %s caused an error and should be blacklisted. It returned the data '%s' which caused the docbook-formatter to choke. Please file a bug." % (name, text))
   635 
   636         return u""
   637 
   638 ### Util functions ##################################################
   639 
   640     def _copyExternalNodes(self, nodes, deep=1, target=None, exclude=()):
   641         if not target:
   642             target = self.cur
   643 
   644         for node in nodes:
   645             if node.nodeName in exclude:
   646                 pass
   647             elif target.nodeName == "para" and node.nodeName == "para":
   648                 self._copyExternalNodes(node.childNodes, target=target)
   649                 self.cur = target.parentNode
   650             else:
   651                 target.appendChild(self.doc.importNode(node, deep))
   652 
   653     def _emitComment(self, text):
   654         text = text.replace("--", "- -") # There cannot be "--" in XML comment
   655         self.cur.appendChild(self.doc.createComment(text))
   656 
   657     def _handleNode(self, name, on, attributes=()):
   658         if on:
   659             node = self.doc.createElement(name)
   660             self.cur.appendChild(node)
   661             if len(attributes) > 0:
   662                 for name, value in attributes:
   663                     node.setAttribute(name, value)
   664             self.cur = node
   665         else:
   666             """
   667                 Because we prevent para inside para, we might get extra "please
   668                 exit para" when we are no longer inside one.
   669 
   670                 TODO: Maybe rethink the para in para case
   671             """
   672             if name == "para" and self.cur.nodeName != "para":
   673                 return ""
   674 
   675             self.cur = self.cur.parentNode
   676         return ""
   677 
   678     def _handleFormatting(self, name, on, attributes=()):
   679         # We add all the elements we create to the list of elements that should not contain a section
   680         if name not in self.section_should_break:
   681             self.section_should_break.append(name)
   682         return self._handleNode(name, on, attributes)
   683 
   684     def _isInsidePreformatted(self):
   685         """Walks all parents and checks if one is of a preformatted type, which
   686            means the child would need to be preformatted == embedded in a cdata
   687            section"""
   688         n = self.cur
   689         while n:
   690             if n.nodeName in ("screen", "programlisting"):
   691                 return True
   692             n = n.parentNode
   693         return False
   694 
   695     def _hasContent(self, node):
   696         if node.attributes and len(node.attributes):
   697             return True
   698         for child in node.childNodes:
   699             if child.nodeType == Node.TEXT_NODE and child.nodeValue.strip():
   700                 return True
   701             elif child.nodeType == Node.CDATA_SECTION_NODE and child.nodeValue.strip():
   702                 return True
   703 
   704             if self._hasContent(child):
   705                 return True
   706         return False
   707 
   708     def _addTitleElement(self, titleTxt, targetNode=None):
   709         if not targetNode:
   710             targetNode = self.cur
   711         self._addTextElem(targetNode, "title", titleTxt)
   712 
   713     def _convertStylesToDict(self, styles):
   714         '''Takes the CSS styling information and converts it to a dict'''
   715         attrs = {}
   716         for s in styles.split(";"):
   717             if s.strip(' "') == "":
   718                 continue
   719             if ":" not in s:
   720                 continue
   721             (key, value) = s.split(":", 1)
   722             key = key.strip(' "')
   723             value = value.strip(' "')
   724 
   725             if key == 'vertical-align':
   726                 key = 'valign'
   727             elif key == 'text-align':
   728                 key = 'align'
   729             elif key == 'background-color':
   730                 key = 'bgcolor'
   731 
   732             attrs[key] = value
   733         return attrs
   734 
   735     def _cleanupUlinkNode(self):
   736         """
   737         Moin adds the url as the text to a link, if no text is specified.
   738         Docbook does it when a docbook is rendered, so we don't want moin to
   739         do it and so if the url is exactly the same as the text node inside
   740         the ulink, we remove the text node.
   741         """
   742         if self.cur.nodeName == "ulink" and len(self.cur.childNodes) == 1 \
   743                 and self.cur.firstChild.nodeType == Node.TEXT_NODE \
   744                 and self.cur.firstChild.nodeValue.strip() == self.cur.getAttribute('url').strip():
   745             self.cur.removeChild(self.cur.firstChild)
   746 
   747     def _addTextElem(self, target, elemName, text):
   748         """
   749         Creates an element of the name elemName and adds a text node to it
   750         with the nodeValue of text. The new element is then added as a child
   751         to the element target.
   752         """
   753         newElement = self.doc.createElement(elemName)
   754         newElement.appendChild(self.doc.createTextNode(text))
   755         target.appendChild(newElement)
   756 
   757 
   758     def _addRevisionHistory(self, targetNode):
   759         """
   760         This will generate a revhistory element which it will populate with
   761         revision nodes. Each revision has the revnumber, date and author-
   762         initial elements, and if a comment was supplied, the comment element.
   763 
   764         The date elements format depends on the users settings, so it will
   765         be in the same format as the revision history as viewed in the
   766         page info on the wiki.
   767 
   768         The authorinitials will be the UserName or if it was an anonymous
   769         edit, then it will be the hostname/ip-address.
   770 
   771         The revision history of included documents is NOT included at the
   772         moment due to technical difficulties.
   773         """
   774         _ = self.request.getText
   775         log = editlog.EditLog(self.request, rootpagename=self.title)
   776         user_cache = {}
   777 
   778         history = self.doc.createElement("revhistory")
   779 
   780         # read in the complete log of this page
   781         for line in log.reverse():
   782             if not line.action in ('SAVE', 'SAVENEW', 'SAVE/REVERT', 'SAVE/RENAME', ):
   783                 #Let's ignore adding of attachments
   784                 continue
   785             revision = self.doc.createElement("revision")
   786 
   787             # Revision number (without preceeding zeros)
   788             self._addTextElem(revision, "revnumber", line.rev.lstrip('0'))
   789 
   790             # Date of revision
   791             date_text = self.request.user.getFormattedDateTime(
   792                 wikiutil.version2timestamp(line.ed_time_usecs))
   793             self._addTextElem(revision, "date", date_text)
   794 
   795             # Author or revision
   796             if not (line.userid in user_cache):
   797                 user_cache[line.userid] = user.User(self.request, line.userid, auth_method="text_docbook:740")
   798             author = user_cache[line.userid]
   799             if author and author.name:
   800                 self._addTextElem(revision, "authorinitials", author.name)
   801             else:
   802                 self._addTextElem(revision, "authorinitials", line.hostname)
   803 
   804             # Comment from author of revision
   805             comment = line.comment
   806             if not comment:
   807                 if '/REVERT' in line.action:
   808                     comment = _("Revert to revision %(rev)d.") % {'rev': int(line.extra)}
   809                 elif '/RENAME' in line.action:
   810                     comment = _("Renamed from '%(oldpagename)s'.") % {'oldpagename': line.extra}
   811             if comment:
   812                 self._addTextElem(revision, "revremark", comment)
   813 
   814             history.appendChild(revision)
   815 
   816         if history.firstChild:
   817             #only add revision history is there is history to add
   818             targetNode.appendChild(history)
   819 
   820 ### Not supported ###################################################
   821 
   822     def rule(self, size=0, **kw):
   823         self._emitComment('rule (<hr>) is not applicable to DocBook')
   824         return ""
   825 
   826     def small(self, on, **kw):
   827         if on:
   828             self._emitComment('"~-smaller-~" is not applicable to DocBook')
   829         return ""
   830 
   831     def big(self, on, **kw):
   832         if on:
   833             self._emitComment('"~+bigger+~" is not applicable to DocBook')
   834         return ""
   835 
   836     def rawHTML(self, markup):
   837         if markup.strip() == "":
   838             return ""
   839 
   840         if "<" not in markup and ">" not in markup:
   841             # Seems there are no tags.
   842             # Let's get all the "entity references".
   843             cleaned = markup
   844             import re
   845             entities = re.compile("&(?P<e>[a-zA-Z]+);").findall(cleaned)
   846             from htmlentitydefs import name2codepoint
   847             for ent in entities:
   848                 if name2codepoint.has_key(ent):
   849                     cleaned = cleaned.replace("&%s;" % ent, unichr(name2codepoint[ent]))
   850 
   851             # Then we replace all escaped unicodes.
   852             escapedunicodes = re.compile("&#(?P<h>[0-9]+);").findall(markup)
   853             for uni in escapedunicodes:
   854                 cleaned = cleaned.replace("&#%s;" % uni, unichr(int(uni)))
   855 
   856             self.text(cleaned)
   857 
   858         self._emitComment("RAW HTML: "+markup)
   859         return ""
   860 
   861     def div(self, on, **kw):
   862         """A div cannot really be supported in DocBook as it carries no
   863         semantic meaning, but the special cases can be handled when the class
   864         of the div carries the information.
   865 
   866         A dictionary is used for mapping between class names and the
   867         corresponding DocBook element.
   868 
   869         A MoinMoin comment is represented in DocBook by the remark element.
   870 
   871         The rest of the known classes are the admonitions in DocBook:
   872         warning, caution, important, note and hint
   873 
   874         Note: The remark entity can only contain inline elements, so it is
   875               very likely that the use of a comment div will produce invalid
   876               DocBook.
   877         """
   878         # Map your styles to docbook elements.
   879         # Even though comment is right now the only one that needs to be
   880         # mapped, having two different ways is more complicated than having
   881         # a single common way. Code clarity and generality first, especially
   882         # since we might want to do more div to docbook mappings in the future.
   883         class_to_docbook = {"warning":   "warning",
   884                             "caution":   "caution",
   885                             "important": "important",
   886                             "note":      "note",
   887                             "tip":       "tip",
   888                             "comment":   "remark"}
   889 
   890         if on and kw.get('css_class'):
   891             css_classes = kw.get('css_class').split()
   892             for style in class_to_docbook.keys():
   893                 if style in css_classes:
   894                     return self._handleNode(class_to_docbook[style], on)
   895 
   896         elif not on:
   897             if self.cur.nodeName in class_to_docbook.values():
   898                 return self._handleNode(self.cur.nodeName, on)
   899 
   900         return ""
   901 
   902     def span(self, on, **kw):
   903         """A span cannot really be supported in DocBook as it carries no
   904         semantic meaning, but the special case of a comment can be handled.
   905 
   906         A comment is represented in DocBook by the remark element.
   907 
   908         A comment span is recognized by the fact that it has the class
   909         "comment". Other cases of div use are ignored.
   910         """
   911         css_class = kw.get('css_class')
   912         if on and css_class and 'comment' in css_class.split():
   913             self._handleFormatting("remark", on)
   914         if not on and self.cur.nodeName == "remark":
   915             self._handleFormatting("remark", on)
   916         return ""
   917 
   918 
   919 
   920 ### Tables ##########################################################
   921 
   922     def table(self, on, attrs=(), **kw):
   923         if(on):
   924             if attrs:
   925                 self.curtable = Table(self, self.doc, self.cur, dict(attrs))
   926             else:
   927                 self.curtable = Table(self, self.doc, self.cur)
   928             self.cur = self.curtable.tableNode
   929         else:
   930             self.cur = self.curtable.finalizeTable()
   931             self.curtable = None
   932         return ""
   933 
   934     def table_row(self, on, attrs=(), **kw):
   935         if(on):
   936             if attrs:
   937                 self.curtable.addRow(dict(attrs))
   938             else:
   939                 self.cur = self.curtable.addRow()
   940         return ""
   941 
   942     def table_cell(self, on, attrs=(), **kw):
   943         if(on):
   944             if attrs:
   945                 self.cur = self.curtable.addCell(dict(attrs))
   946             else:
   947                 self.cur = self.curtable.addCell()
   948         return ""
   949 
   950 class Table:
   951     '''The Table class is used as a helper for collecting information about
   952     what kind of table we are building. When all relelvant data is gathered
   953     it calculates the different spans of the cells and columns.
   954 
   955     Note that it expects all arguments to be passed in a dict.
   956     '''
   957 
   958     def __init__(self, formatter, doc, parent, argsdict={}):
   959         self.formatter = formatter
   960         self.doc = doc
   961 
   962         self.tableNode = self.doc.createElement('informaltable')
   963         parent.appendChild(self.tableNode)
   964         self.colWidths = {}
   965         self.tgroup = self.doc.createElement('tgroup')
   966         # Bug in yelp, the two lines below don't affect rendering
   967         #self.tgroup.setAttribute('rowsep', '1')
   968         #self.tgroup.setAttribute('colsep', '1')
   969         self.curColumn = 0
   970         self.maxColumn = 0
   971         self.row = None
   972         self.tableNode.appendChild(self.tgroup)
   973 
   974         self.tbody = self.doc.createElement('tbody') # Note: This gets appended in finalizeTable
   975 
   976     def finalizeTable(self):
   977         """Calculates the final width of the whole table and the width of each
   978         column. Adds the colspec-elements and applies the colwidth attributes.
   979         Inserts the tbody element to the tgroup and returns the tables container
   980         element.
   981 
   982         A lot of the information is gathered from the style attributes passed
   983         to the functions
   984         """
   985         self.tgroup.setAttribute('cols', str(self.maxColumn))
   986         for colnr in range(0, self.maxColumn):
   987             colspecElem = self.doc.createElement('colspec')
   988             colspecElem.setAttribute('colname', 'col_%s' % str(colnr))
   989             if self.colWidths.has_key(str(colnr)) and self.colWidths[str(colnr)] != "1*":
   990                 colspecElem.setAttribute('colwidth', self.colWidths[str(colnr)])
   991             self.tgroup.appendChild(colspecElem)
   992         self.tgroup.appendChild(self.tbody)
   993         return self.tableNode.parentNode
   994 
   995     def addRow(self, argsdict={}):
   996         self.curColumn = 0
   997         self.row = self.doc.createElement('row')
   998         # Bug in yelp, doesn't affect the outcome.
   999         self.row.setAttribute("rowsep", "1") #Rows should have lines between them
  1000         self.tbody.appendChild(self.row)
  1001         return self.row
  1002 
  1003     def addCell(self, argsdict={}):
  1004         if 'style' in argsdict:
  1005             argsdict.update(self.formatter._convertStylesToDict(argsdict['style'].strip('"')))
  1006 
  1007         cell = self.doc.createElement('entry')
  1008         cell.setAttribute('rowsep', '1')
  1009         cell.setAttribute('colsep', '1')
  1010 
  1011         self.row.appendChild(cell)
  1012         self._handleSimpleCellAttributes(cell, argsdict)
  1013         self._handleColWidth(argsdict)
  1014         self.curColumn += self._handleColSpan(cell, argsdict)
  1015 
  1016         self.maxColumn = max(self.curColumn, self.maxColumn)
  1017 
  1018         return cell
  1019 
  1020     def _handleColWidth(self, argsdict={}):
  1021         if not argsdict.has_key("width"):
  1022             return
  1023         argsdict["width"] = argsdict["width"].strip('"')
  1024         if not argsdict["width"].endswith("%"):
  1025             self.formatter._emitComment("Width %s not supported" % argsdict["width"])
  1026             return
  1027 
  1028         self.colWidths[str(self.curColumn)] = argsdict["width"][:-1] + "*"
  1029 
  1030     def _handleColSpan(self, element, argsdict={}):
  1031         """Returns the number of colums this entry spans"""
  1032         if not argsdict or not argsdict.has_key('colspan'):
  1033             return 1
  1034         assert(element.nodeName == "entry")
  1035         extracols = int(argsdict['colspan'].strip('"')) - 1
  1036         element.setAttribute('namest', "col_" + str(self.curColumn))
  1037         element.setAttribute('nameend', "col_" + str(self.curColumn + extracols))
  1038         return 1 + extracols
  1039 
  1040     def _handleSimpleCellAttributes(self, element, argsdict={}):
  1041         if not argsdict:
  1042             return
  1043         assert(element.nodeName == "entry")
  1044 
  1045         safe_values_for = {'valign': ('top', 'middle', 'bottom'),
  1046                            'align': ('left', 'center', 'right'),
  1047                           }
  1048 
  1049         if argsdict.has_key('rowspan'):
  1050             extrarows = int(argsdict['rowspan'].strip('"')) - 1
  1051             element.setAttribute('morerows', str(extrarows))
  1052 
  1053         if argsdict.has_key('align'):
  1054             value = argsdict['align'].strip('"')
  1055             if value in safe_values_for['align']:
  1056                 element.setAttribute('align', value)
  1057             else:
  1058                 self.formatter._emitComment("Alignment %s not supported" % value)
  1059                 pass
  1060 
  1061         if argsdict.has_key('valign'):
  1062             value = argsdict['valign'].strip('"')
  1063             if value in safe_values_for['valign']:
  1064                 element.setAttribute('valign', value)
  1065             else:
  1066                 self.formatter._emitComment("Vertical alignment %s not supported" % value)
  1067                 pass
  1068 
  1069