MoinMoin/formatter/text_docbook.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Sat, 13 Mar 2010 22:45:07 +0100
changeset 4499 61d77b5506a5
parent 3291 954300d424eb
permissions -rw-r--r--
Fix docbook formatter crashing, see MoinMoinPatch/IncludeMacroWithDocBookFormatter

If a page has a Include macro call with editlink argument, the formatter
crashes. This patch removes the editlink arg before processing it.
mvirkkil@3199
     1
# -*- coding: iso-8859-1 -*-
alex@82
     2
"""
alex@82
     3
    MoinMoin - DocBook Formatter
alex@82
     4
mvirkkil@3199
     5
    @copyright: 2005,2008 by Mikko Virkkilä <mvirkkil@cc.hut.fi>
alex@82
     6
    @copyright: 2005 by MoinMoin:AlexanderSchremmer (small modifications)
alex@82
     7
    @copyright: 2005 by MoinMoin:Petr Pytelka <pyta@lightcomp.com> (small modifications)
tw@2286
     8
alex@82
     9
    @license: GNU GPL, see COPYING for details.
alex@82
    10
"""
alex@82
    11
tw@4499
    12
import os,re
mvirkkil@3195
    13
mvirkkil@3183
    14
from xml.dom import getDOMImplementation
mvirkkil@3183
    15
from xml.dom.ext.reader import Sax
mvirkkil@3183
    16
from xml.dom.ext import Node
alex@82
    17
tw@661
    18
from MoinMoin.formatter import FormatterBase
tw@1791
    19
from MoinMoin import wikiutil
alex@86
    20
from MoinMoin.error import CompositeError
Florian@200
    21
from MoinMoin.action import AttachFile
alex@86
    22
mvirkkil@3291
    23
#For revision history
mvirkkil@3291
    24
from MoinMoin.logfile import editlog
mvirkkil@3291
    25
from MoinMoin import user
mvirkkil@3291
    26
mvirkkil@3195
    27
mvirkkil@3198
    28
class InternalError(CompositeError):
    """Error type of the DocBook formatter module.

    Raised at import time when the "4DOM" DOM implementation cannot be loaded.
    """
alex@86
    30
alex@86
    31
# The whole formatter builds its output through the "4DOM" implementation, so
# a missing implementation is reported once, at import time, with a hint about
# what to install.
try:
    dom = getDOMImplementation("4DOM")
except ImportError:
    raise InternalError("You need to install 4suite to use the DocBook formatter.")
alex@82
    35
alex@82
    36
alex@82
    37
class Formatter(FormatterBase):
mvirkkil@3225
    38
    #TODO: How to handle revision history and other meta-info from included files?
mvirkkil@3247
    39
    #      The problem is that we don't know what the original page is, since
mvirkkil@3247
    40
    #      the Include-macro doesn't pass us the information.
alex@82
    41
mvirkkil@3225
    42
    # this list is extended as the page is parsed. Could be optimized by adding them here?
alex@82
    43
    section_should_break = ['abstract', 'para', 'emphasis']
mvirkkil@3192
    44
mvirkkil@3248
    45
    blacklisted_macros = ('TableOfContents', 'ShowSmileys', 'Navigation')
alex@82
    46
mvirkkil@3187
    47
    # If the current node is one of the following and we are about to emit
mvirkkil@3187
    48
    # text, the text should be wrapped in a paragraph
mvirkkil@3187
    49
    wrap_text_in_para = ('listitem', 'glossdef', 'article', 'chapter', 'tip', 'warning', 'note', 'caution', 'important')
mvirkkil@3192
    50
mvirkkil@3192
    51
    # from dtd
mvirkkil@3191
    52
    _can_contain_section = ("section", "appendix", "article", "chapter", "patintro", "preface")
mvirkkil@3187
    53
mvirkkil@3183
    54
    def __init__(self, request, doctype="article", **kw):
        """Initialise the formatter state.

        request -- handed to FormatterBase and kept as self.request
        doctype -- stored as self.doctype; startDocument uses it as the name
                   of the document (root) element
        kw      -- forwarded to FormatterBase; the "is_included" key is also
                   inspected here
        """
        FormatterBase.__init__(self, request, **kw)
        self.request = request

        # If the formatter is used by the Include macro, it will set
        # is_included=True. The macro does not call startDocument and
        # endDocument, and the formatter doesn't work properly unless they
        # are called, so startContent and endContent call them instead.
        self.include_kludge = bool(kw.get("is_included"))

        self.doctype = doctype
        self.curdepth = 0
        self.cur = None
alex@86
    73
mvirkkil@3183
    74
    def startDocument(self, pagename):
        """Create the DOM document for pagename and add its title.

        A stand-alone "article" gets an <articleinfo> element that receives
        the title and the revision history; in every other case only a title
        is added directly to the root element.  Always returns "".
        """
        doctype_node = dom.createDocumentType(
            self.doctype,
            "-//OASIS//DTD DocBook XML V4.4//EN",
            "http://www.docbook.org/xml/4.4/docbookx.dtd")
        self.doc = dom.createDocument(None, self.doctype, doctype_node)

        self.title = pagename
        self.root = self.doc.documentElement

        if self.include_kludge or self.doctype != "article":
            self._addTitleElement(self.title, targetNode=self.root)
        else:
            info = self.doc.createElement("articleinfo")
            self.root.appendChild(info)
            self._addTitleElement(self.title, targetNode=info)
            self._addRevisionHistory(targetNode=info)

        # All following output is appended below the root element.
        self.cur = self.root
        return ""
alex@82
    92
mvirkkil@3183
    93
    def startContent(self, content_id="content", **kw):
mvirkkil@3184
    94
        if self.include_kludge and not self.cur:
mvirkkil@3184
    95
            return self.startDocument("OnlyAnIdiotWouldCreateSuchaPage")
mvirkkil@3183
    96
        return ""
mvirkkil@3183
    97
alex@82
    98
    def endContent(self):
mvirkkil@3184
    99
        if self.include_kludge:
mvirkkil@3184
   100
            return self.endDocument()
mvirkkil@3183
   101
        return ""
alex@82
   102
alex@82
   103
    def endDocument(self):
        """Serialise self.doc to a string, reset the cursor and return the string."""
        from xml.dom.ext import PrettyPrint, Print
        import StringIO

        buf = StringIO.StringIO()
        Print(self.doc, buf)
        serialized = buf.getvalue()
        buf.close()

        # startContent checks self.cur to decide whether a document is open.
        self.cur = None
        return serialized
alex@82
   114
tw@407
   115
    def text(self, text, **kw):
        """Append text at the current position; always returns "".

        Inside preformatted content the text goes into a CDATA section
        (extending the previous one when possible).  If the current node is
        listed in wrap_text_in_para the text is put into a paragraph.
        Otherwise a plain text node is appended.
        """
        # The two-character sequence backslash + "n" stands for a real newline
        # (linebreak() sends it for preformatted line breaks).
        if text == "\\n":
            content = "\n"
        else:
            content = text

        if content and self._isInsidePreformatted():
            last = self.cur.lastChild
            if last is not None and last.nodeType == Node.CDATA_SECTION_NODE:
                # We can add it to a previous CDATA section
                last.nodeValue = last.nodeValue + content
            else:
                # We create a new cdata section
                self.cur.appendChild(self.doc.createCDATASection(content))
            return ""

        if self.cur.nodeName not in self.wrap_text_in_para:
            self.cur.appendChild(self.doc.createTextNode(content))
            return ""

        # If we already wrapped one text item in a para, we add to that para
        # instead of creating a new one. (Open question: should a space be added?)
        last = self.cur.lastChild
        if last is not None and last.nodeName == 'para':
            last.appendChild(self.doc.createTextNode(content))
        else:
            self.paragraph(1)
            self.text(text)
            self.paragraph(0)
        return ""
alex@82
   144
alex@82
   145
    def heading(self, on, depth, **kw):
        """Open or close a heading; always returns "".

        Opening creates a new <section> with a <title> child and leaves the
        cursor inside the title.  Closing moves the cursor one level up, i.e.
        from the title into the section when the cursor is still in the title.
        """
        # Leave any element that must not contain a section/title.
        while self.cur.nodeName in self.section_should_break:
            self.cur = self.cur.parentNode

        if not on:
            self.cur = self.cur.parentNode
            return ""

        # try to go to higher level if needed
        if depth <= self.curdepth:
            # number of levels we want to go higher
            levels_up = self.curdepth - depth + 1
            for dummy in range(levels_up):
                # climb until we reach a node that may contain a section ...
                while self.cur.nodeName not in self._can_contain_section:
                    self.cur = self.cur.parentNode
                # ... and if that node is itself a section, step out of it
                if self.cur.nodeName == "section":
                    self.cur = self.cur.parentNode

        section = self.doc.createElement("section")
        self.cur.appendChild(section)

        title = self.doc.createElement("title")
        section.appendChild(title)

        self.cur = title
        self.curdepth = depth
        return ""
alex@82
   174
tw@407
   175
    def paragraph(self, on, **kw):
        """Open or close a <para>.

        Closing a paragraph without content removes it again, and opening a
        paragraph while already inside one is ignored; both cases return "".
        Otherwise the result of _handleNode is returned.
        """
        FormatterBase.paragraph(self, on)

        if on:
            # Let's prevent para inside para
            if self.cur.nodeName == "para":
                return ""
        elif not self._hasContent(self.cur):
            # Let's prevent empty paras
            empty = self.cur
            self.cur = empty.parentNode
            self.cur.removeChild(empty)
            return ""

        return self._handleNode("para", on)
alex@82
   190
alex@82
   191
    def linebreak(self, preformatted=1):
        """Emit a line break; always returns "".

        If preformatted, it will simply output a linebreak.
        If we are in a paragraph, we will close it, and open another one.
        Anywhere else a warning comment is emitted followed by a newline.
        """
        if preformatted:
            # text() turns this escaped form into a real newline
            self.text('\\n')
            return ""

        if self.cur.nodeName == "para":
            self.paragraph(0)
            self.paragraph(1)
            return ""

        self._emitComment("Warning: Probably not emitting right sort of linebreak")
        self.text('\n')
        return ""
alex@82
   205
alex@82
   206
### Inline ##########################################################
alex@82
   207
tw@407
   208
    def strong(self, on, **kw):
        """Start/stop strong text: an "emphasis" node with the role "strong"."""
        role = (('role', 'strong'), )
        return self._handleFormatting("emphasis", on, role)
alex@82
   210
tw@407
   211
    def emphasis(self, on, **kw):
        """Start/stop emphasised text: a plain "emphasis" node."""
        node_name = "emphasis"
        return self._handleFormatting(node_name, on)
alex@82
   213
tw@407
   214
    def underline(self, on, **kw):
        """Start/stop underlined text: an "emphasis" node with the role "underline"."""
        role = (('role', 'underline'), )
        return self._handleFormatting("emphasis", on, role)
alex@82
   216
tw@407
   217
    def highlight(self, on, **kw):
        """Start/stop highlighted text: an "emphasis" node with the role "highlight"."""
        role = (('role', 'highlight'), )
        return self._handleFormatting("emphasis", on, role)
alex@82
   219
tw@407
   220
    def sup(self, on, **kw):
        """Start/stop superscript text: a "superscript" node."""
        node_name = "superscript"
        return self._handleFormatting(node_name, on)
alex@82
   222
tw@407
   223
    def sub(self, on, **kw):
        """Start/stop subscript text: a "subscript" node."""
        node_name = "subscript"
        return self._handleFormatting(node_name, on)
alex@82
   225
tw@407
   226
    def strike(self, on, **kw):
        """Start/stop struck-through text: an "emphasis" node with the role "strikethrough"."""
        # Does not yield <strike> when run through the HTML XSLT files,
        # but the DocBook markup itself seems to be correct.
        role = (('role', 'strikethrough'), )
        return self._handleFormatting("emphasis", on, role)
Florian@217
   231
tw@407
   232
    def code(self, on, **kw):
        """Start/stop inline code; a "code" node closed without content is removed."""
        if on or self._hasContent(self.cur):
            return self._handleFormatting("code", on)

        # Let's prevent empty code: drop the node we are about to close.
        empty = self.cur
        self.cur = empty.parentNode
        self.cur.removeChild(empty)
        return ""
alex@82
   241
tw@407
   242
    def preformatted(self, on, **kw):
        """Start/stop preformatted text: a "screen" node."""
        node_name = "screen"
        return self._handleFormatting(node_name, on)
alex@82
   244
alex@82
   245
alex@82
   246
### Lists ###########################################################
alex@82
   247
tw@407
   248
    def number_list(self, on, type=None, start=None, **kw):
        """Start/stop an "orderedlist".

        A known wiki list type ('1', 'a', 'A', 'i', 'I') is translated into
        the matching "numeration" attribute; unknown or missing types add no
        attribute.  The start argument is not used.
        """
        numerations = {
            '1': "arabic",
            'a': "loweralpha",
            'A': "upperalpha",
            'i': "lowerroman",
            'I': "upperroman",
        }

        attrs = []
        if type and type in numerations:
            attrs.append(("numeration", numerations[type]))

        return self._handleNode('orderedlist', on, attrs)
alex@82
   261
tw@407
   262
    def bullet_list(self, on, **kw):
        """Start/stop a bullet list: an "itemizedlist" node."""
        node_name = "itemizedlist"
        return self._handleNode(node_name, on)
alex@82
   264
mvirkkil@3193
   265
    def listitem(self, on, style=None, **kw):
        """Start/stop a "listitem".

        Inside a glosslist/glossentry the call is redirected to
        definition_desc.  A 'list-style-type' entry found in style is passed
        on as the "override" attribute.
        """
        if self.cur.nodeName in ("glosslist", "glossentry"):
            return self.definition_desc(on)

        if on and self.cur.nodeName == "listitem":
            # If we are inside a listitem, and someone wants to create a new
            # one, it means they forgot to close the old one, and we need to
            # do it for them.
            self.listitem(0)

        item_attrs = []
        if on and style:
            styles = self._convertStylesToDict(style)
            if 'list-style-type' in styles:
                item_attrs.append(('override', styles['list-style-type']))

        return self._handleNode("listitem", on, attributes=item_attrs)
mvirkkil@3193
   280
tw@407
   281
    def definition_list(self, on, **kw):
        """Start/stop a definition list: a "glosslist" node."""
        node_name = "glosslist"
        return self._handleNode(node_name, on)
alex@82
   283
tw@407
   284
    def definition_term(self, on, compact=0, **kw):
        """Start/stop a definition term; always returns "".

        Opening creates a "glossentry" with a "glossterm" inside.  Closing
        closes both again, unless the term has no content, in which case the
        whole entry is removed.  The compact argument is not used.
        """
        if on:
            self._handleNode("glossentry", on)
            self._handleNode("glossterm", on)
            return ""

        if not self._hasContent(self.cur):
            # No term info :( -- throw away the entry holding the empty term
            entry = self.cur.parentNode
            self.cur = entry.parentNode
            self.cur.removeChild(entry)
            return ""

        self._handleNode("glossterm", on)
        self._handleNode("glossentry", on)
        return ""
tw@999
   299
tw@407
   300
    def definition_desc(self, on, **kw):
        """Start/stop a definition description ("glossdef"); always returns "".

        Opening needs a "glossentry" to attach to: either the current node or
        the last child of the current "glosslist".  If neither is available a
        comment is emitted and nothing is created.  Closing removes a
        description without content and moves the cursor back up to the
        enclosing "glosslist".
        """
        if not on:
            if not self._hasContent(self.cur):
                # Seems no valuable info was added
                assert(self.cur.nodeName == "glossdef")
                empty = self.cur
                self.cur = empty.parentNode
                self.cur.removeChild(empty)

            while self.cur.nodeName != "glosslist":
                self.cur = self.cur.parentNode
            return ""

        if self.cur.nodeName == "glossentry":
            # Good, we can add it here.
            self._handleNode("glossdef", on)
            return ""

        # We are somewhere else, let's see...
        if self.cur.nodeName != "glosslist":
            self._emitComment("Trying to add a definition, but we arent in a glosslist")
            return ""

        last = self.cur.lastChild
        if not last or last.nodeName != "glossentry":
            self._emitComment("Trying to add a definition, but there is no entry")
            return ""

        # Found it, calling again with the entry as current node
        self.cur = last
        return self.definition_desc(on)
alex@82
   329
alex@82
   330
### Links ###########################################################
mvirkkil@3247
   331
    # TODO: Fix anchors to documents which are included. Needs probably to be
mvirkkil@3247
   332
    #       a postprocessing rule. Could be done by having the anchors have
mvirkkil@3247
   333
    #       the "linkend" value of PageName#anchor. Then at post process the
mvirkkil@3247
   334
    #       following would be done for all urls:
mvirkkil@3247
   335
    #        - get all ulinks with an anchor part in their url
mvirkkil@3247
   336
    #        - get the ulink's PageName#anchor -part by removing baseurl part
mvirkkil@3247
   337
    #        - if any of our <anchor> elements have the same PageName#anchor
mvirkkil@3247
   338
    #          value as our <ulink>, then replace the ulink with a link
mvirkkil@3247
   339
    #          element.
mvirkkil@3247
   340
    #       Note: This would make the case where someone wants to link to a
mvirkkil@3247
   341
    #             section on the original webpage impossible. The link would
mvirkkil@3247
   342
    #             instead point within the docbook page and not to the webpage.
mvirkkil@3247
   343
alex@82
   344
alex@82
   345
    def pagelink(self, on, pagename='', page=None, **kw):
        """Start/stop a link to a wiki page.

        Calls FormatterBase.pagelink first and then treats the page as an
        interwiki link into the 'Self' wiki.
        """
        FormatterBase.pagelink(self, on, pagename, page, **kw)
        own_wiki = 'Self'
        return self.interwikilink(on, own_wiki, pagename, **kw)
alex@82
   348
alex@82
   349
    def interwikilink(self, on, interwiki='', pagename='', **kw):
        """Start/stop a link to pagename in the wiki named interwiki.

        The target is resolved to a URL (with "#anchor" appended when an
        anchor keyword is given) and handed to url().  Links to the page
        being formatted are flagged with is_self=True.
        """
        if not on:
            return self.url(on, **kw)

        resolved = wikiutil.resolve_interwiki(self.request, interwiki, pagename)
        dummy_tag, wikiurl, wikitail, dummy_bad = resolved
        wikiurl = wikiutil.mapURL(self.request, wikiurl)
        href = wikiutil.join_wiki(wikiurl, wikitail)
        if "anchor" in kw:
            href = "%s#%s" % (href, kw['anchor'])

        if pagename == self.page.page_name:
            kw['is_self'] = True

        return self.url(on, href, **kw)
tw@999
   363
alex@82
   364
    def url(self, on, url=None, css=None, **kw):
        """Start/stop a link.

        A link carrying both an anchor and a true is_self keyword becomes a
        "link" node whose linkend is the anchor; everything else becomes a
        "ulink" node with the url attribute.  URLs starting with "/" are
        prefixed with the request's base URL.  The css argument is not used.
        """
        if url and url.startswith("/"):
            # convert to absolute path:
            url = "%s%s" % (self.request.getBaseURL(), url)

        if not on:
            self._cleanupUlinkNode()

        points_into_own_page = "anchor" in kw and kw.get("is_self")
        if points_into_own_page:
            # handle the case where we are pointing to somewhere inside our own document
            return self._handleNode("link", on, attributes=(('linkend', kw["anchor"]), ))

        return self._handleNode("ulink", on, attributes=(('url', url), ))
alex@82
   377
alex@82
   378
    def anchordef(self, name):
        """Insert an "anchor" node with id=name at the current position; returns ""."""
        id_attr = (('id', name), )
        # open and immediately close the node again
        self._handleNode("anchor", True, attributes=id_attr)
        self._handleNode("anchor", False)
        return ""
alex@82
   382
tw@407
   383
    def anchorlink(self, on, name='', **kw):
        """Start/stop a "link" node pointing at an anchor.

        name -- anchor name; when non-empty it is used as "endterm" and, if
                no id is given, also as "linkend"
        kw   -- an optional "id" entry takes precedence as "linkend"

        Returns the result of _handleNode.
        """
        linkid = kw.get('id', None)
        attrs = []
        if name != '':
            attrs.append(('endterm', name))

        # This must test the looked-up value: the builtin ``id`` is never None,
        # so testing it always added ('linkend', None) when no id was passed
        # and the fallback to name could never run.
        if linkid is not None:
            attrs.append(('linkend', linkid))
        elif name != '':
            attrs.append(('linkend', name))

        return self._handleNode("link", on, attrs)
alex@82
   394
tw@407
   395
### Attachments ######################################################
Florian@200
   396
tw@2701
   397
    def attachment_link(self, on, url=None, **kw):
        """Start/stop a link to the attachment named by url.

        Opening resolves the attachment to its URL and opens a link titled
        "attachment:<url>"; closing closes the link.
        """
        assert on in (0, 1, False, True) # make sure we get called the new way, not like the 1.5 api was
        # we do not output a "upload link" when outputting docbook
        if not on:
            return self.url(0)

        pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
        # NOTE(review): the tainted name is computed but not used below
        fname = wikiutil.taintfilename(filename)
        target = AttachFile.getAttachUrl(pagename, filename, self.request)
        return self.url(1, target, title="attachment:%s" % url)
Florian@200
   407
Florian@200
   408
    def attachment_image(self, url, **kw):
        """Embed the attached image named by url.

        Works out where the attachment file is stored and, if it exists,
        hands its URL over to image() together with any keyword arguments.
        The attachment name is passed along as attachment_title, which
        image() uses when no other text alternative is given.

        If the file is not found, a simple "[attachment:...]" text is
        emitted instead.
        """
        _ = self.request.getText
        pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
        fname = wikiutil.taintfilename(filename)
        fpath = AttachFile.getFilename(self.request, pagename, fname)

        if os.path.exists(fpath):
            src = AttachFile.getAttachUrl(pagename, filename,
                                          self.request, addts=1)
            return self.image(src=src, attachment_title=url, **kw)

        return self.text("[attachment:%s]" % url)
mvirkkil@3204
   429
Florian@200
   430
Florian@200
   431
    def attachment_drawing(self, url, text, **kw):
        """Embed the PNG rendering of the drawing attachment named by url.

        The ".png" file belonging to the drawing is looked up; if it exists
        it is emitted through image() with the drawing name as alt text,
        otherwise a "[drawing:...]" text is emitted.  The text argument is
        not used.
        """
        _ = self.request.getText
        pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
        drawing = wikiutil.taintfilename(filename)
        png_fname = drawing + ".png"
        png_filename = filename + ".png"
        fpath = AttachFile.getFilename(self.request, pagename, png_fname)

        if os.path.exists(fpath):
            src = AttachFile.getAttachUrl(pagename, png_filename, self.request, addts=1)
            return self.image(alt=drawing, src=src, html_class="drawing")

        return self.text("[drawing:%s]" % url)
Florian@200
   444
tw@407
   445
### Images and Smileys ##############################################
alex@82
   446
tw@407
   447
    def image(self, src=None, **kw):
        """Append an <inlinemediaobject> for an image at the current position.

        src -- image URL; a URL starting with "/" is prefixed with the
               request's base URL
        kw  -- "width" and "height" become the width/depth attributes of the
               <imagedata>; the first non-empty value among "title",
               "html_title", "alt", "html_alt" and "attachment_title" is
               added as the text alternative

        Always returns "".
        """
        media = self.doc.createElement('inlinemediaobject')

        imagewrap = self.doc.createElement('imageobject')
        media.appendChild(imagewrap)

        image = self.doc.createElement('imagedata')
        if src:
            if src.startswith("/"):
                # convert to absolute path:
                src = self.request.getBaseURL() + src
            image.setAttribute('fileref', src)
        if 'width' in kw:
            image.setAttribute('width', str(kw['width']))
        if 'height' in kw:
            image.setAttribute('depth', str(kw['height']))
        imagewrap.appendChild(image)

        # Look for any suitable title, order is important.  Empty values are
        # skipped, so that e.g. an empty "alt" does not hide a usable
        # "attachment_title" further down the list.
        title = ''
        for key in ('title', 'html_title', 'alt', 'html_alt', 'attachment_title'):
            if kw.get(key):
                title = kw[key]
                break
        if title:
            txtcontainer = self.doc.createElement('textobject')
            self._addTextElem(txtcontainer, "phrase", title)
            media.appendChild(txtcontainer)

        self.cur.appendChild(media)
        return ""
tw@999
   481
tw@2713
   482
    def transclusion(self, on, **kw):
        """Not implemented for DocBook: only emits a comment saying so; returns ""."""
        # TODO, see text_html formatter
        notice = 'transclusion is not implemented in DocBook formatter'
        self._emitComment(notice)
        return ""
tw@2713
   486
tw@2713
   487
    def transclusion_param(self, **kw):
        """Not implemented for DocBook: only emits a comment saying so; returns ""."""
        # TODO, see text_html formatter
        notice = 'transclusion parameters are not implemented in DocBook formatter'
        self._emitComment(notice)
        return ""
tw@2713
   491
alex@82
   492
    def smiley(self, text):
        """Return what the request's theme produces as icon for the smiley text."""
        theme = self.request.theme
        return theme.make_icon(text)
alex@82
   494
alex@82
   495
    def icon(self, type):
        """Icons are not output by this formatter; always returns ''."""
        # The theme call is deliberately disabled:
        # self.request.theme.make_icon(type)
        return ''
alex@82
   497
alex@82
   498
mvirkkil@3190
   499
### Code area #######################################################
tw@407
   500
mvirkkil@3194
   501
    def code_area(self, on, code_id, code_type=None, show=0, start=-1, step=-1):
        """Creates a formatted code region using screen or programlisting,
        depending on if a programming language was defined (code_type).

        The code_id is not used for anything in this formatter, but is just
        there to remain compatible with the HTML formatter's function; step
        is not used either.

        Line numbering is requested through the linenumbering and
        startinglinenumber attributes; a start below 1 is treated as 1.

        Call once with on=1 to start the region, and a second time
        with on=0 to end it.
        """
        if not on:
            return self._handleNode(None, on)

        if show:
            numbering = 'numbered'
        else:
            numbering = 'unnumbered'
        if start < 1:
            start = 1

        # parser names mapped to the value used for the "language" attribute
        programming_languages = {
            "ColorizedJava": "java",
            "ColorizedPython": "python",
            "ColorizedCPlusPlus": "c++",
            "ColorizedPascal": "pascal",
        }

        attrs = [('linenumbering', numbering),
                 ('startinglinenumber', str(start))]
        if code_type is None:
            node_name = "screen"
        else:
            node_name = "programlisting"
            # unknown code types are passed through unchanged
            language = programming_languages.get(code_type, code_type)
            attrs.append(('language', language))
        attrs.append(('format', 'linespecific'))

        return self._handleNode(node_name, on, attributes=tuple(attrs))
alex@82
   544
alex@82
   545
    def code_line(self, on):
        """Starting a code line appends a newline text node; ending one does nothing."""
        if not on:
            return ''
        newline = self.doc.createTextNode('\n')
        self.cur.appendChild(newline)
        return ''
alex@82
   549
alex@82
   550
    def code_token(self, on, tok_type):
        """
        DocBook has some support for semantic annotation of code so the
        known tokens will be mapped to DocBook entities.

        Token types that are unknown, or mapped to an empty name, produce no
        markup and return "".
        """
        toks_map = {
            'ID': 'methodname',
            'Operator': '',
            'Char': '',
            'Comment': 'lineannotation',
            'Number': '',
            'String': 'phrase',
            'SPChar': '',
            'ResWord': 'token',
            'ConsWord': 'symbol',
            'Error': 'errortext',
            'ResWord2': 'type',
            'Special': '',
            'Preprc': '',
            'Text': '',
        }
        node_name = toks_map.get(tok_type)
        if not node_name:
            return ""
        return self._handleFormatting(node_name, on)
mvirkkil@3190
   574
### Macro ###########################################################
alex@82
   575
tw@2779
   576
    def macro(self, macro_obj, name, args, markup=None):
        """As far as the DocBook formatter is concerned there are three
        kinds of macros: Bad, Handled and Unknown.

        The Bad ones are the ones that are known not to work, and are on its
        blacklist. They will be ignored and an XML comment will be written
        noting that the macro is not supported.

        Handled macros are such macros that code is written to handle them.
        For example for the FootNote macro it means that instead of executing
        the macro, a DocBook footnote entity is created, with the relevant
        pieces of information filled in.

        The Unknown are handled by executing the macro and capturing any
        textual output. There shouldn't be any textual output since macros
        should call formatter methods. This is unfortunately not always true,
        so the output is then fed in to an xml parser and the
        resulting nodes copied to the DocBook-dom tree. If the output is not
        valid xml then a comment is written in the DocBook that the macro
        should be fixed.

        @param macro_obj: macro instance, handed on to FormatterBase.macro
        @param name: name of the macro
        @param args: argument string of the macro call (may be None)
        @param markup: not used by this formatter
        @return: always an empty unicode string, all output goes to the DOM
        """
        # Another alternative would be to feed the output to rawHTML or even
        # combining these two approaches. The _best_ alternative would be to
        # fix the macros.
        excludes = ("articleinfo", "title")

        if name in self.blacklisted_macros:
            self._emitComment("The macro %s doesn't work with the DocBook formatter." % name)

        elif name == "FootNote":
            footnote = self.doc.createElement('footnote')
            # u"%s" keeps unicode arguments intact. str(args) raises
            # UnicodeEncodeError on Python 2 as soon as the footnote text
            # contains a non-ASCII character.
            self._addTextElem(footnote, "para", u"%s" % (args, ))
            self.cur.appendChild(footnote)

        elif name == "Include":
            was_in_para = self.cur.nodeName == "para"
            if was_in_para:
                self.paragraph(0)

            # Remove the editlink argument because it causes trouble.
            # The pattern is not optional any more: an optional group also
            # matches the empty string at every position of args.
            macro_args = args
            if args:
                macro_args = re.sub(r',\s*editlink', '', args)

            text = FormatterBase.macro(self, macro_obj, name, macro_args)
            if text.strip():
                self._copyExternalNodes(Sax.FromXml(text).documentElement.childNodes, exclude=excludes)
            if was_in_para:
                self.paragraph(1)

        else:
            text = FormatterBase.macro(self, macro_obj, name, args)
            if text:
                from xml.parsers.expat import ExpatError
                try:
                    xml_dom = Sax.FromXml(text).documentElement.childNodes
                    self._copyExternalNodes(xml_dom, exclude=excludes)
                except ExpatError:
                    self._emitComment("The macro %s caused an error and should be blacklisted. It returned the data '%s' which caused the docbook-formatter to choke. Please file a bug." % (name, text))

        return u""
alex@82
   637
mvirkkil@3190
   638
### Util functions ##################################################
mvirkkil@3190
   639
mvirkkil@3184
   640
    def _copyExternalNodes(self, nodes, deep=1, target=None, exclude=()):
        """Copy nodes that belong to another DOM document into our tree.

        Nodes whose name is listed in exclude are skipped. A para that
        would end up inside a para target is not copied itself: its
        children are copied into the target instead and self.cur is moved
        to the parent of the target. Every other node is imported into
        self.doc and appended to the target.

        @param nodes: the nodes to copy
        @param deep: handed on to importNode
        @param target: node that receives the copies, self.cur if not given
        @param exclude: names of nodes that must not be copied
        """
        destination = target or self.cur

        for candidate in nodes:
            candidate_name = candidate.nodeName
            if candidate_name in exclude:
                continue
            if candidate_name == "para" and destination.nodeName == "para":
                # Note: neither deep nor exclude is handed down here.
                self._copyExternalNodes(candidate.childNodes, target=destination)
                self.cur = destination.parentNode
                continue
            destination.appendChild(self.doc.importNode(candidate, deep))
mvirkkil@3184
   652
mvirkkil@3184
   653
    def _emitComment(self, text):
        """Append an XML comment with the given text to the current node.

        XML allows neither "--" inside a comment nor "-" as its last
        character, so runs of hyphens are broken up with spaces before
        the comment node is created.

        @param text: the text of the comment
        """
        # One replace() pass is not enough: it turns "---" into "- --",
        # which still contains "--". Repeat until nothing is left.
        while "--" in text:
            text = text.replace("--", "- -")
        if text.endswith("-"):
            # Otherwise the serialized comment would end in "--->".
            text += " "
        self.cur.appendChild(self.doc.createComment(text))
mvirkkil@3184
   656
mvirkkil@3190
   657
    def _handleNode(self, name, on, attributes=()):
        """Open or close an element and keep self.cur up to date.

        Opening creates the element below self.cur, sets the given
        attributes on it and makes it the new self.cur. Closing moves
        self.cur up to its parent node.

        @param name: name of the element
        @param on: true to open the element, false to close it
        @param attributes: (name, value) pairs to set on a new element
        @return: always an empty string
        """
        if not on:
            # Because we prevent para inside para, we might get extra
            # "please exit para" when we are no longer inside one.
            #
            # TODO: Maybe rethink the para in para case
            if name != "para" or self.cur.nodeName == "para":
                self.cur = self.cur.parentNode
            return ""

        element = self.doc.createElement(name)
        self.cur.appendChild(element)
        for attr_name, attr_value in attributes:
            element.setAttribute(attr_name, attr_value)
        self.cur = element
        return ""
mvirkkil@3190
   677
mvirkkil@3190
   678
    def _handleFormatting(self, name, on, attributes=()):
        """Open or close a formatting element via _handleNode.

        Every element name that passes through here is added to
        self.section_should_break, the list of elements that should not
        contain a section.
        """
        no_section_inside = self.section_should_break
        if name not in no_section_inside:
            no_section_inside.append(name)
        return self._handleNode(name, on, attributes)
mvirkkil@3190
   683
mvirkkil@3192
   684
    def _isInsidePreformatted(self):
        """Tell whether self.cur or one of its ancestors is a preformatted
           element (screen or programlisting), which means the child would
           need to be preformatted == embedded in a cdata section"""
        preformatted = ("screen", "programlisting")
        ancestor = self.cur
        while True:
            if not ancestor:
                return False
            if ancestor.nodeName in preformatted:
                return True
            ancestor = ancestor.parentNode
mvirkkil@3192
   694
mvirkkil@3193
   695
    def _hasContent(self, node):
        """Tell whether a node carries any information.

        A node has content when it has at least one attribute, when one
        of its children is a text or cdata node that is not just
        whitespace, or when one of its children has content itself.
        """
        attributes = node.attributes
        if attributes and len(attributes):
            return True

        textual = (Node.TEXT_NODE, Node.CDATA_SECTION_NODE)
        for child in node.childNodes:
            if child.nodeType in textual and child.nodeValue.strip():
                return True
            if self._hasContent(child):
                return True
        return False
mvirkkil@3193
   707
mvirkkil@3183
   708
    def _addTitleElement(self, titleTxt, targetNode=None):
        """Add a title element with the text titleTxt to targetNode, or
        to the current node when no target node is given."""
        self._addTextElem(targetNode or self.cur, "title", titleTxt)
mvirkkil@3183
   712
mvirkkil@3189
   713
    def _convertStylesToDict(self, styles):
        '''Turn a CSS style string into a dict of property: value.

        Declarations are separated by ";", the name and the value of a
        declaration by the first ":". Pieces without a ":" are ignored.
        Blanks and double quotes are stripped from both ends of names and
        values. Three property names are translated: vertical-align
        becomes valign, text-align becomes align and background-color
        becomes bgcolor.
        '''
        renamed = {'vertical-align': 'valign',
                   'text-align': 'align',
                   'background-color': 'bgcolor',
                  }
        attrs = {}
        for declaration in styles.split(";"):
            # A piece that is empty or only blanks/quotes has no colon
            # either, so this one test skips it as well.
            if ":" not in declaration:
                continue
            prop, value = [part.strip(' "') for part in declaration.split(":", 1)]
            attrs[renamed.get(prop, prop)] = value
        return attrs
mvirkkil@3189
   734
mvirkkil@3198
   735
    def _cleanupUlinkNode(self):
        """
        Moin adds the url as the text to a link, if no text is specified.
        Docbook does it when a docbook is rendered, so we don't want moin to
        do it: if the current node is a ulink whose only child is a text
        node that equals the url attribute (ignoring surrounding
        whitespace), that text node is removed.
        """
        link = self.cur
        if link.nodeName != "ulink" or len(link.childNodes) != 1:
            return
        label = link.firstChild
        if label.nodeType != Node.TEXT_NODE:
            return
        if label.nodeValue.strip() == link.getAttribute('url').strip():
            link.removeChild(label)
mvirkkil@3228
   746
mvirkkil@3225
   747
    def _addTextElem(self, target, elemName, text):
        """
        Appends a new element named elemName to the element target. The
        new element gets a single child: a text node whose nodeValue is
        text.
        """
        element = self.doc.createElement(elemName)
        content = self.doc.createTextNode(text)
        element.appendChild(content)
        target.appendChild(element)
mvirkkil@3228
   756
mvirkkil@3228
   757
mvirkkil@3225
   758
    def _addRevisionHistory(self, targetNode):
        """
        Builds a revhistory element from the edit log of the page and
        appends it to targetNode, unless there is no revision to show.

        Only the log entries of saving actions are used, others (like
        adding an attachment) are ignored. Each revision element gets a
        revnumber, a date and an authorinitials element, and a revremark
        element if there is a comment.

        The date is formatted by the current user's getFormattedDateTime.

        The authorinitials will be the name of the user, or the hostname
        from the log entry if the user has no name.

        When an entry has no comment, a standard remark is generated for
        reverts and renames.

        The revision history of included documents is NOT included at the
        moment due to technical difficulties.
        """
        _ = self.request.getText
        saving_actions = ('SAVE', 'SAVENEW', 'SAVE/REVERT', 'SAVE/RENAME', )
        edit_log = editlog.EditLog(self.request, rootpagename=self.title)
        known_users = {} # user objects by user id, one lookup per author

        history = self.doc.createElement("revhistory")

        # read in the complete log of this page
        for entry in edit_log.reverse():
            if entry.action not in saving_actions:
                # Let's ignore adding of attachments
                continue
            revision = self.doc.createElement("revision")

            # Revision number (without preceding zeros)
            self._addTextElem(revision, "revnumber", entry.rev.lstrip('0'))

            # Date of revision
            timestamp = wikiutil.version2timestamp(entry.ed_time_usecs)
            self._addTextElem(revision, "date",
                              self.request.user.getFormattedDateTime(timestamp))

            # Author of revision
            userid = entry.userid
            if userid not in known_users:
                known_users[userid] = user.User(self.request, userid, auth_method="text_docbook:740")
            author = known_users[userid]
            if author and author.name:
                initials = author.name
            else:
                initials = entry.hostname
            self._addTextElem(revision, "authorinitials", initials)

            # Comment from author of revision
            remark = entry.comment
            if not remark:
                if '/REVERT' in entry.action:
                    remark = _("Revert to revision %(rev)d.") % {'rev': int(entry.extra)}
                elif '/RENAME' in entry.action:
                    remark = _("Renamed from '%(oldpagename)s'.") % {'oldpagename': entry.extra}
            if remark:
                self._addTextElem(revision, "revremark", remark)

            history.appendChild(revision)

        if history.firstChild:
            # only add the revision history if there is history to add
            targetNode.appendChild(history)
mvirkkil@3183
   819
alex@82
   820
### Not supported ###################################################
tw@407
   821
tw@999
   822
    def rule(self, size=0, **kw):
        """A horizontal rule has no DocBook counterpart: only a comment
        saying so is written. Returns an empty string."""
        remark = 'rule (<hr>) is not applicable to DocBook'
        self._emitComment(remark)
        return ""
alex@82
   825
tw@407
   826
    def small(self, on, **kw):
        """Smaller text has no DocBook counterpart: a comment saying so
        is written when the markup opens, nothing when it closes.
        Returns an empty string."""
        if not on:
            return ""
        self._emitComment('"~-smaller-~" is not applicable to DocBook')
        return ""
alex@82
   830
tw@407
   831
    def big(self, on, **kw):
        """Bigger text has no DocBook counterpart: a comment saying so
        is written when the markup opens, nothing when it closes.
        Returns an empty string."""
        if not on:
            return ""
        self._emitComment('"~+bigger+~" is not applicable to DocBook')
        return ""
tw@407
   835
mvirkkil@3196
   836
    def rawHTML(self, markup):
        """Handle raw HTML as well as DocBook allows.

        Markup that is empty or only whitespace is ignored. If the markup
        contains neither "<" nor ">" it is taken to be plain text: its
        character references are decoded and the result is written with
        self.text(). In every non-empty case the original markup is also
        recorded in an XML comment.

        @param markup: the raw HTML
        @return: always an empty string
        """
        if markup.strip() == "":
            return ""

        if "<" not in markup and ">" not in markup:
            # Seems there are no tags, only text and character references.
            import re
            try:
                from htmlentitydefs import name2codepoint
            except ImportError: # the module has this name on Python 3
                from html.entities import name2codepoint
            try:
                to_char = unichr
            except NameError: # Python 3 has no unichr, chr does the same
                to_char = chr

            def decode(match):
                # Returns the character for one reference, or the
                # reference itself if it is unknown or out of range.
                entity = match.group('name')
                try:
                    if entity is not None:
                        return to_char(name2codepoint[entity])
                    if match.group('hex') is not None:
                        return to_char(int(match.group('hex'), 16))
                    return to_char(int(match.group('dec')))
                except (KeyError, ValueError, OverflowError):
                    return match.group(0)

            # All references are decoded in one pass, so that the output
            # of one replacement is never decoded a second time
            # ("&amp;lt;" has to become "&lt;", not "<"). Entity names may
            # contain digits (frac12, sup2, ...).
            reference = re.compile(
                "&(?:#(?P<dec>[0-9]+)|#[xX](?P<hex>[0-9a-fA-F]+)|(?P<name>[a-zA-Z][a-zA-Z0-9]*));")
            cleaned = reference.sub(decode, markup)

            self.text(cleaned)

        self._emitComment("RAW HTML: "+markup)
        return ""
mvirkkil@3196
   860
mvirkkil@3200
   861
    def div(self, on, **kw):
        """A div cannot really be supported in DocBook as it carries no
        semantic meaning, but the special cases can be handled when the class
        of the div carries the information.

        A dictionary is used for mapping between class names and the
        corresponding DocBook element.

        A MoinMoin comment is represented in DocBook by the remark element.

        The rest of the known classes are the admonitions in DocBook:
        warning, caution, important, note and tip

        When a div is closed, the current node is closed if it is one of
        the elements of the mapping. A div without a known class does
        nothing.

        Note: The remark entity can only contain inline elements, so it is
              very likely that the use of a comment div will produce invalid
              DocBook.
        """
        # Map your styles to docbook elements.
        # Even though comment is right now the only one that needs to be
        # mapped, having two different ways is more complicated than having
        # a single common way. Code clarity and generality first, especially
        # since we might want to do more div to docbook mappings in the future.
        class_to_docbook = {"warning":   "warning",
                            "caution":   "caution",
                            "important": "important",
                            "note":      "note",
                            "tip":       "tip",
                            "comment":   "remark"}

        if not on:
            current = self.cur.nodeName
            if current in class_to_docbook.values():
                return self._handleNode(current, on)
            return ""

        css_class = kw.get('css_class')
        if css_class:
            given = css_class.split()
            for style in class_to_docbook:
                if style in given:
                    return self._handleNode(class_to_docbook[style], on)

        return ""
mvirkkil@3204
   901
mvirkkil@3200
   902
    def span(self, on, **kw):
        """A span cannot really be supported in DocBook as it carries no
        semantic meaning, but the special case of a comment can be handled.

        A comment is represented in DocBook by the remark element.

        A comment span is recognized by the fact that it has the class
        "comment". A closing span closes the current node if that is a
        remark. Other cases of span use are ignored.
        """
        css_class = kw.get('css_class')
        if on:
            if css_class and 'comment' in css_class.split():
                self._handleFormatting("remark", on)
        elif self.cur.nodeName == "remark":
            self._handleFormatting("remark", on)
        return ""
mvirkkil@3204
   917
mvirkkil@3204
   918
mvirkkil@3204
   919
mvirkkil@3189
   920
### Tables ##########################################################
mvirkkil@3189
   921
mvirkkil@3189
   922
    def table(self, on, attrs=(), **kw):
        """Open or close a table.

        Opening creates a Table helper below the current node and makes
        the table node of the helper the current node. Closing lets the
        helper finalize the table, makes the node it returns the current
        node and forgets the helper. Returns an empty string.
        """
        if not on:
            self.cur = self.curtable.finalizeTable()
            self.curtable = None
            return ""

        # The attributes are only handed over if there are any.
        optional = ()
        if attrs:
            optional = (dict(attrs), )
        self.curtable = Table(self, self.doc, self.cur, *optional)
        self.cur = self.curtable.tableNode
        return ""
mvirkkil@3189
   933
mvirkkil@3189
   934
    def table_row(self, on, attrs=(), **kw):
        """Open a table row.

        Opening adds a row to the current table and makes it the current
        node. Closing a row does nothing. Returns an empty string.

        @param on: true to open the row, false to close it
        @param attrs: attributes of the row, handed to Table.addRow as a dict
        """
        if on:
            if attrs:
                row = self.curtable.addRow(dict(attrs))
            else:
                row = self.curtable.addRow()
            # Both cases have to move self.cur to the new row. The attrs
            # case used to drop the row, leaving self.cur on the previous
            # node.
            self.cur = row
        return ""
mvirkkil@3189
   941
mvirkkil@3189
   942
    def table_cell(self, on, attrs=(), **kw):
        """Open a table cell.

        Opening adds a cell to the current table, handing over the
        attributes as a dict if there are any, and makes the new cell
        the current node. Closing a cell does nothing. Returns an empty
        string.
        """
        if not on:
            return ""
        if attrs:
            cell = self.curtable.addCell(dict(attrs))
        else:
            cell = self.curtable.addCell()
        self.cur = cell
        return ""
mvirkkil@3189
   949
mvirkkil@3189
   950
class Table:
    '''The Table class is used as a helper for collecting information about
    what kind of table we are building. When all relevant data is gathered
    it calculates the different spans of the cells and columns.

    Note that it expects all arguments to be passed in a dict.
    '''

    def __init__(self, formatter, doc, parent, argsdict=None):
        """Create the informaltable and tgroup elements below parent.

        @param formatter: the formatter, used for emitting comments and
                          for converting style strings
        @param doc: the DOM document used for creating elements
        @param parent: the node the table is appended to
        @param argsdict: attributes of the table (currently not used)
        """
        self.formatter = formatter
        self.doc = doc

        self.tableNode = self.doc.createElement('informaltable')
        parent.appendChild(self.tableNode)
        self.colWidths = {} # column number (as str) -> colwidth value
        self.tgroup = self.doc.createElement('tgroup')
        # Bug in yelp, the two lines below don't affect rendering
        #self.tgroup.setAttribute('rowsep', '1')
        #self.tgroup.setAttribute('colsep', '1')
        self.curColumn = 0
        self.maxColumn = 0
        self.row = None
        self.tableNode.appendChild(self.tgroup)

        self.tbody = self.doc.createElement('tbody') # Note: This gets appended in finalizeTable

    def finalizeTable(self):
        """Calculates the final width of the whole table and the width of each
        column. Adds the colspec-elements and applies the colwidth attributes.
        Inserts the tbody element to the tgroup and returns the tables container
        element.

        A lot of the information is gathered from the style attributes passed
        to the functions
        """
        self.tgroup.setAttribute('cols', str(self.maxColumn))
        for colnr in range(0, self.maxColumn):
            colspecElem = self.doc.createElement('colspec')
            colspecElem.setAttribute('colname', 'col_%s' % str(colnr))
            width = self.colWidths.get(str(colnr))
            if width is not None and width != "1*":
                colspecElem.setAttribute('colwidth', width)
            self.tgroup.appendChild(colspecElem)
        self.tgroup.appendChild(self.tbody)
        return self.tableNode.parentNode

    def addRow(self, argsdict=None):
        """Start a new row in the table body and return the row element.

        @param argsdict: attributes of the row (currently not used)
        """
        self.curColumn = 0
        self.row = self.doc.createElement('row')
        # Bug in yelp, doesn't affect the outcome.
        self.row.setAttribute("rowsep", "1") #Rows should have lines between them
        self.tbody.appendChild(self.row)
        return self.row

    def addCell(self, argsdict=None):
        """Add an entry element to the current row and return it.

        The attributes (including those found in a style attribute) are
        used for the spans, the alignment and the column width.

        @param argsdict: attributes of the cell, the dict is not modified
        """
        # Work on a private copy: the styles are merged into the dict and
        # the caller's dict (or a shared default) must not be changed.
        if argsdict:
            argsdict = dict(argsdict)
        else:
            argsdict = {}
        if 'style' in argsdict:
            argsdict.update(self.formatter._convertStylesToDict(argsdict['style'].strip('"')))

        cell = self.doc.createElement('entry')
        cell.setAttribute('rowsep', '1')
        cell.setAttribute('colsep', '1')

        self.row.appendChild(cell)
        self._handleSimpleCellAttributes(cell, argsdict)
        self._handleColWidth(argsdict)
        self.curColumn += self._handleColSpan(cell, argsdict)

        self.maxColumn = max(self.curColumn, self.maxColumn)

        return cell

    def _spanValue(self, raw, label):
        """Convert a (possibly quoted) colspan or rowspan value to an int.

        Returns None, after emitting a comment, if it is not a number.
        """
        raw = raw.strip('"')
        try:
            return int(raw)
        except ValueError:
            self.formatter._emitComment("%s %s not supported" % (label, raw))
            return None

    def _handleColWidth(self, argsdict=None):
        """Remember the width of the current column. Only widths given in
        percent are supported, for others a comment is emitted."""
        if not argsdict or "width" not in argsdict:
            return
        width = argsdict["width"].strip('"')
        if not width.endswith("%"):
            self.formatter._emitComment("Width %s not supported" % width)
            return

        # "50%" is stored as the proportional width "50*"
        self.colWidths[str(self.curColumn)] = width[:-1] + "*"

    def _handleColSpan(self, element, argsdict=None):
        """Returns the number of columns this entry spans"""
        if not argsdict or 'colspan' not in argsdict:
            return 1
        assert(element.nodeName == "entry")
        colspan = self._spanValue(argsdict['colspan'], "Colspan")
        if colspan is None:
            # not a number: treat the entry as an ordinary single cell
            return 1
        extracols = colspan - 1
        element.setAttribute('namest', "col_" + str(self.curColumn))
        element.setAttribute('nameend', "col_" + str(self.curColumn + extracols))
        return 1 + extracols

    def _handleSimpleCellAttributes(self, element, argsdict=None):
        """Apply rowspan, align and valign to the entry element. Values
        that are not supported are reported in a comment instead."""
        if not argsdict:
            return
        assert(element.nodeName == "entry")

        safe_values_for = {'valign': ('top', 'middle', 'bottom'),
                           'align': ('left', 'center', 'right'),
                          }

        if 'rowspan' in argsdict:
            rowspan = self._spanValue(argsdict['rowspan'], "Rowspan")
            if rowspan is not None:
                element.setAttribute('morerows', str(rowspan - 1))

        if 'align' in argsdict:
            value = argsdict['align'].strip('"')
            if value in safe_values_for['align']:
                element.setAttribute('align', value)
            else:
                self.formatter._emitComment("Alignment %s not supported" % value)

        if 'valign' in argsdict:
            value = argsdict['valign'].strip('"')
            if value in safe_values_for['valign']:
                element.setAttribute('valign', value)
            else:
                self.formatter._emitComment("Vertical alignment %s not supported" % value)
mvirkkil@3189
  1068
mvirkkil@3189
  1069