# HG changeset patch # User Thomas Waldmann # Date 1337473067 -7200 # Node ID 223789ea05b23eaee55794e26f4015c7a5032aa0 # Parent a70598fc5ca61ee7a075c87e586028e93d582a3c add polyglot writer (for xmlish html5), add/fix tests tests: add some toplevel element that is not tested (avoids namespace issues that shall not get tested there) add tests for xml and polyglot writing, checking whether void and empty elements get written correctly diff -r a70598fc5ca6 -r 223789ea05b2 emeraldtree/tests/test_html.py --- a/emeraldtree/tests/test_html.py Sun May 20 01:02:20 2012 +0200 +++ b/emeraldtree/tests/test_html.py Sun May 20 02:17:47 2012 +0200 @@ -2,10 +2,10 @@ from .. import html, tree -def serialize(elem): +def serialize(elem, method): from StringIO import StringIO file = StringIO() - elem.write(file.write, method='html') + elem.write(file.write, method=method) return file.getvalue() def test_read_simple1(): @@ -46,5 +46,11 @@ assert len(elem) == 0 def test_write(): - elem = html.HTML(u'
') - assert serialize(elem) == u'
' + elem = html.HTML(u'

') + h = serialize(elem, 'html') + p = serialize(elem, 'polyglot') + x = serialize(elem, 'xml') + assert u'

' in h + assert u'

' in p + assert u'

' in x + diff -r a70598fc5ca6 -r 223789ea05b2 emeraldtree/tree.py --- a/emeraldtree/tree.py Sun May 20 01:02:20 2012 +0200 +++ b/emeraldtree/tree.py Sun May 20 02:17:47 2012 +0200 @@ -82,6 +82,8 @@ Writer = XMLWriter elif method == "html": Writer = HTMLWriter + elif method == "polyglot": + Writer = PolyglotWriter else: Writer = TextWriter @@ -1381,3 +1383,38 @@ for e in elem: self.serialize(write, e, qnames) + +class PolyglotWriter(MLBaseWriter): + """write a document that is valid html5 AND well-formed xml, + see http://www.w3.org/TR/html-polyglot/ """ + void_elements = frozenset(('area', 'base', 'br', 'col', 'command', 'embed', 'hr', + 'img', 'input', 'keygen', 'link', 'meta', 'param', + 'source', 'track', 'wbr')) + + def __init__(self, encoding=None, namespaces={}): + namespaces["http://www.w3.org/1999/xhtml"] = '' + super(PolyglotWriter, self).__init__(encoding, namespaces) + + def _serialize_element(self, write, elem, qnames, namespaces): + tag = qnames[elem.tag] + + if tag is not None: + attrib_str = self._attrib_string(elem.attrib, qnames) + namespace_str = self._namespace_string(namespaces) + if len(elem): + write(u"<%s%s%s>" % (tag, attrib_str, namespace_str)) + for e in elem: + self.serialize(write, e, qnames) + write(u"</%s>" % tag) + elif tag in self.void_elements: + write(u"<%s%s%s />" % (tag, attrib_str, namespace_str)) + else: + write(u"<%s%s%s></%s>" % (tag, attrib_str, namespace_str, tag)) + + else: + for e in elem: + self.serialize(write, e, qnames) + + def serialize_start(self, write): + write(u"\n") +