changeset 84:223789ea05b2

add polyglot writer (for xmlish html5), add/fix tests. tests: add some toplevel element that is not tested (avoids namespace issues that shall not get tested there); add tests for xml and polyglot writing, checking whether void and empty elements get written correctly
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 20 May 2012 02:17:47 +0200
parents a70598fc5ca6
children ef22f92aa803
files emeraldtree/tests/test_html.py emeraldtree/tree.py
diffstat 2 files changed, 47 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/emeraldtree/tests/test_html.py	Sun May 20 01:02:20 2012 +0200
+++ b/emeraldtree/tests/test_html.py	Sun May 20 02:17:47 2012 +0200
@@ -2,10 +2,10 @@
 
 from .. import html, tree
 
-def serialize(elem):
+def serialize(elem, method):
     from StringIO import StringIO
     file = StringIO()
-    elem.write(file.write, method='html')
+    elem.write(file.write, method=method)
     return file.getvalue()
 
 def test_read_simple1():
@@ -46,5 +46,11 @@
     assert len(elem) == 0
 
 def test_write():
-    elem = html.HTML(u'<br>')
-    assert serialize(elem) == u'<br>'
+    elem = html.HTML(u'<html><br><p></html>')
+    h = serialize(elem, 'html')
+    p = serialize(elem, 'polyglot')
+    x = serialize(elem, 'xml')
+    assert u'<br><p>' in h
+    assert u'<br /><p></p>' in p
+    assert u'<br /><p />' in x
+
--- a/emeraldtree/tree.py	Sun May 20 01:02:20 2012 +0200
+++ b/emeraldtree/tree.py	Sun May 20 02:17:47 2012 +0200
@@ -82,6 +82,8 @@
             Writer = XMLWriter
         elif method == "html":
             Writer = HTMLWriter
+        elif method == "polyglot":
+            Writer = PolyglotWriter
         else:
             Writer = TextWriter
 
@@ -1381,3 +1383,38 @@
             for e in elem:
                 self.serialize(write, e, qnames)
 
+
+class PolyglotWriter(MLBaseWriter):
+    """write a document that is valid html5 AND well-formed xml,
+       see http://www.w3.org/TR/html-polyglot/ """
+    void_elements = frozenset(('area', 'base', 'br', 'col', 'command', 'embed', 'hr',
+                               'img', 'input', 'keygen', 'link', 'meta', 'param',
+                               'source', 'track', 'wbr'))
+
+    def __init__(self, encoding=None, namespaces=None):
+        # copy, so neither the caller's dict nor a shared default gets mutated
+        namespaces = dict(namespaces or {})
+        namespaces["http://www.w3.org/1999/xhtml"] = ''
+        super(PolyglotWriter, self).__init__(encoding, namespaces)
+
+    def _serialize_element(self, write, elem, qnames, namespaces):
+        tag = qnames[elem.tag]
+        if tag is not None:
+            attrib_str = self._attrib_string(elem.attrib, qnames)
+            namespace_str = self._namespace_string(namespaces)
+            if len(elem):
+                write(u"<%s%s%s>" % (tag, attrib_str, namespace_str))
+                for e in elem:
+                    self.serialize(write, e, qnames)
+                write(u"</%s>" % tag)
+            elif tag in self.void_elements:
+                write(u"<%s%s%s />" % (tag, attrib_str, namespace_str))
+            else:
+                write(u"<%s%s%s></%s>" % (tag, attrib_str, namespace_str, tag))
+        else:
+            for e in elem:
+                self.serialize(write, e, qnames)
+
+    def serialize_start(self, write):
+        write(u"<!DOCTYPE html>\n")
+