comparison emeraldtree/html.py @ 68:d7e235461c97

HTML Support - Change imports to internal names
author Bastian Blank <bblank@thinkmo.de>
date Sun, 30 May 2010 17:39:14 +0200
parents 672b181a8ce0
children 54c60c7e7e35
comparison
equal deleted inserted replaced
67:47b1c1aa1a2b 68:d7e235461c97
1 # 1 #
2 # The ElementTree toolkit is 2 # The ElementTree toolkit is
3 # 3 #
4 # Copyright (c) 1999-2007 by Fredrik Lundh 4 # Copyright (c) 1999-2007 by Fredrik Lundh
5 # 2008-2009 Bastian Blank <bblank@thinkmo.de> 5 # 2008-2010 Bastian Blank <bblank@thinkmo.de>
6 # 6 #
7 # By obtaining, using, and/or copying this software and/or its 7 # By obtaining, using, and/or copying this software and/or its
8 # associated documentation, you agree that you have read, understood, 8 # associated documentation, you agree that you have read, understood,
9 # and will comply with the following terms and conditions: 9 # and will comply with the following terms and conditions:
10 # 10 #
34 import htmlentitydefs 34 import htmlentitydefs
35 import re 35 import re
36 import mimetools, StringIO 36 import mimetools, StringIO
37 from HTMLParser import HTMLParser as HTMLParserBase 37 from HTMLParser import HTMLParser as HTMLParserBase
38 38
39 import ElementTree 39 from . import tree
40 40
41 41
42 ## 42 ##
43 # ElementTree builder for HTML source code. This builder converts an 43 # ElementTree builder for HTML source code. This builder converts an
44 # HTML document or fragment to an ElementTree. 44 # HTML document or fragment to an ElementTree.
65 IGNOREEND = "img", "hr", "meta", "link", "br", "input", "col" 65 IGNOREEND = "img", "hr", "meta", "link", "br", "input", "col"
66 66
67 namespace = "http://www.w3.org/1999/xhtml" 67 namespace = "http://www.w3.org/1999/xhtml"
68 68
69 def __init__(self, encoding=None, builder=None): 69 def __init__(self, encoding=None, builder=None):
70 HTMLParserBase.__init__(self)
70 self.__stack = [] 71 self.__stack = []
71 if builder is None: 72 self.__builder = builder or tree.TreeBuilder()
72 builder = ElementTree.TreeBuilder()
73 self.__builder = builder
74 self.encoding = encoding or "iso-8859-1" 73 self.encoding = encoding or "iso-8859-1"
75 HTMLParserBase.__init__(self)
76 74
77 ## 75 ##
78 # Flushes parser buffers and returns the root element. 76 # Flushes parser buffers and returns the root element.
79 # 77 #
80 # @return An Element instance. 78 # @return An Element instance.
85 83
86 ## 84 ##
87 # (Internal) Handles start tags. 85 # (Internal) Handles start tags.
88 86
89 def handle_starttag(self, tag, attrs): 87 def handle_starttag(self, tag, attrs):
90 tag = ElementTree.QName(tag.lower(), self.namespace) 88 tag = tree.QName(tag.lower(), self.namespace)
91 if tag.name == "meta": 89 if tag.name == "meta":
92 # look for encoding directives 90 # look for encoding directives
93 http_equiv = content = None 91 http_equiv = content = None
94 for k, v in attrs: 92 for k, v in attrs:
95 if k == "http-equiv": 93 if k == "http-equiv":
112 if attrs: 110 if attrs:
113 for key, value in attrs: 111 for key, value in attrs:
114 # Handle short attributes 112 # Handle short attributes
115 if value is None: 113 if value is None:
116 value = key 114 value = key
117 key = ElementTree.QName(key.lower(), self.namespace) 115 key = tree.QName(key.lower(), self.namespace)
118 attrib[key] = value 116 attrib[key] = value
119 self.__builder.start(tag, attrib) 117 self.__builder.start(tag, attrib)
120 if tag.name in self.IGNOREEND: 118 if tag.name in self.IGNOREEND:
121 self.__stack.pop() 119 self.__stack.pop()
122 self.__builder.end(tag) 120 self.__builder.end(tag)
123 121
124 ## 122 ##
125 # (Internal) Handles end tags. 123 # (Internal) Handles end tags.
126 124
127 def handle_endtag(self, tag): 125 def handle_endtag(self, tag):
128 if not isinstance(tag, ElementTree.QName): 126 if not isinstance(tag, tree.QName):
129 tag = ElementTree.QName(tag.lower(), self.namespace) 127 tag = tree.QName(tag.lower(), self.namespace)
130 if tag.name in self.IGNOREEND: 128 if tag.name in self.IGNOREEND:
131 return 129 return
132 lasttag = self.__stack.pop() 130 lasttag = self.__stack.pop()
133 if tag != lasttag and lasttag.name in self.AUTOCLOSE: 131 if tag != lasttag and lasttag.name in self.AUTOCLOSE:
134 self.handle_endtag(lasttag) 132 self.handle_endtag(lasttag)