comparison emeraldtree/html.py @ 100:9d39d4bc2deb

ported to python 2.7 / >= 3.3 using six. The port itself was half automated (using python-modernize) and half manual fixes and cleanups.
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 19 May 2013 20:37:50 +0200
parents 54c60c7e7e35
children
comparison
equal deleted inserted replaced
99:f35162dc24d4 100:9d39d4bc2deb
29 29
30 ## 30 ##
31 # Tools to build element trees from HTML files. 31 # Tools to build element trees from HTML files.
32 ## 32 ##
33 33
34 import htmlentitydefs
35 from HTMLParser import HTMLParser as HTMLParserBase
36
37 from . import tree 34 from . import tree
35 import six
36 from six.moves import html_entities
37 from six.moves import html_parser
38 HTMLParserBase = html_parser.HTMLParser
38 39
39 40
40 ## 41 ##
41 # ElementTree builder for HTML source code. This builder converts an 42 # ElementTree builder for HTML source code. This builder converts an
42 # HTML document or fragment to an ElementTree. 43 # HTML document or fragment to an ElementTree.
146 147
147 ## 148 ##
148 # (Internal) Handles entity references. 149 # (Internal) Handles entity references.
149 150
150 def handle_entityref(self, name): 151 def handle_entityref(self, name):
151 entity = htmlentitydefs.entitydefs.get(name) 152 entity = html_entities.entitydefs.get(name)
152 if entity: 153 if entity:
153 if len(entity) == 1: 154 if len(entity) == 1:
154 entity = ord(entity) 155 entity = ord(entity)
155 else: 156 else:
156 entity = int(entity[2:-1]) 157 entity = int(entity[2:-1])
163 164
164 ## 165 ##
165 # (Internal) Handles character data. 166 # (Internal) Handles character data.
166 167
167 def handle_data(self, data): 168 def handle_data(self, data):
168 if isinstance(data, str): 169 if isinstance(data, six.binary_type):
169 # convert to unicode, but only if necessary 170 # convert to unicode, but only if necessary
170 data = unicode(data, self.encoding, "ignore") 171 data = data.decode(self.encoding, "ignore")
171 self.__builder.data(data) 172 self.__builder.data(data)
172 173
173 ## 174 ##
174 # (Hook) Handles unknown entity references. The default action 175 # (Hook) Handles unknown entity references. The default action
175 # is to ignore unknown entities. 176 # is to ignore unknown entities.