Mercurial > moin > 1.9
changeset 3839:a37ed69fafed
MoinMoinBugs/DoNotConvertUnicodeToUTF8ForXsltParser (backport from 1.8)
Initial patch applied and slightly improved.
Moved to correct place.
author | Thomas Waldmann <tw AT waldmann-edv DOT de> |
---|---|
date | Mon, 14 Jul 2008 02:25:33 +0200 |
parents | 592fe02ed976 |
children | a7db3c488083 42c4d8d6a80c |
files | MoinMoin/parser/_tests/test_unicode.py MoinMoin/parser/text_xslt.py |
diffstat | 2 files changed, 74 insertions(+), 4 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MoinMoin/parser/_tests/test_unicode.py Mon Jul 14 02:25:33 2008 +0200 @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- +""" + MoinMoin - Test if MoinMoin.parser.* do write UNICODE objects + + Comment: + The test produces an exception in another place, if some routine encodes unicode to UTF8 too early. + The corresponding bug was found initially in MoinMoin.parser.text_xslt and the test was designed for + this problem, but the test actually examines all available parsers. + + @copyright: 2007,2008 by Raphael Bossek <raphael.bossek@gmail.com> + @license: GNU GPL, see COPYING for details. +""" + +import py +import sys, traceback + +import MoinMoin.parser +from MoinMoin.Page import Page + +class TestParserOutput(object): + """ Parser has to generate unicode output. """ + def test_ParserOutput(self): + """ This method aims generally at MoinMoin.parser.text_xslt - + this parser should encode Unicode input to UTF8 as late as possible. + """ + request = self.request + assert not request.cfg.allow_xslt, u'allow_xslt should be disabled' + errmsg = [] + + # Some examples to verify with additional stuff + parser_raw_input = { + u'text_html': u'<html><body><h1>%s</h1></body></html>', + u'text_irssi': u"[12:01] <RaphaelBosek> %s", + u'text_moin_wiki': u'||<#fefefe> %s ||', + u'text_python': u'if True: print "%s"', + u'text_xslt': u'<?xml version="1.0" encoding="ISO-8859-1"?><!-- %s -->', + } + + # Blacklist for parsers that don't work - this list should be empty ! + parser_blacklist = [] + + # Create a page if it doesn't exist already. + if not u'page' in request.formatter.__dict__ or not request.formatter.page: + request.formatter.page = Page(request, u'test_parser_unicode_page') + # this temporarily fixes an error with page-names, should be fixed at a central place some time + request.page = Page(request, u'test_parser_unicode_page') + + # Check all parsers for UNICODE output. + for parsername in MoinMoin.parser.modules: + if parsername in parser_blacklist: + continue + + module = __import__(u'MoinMoin.parser', globals(), {}, [parsername]) + parsermodule = getattr(module, parsername) + if u'Parser' in parsermodule.__dict__: + # Get the parser_input or use a simple fallback if the parser is not found in parser_raw_input + i = parser_raw_input.get(parsername, u'%s') % u'\xC3\x84\xC3\x96\xC3\x9C\xC3\xE2\x82\xAC\x27' + p = parsermodule.Parser(i, request) + + # This is the actual request that would produce an exception, which would usually look like the following: + # > UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128) + # usually occurring in python/lib/StringIO.py:270 + r = request.redirectedOutput(p.format, request.formatter) + + # This assertion will only be triggered, if the parser does not write unicode at all + assert isinstance(r, unicode), u'MoinMoin.parser.%s does not write UNICODE data but %s' % (parsername, type(r), ) + +coverage_modules = ['MoinMoin.parser'] +
--- a/MoinMoin/parser/text_xslt.py Sun Jul 13 21:41:12 2008 +0200 +++ b/MoinMoin/parser/text_xslt.py Mon Jul 14 02:25:33 2008 +0200 @@ -28,7 +28,7 @@ Dependencies = Dependencies def __init__(self, raw, request, **kw): - self.raw = raw.encode(config.charset) + self.raw = raw self.request = request self.form = request.form self._ = request.getText @@ -87,14 +87,15 @@ input_factory = InputSource.InputSourceFactory(resolver=wiki_resolver) page_uri = self.base_uri + wikiutil.url_quote(formatter.page.page_name) - raw = self.raw.strip() - + # 4Suite needs an utf-8 encoded byte string instead of an unicode object + raw = self.raw.strip().encode('utf-8') self.processor = Processor() self.append_stylesheet() # hook, for extending this parser self.processor.run( input_factory.fromString(raw, uri=page_uri), outputStream=out_file) - result = out_file.getvalue() + # Convert utf-8 encoded byte string into unicode + result = out_file.getvalue().decode('utf-8') result = self.parse_result(result) # hook, for extending this parser except FtException, msg: