changeset 292:ebdfe9448e01

Make converter API more homogeneous, adapt tests. You can now give a file-like object (like a revision), a str or a unicode to an input converter (no matter which mimetype it is for). You also need to give the contenttype (at least for the text input converters; they will extract the charset from there and decode appropriately). Removed the feed_input_conv methods; we now always give the revision and the contenttype to the converters.
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 17 Jul 2011 01:42:53 +0200
parents 46e37e4197d4
children e02f3f45ab5d
files MoinMoin/converter/_tests/test_creole_in.py MoinMoin/converter/_tests/test_docbook_in.py MoinMoin/converter/_tests/test_mediawiki_in.py MoinMoin/converter/_tests/test_moinwiki_in.py MoinMoin/converter/_tests/test_moinwiki_in_out.py MoinMoin/converter/_tests/test_rst_in.py MoinMoin/converter/_util.py MoinMoin/converter/archive_in.py MoinMoin/converter/audio_video_in.py MoinMoin/converter/creole_in.py MoinMoin/converter/docbook_in.py MoinMoin/converter/everything.py MoinMoin/converter/html_in.py MoinMoin/converter/image_in.py MoinMoin/converter/mediawiki_in.py MoinMoin/converter/moinwiki_in.py MoinMoin/converter/nonexistent_in.py MoinMoin/converter/pygments_in.py MoinMoin/converter/rst_in.py MoinMoin/converter/text_csv_in.py MoinMoin/converter/text_in.py MoinMoin/items/__init__.py
diffstat 22 files changed, 102 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/converter/_tests/test_creole_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/_tests/test_creole_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -257,6 +257,6 @@
         return self.output_re.sub(u'', buffer.getvalue())
 
     def do(self, input, output, args={}):
-        out = self.conv(input.split(u'\n'), **args)
+        out = self.conv(input, 'text/x.moin.creole;charset=utf-8', **args)
         assert self.serialize(out) == output
 
--- a/MoinMoin/converter/_tests/test_docbook_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/_tests/test_docbook_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -51,7 +51,7 @@
             to_conv = self.handle_input(input)
         elif args['nonamespace']:
             to_conv = input
-        out = self.conv([to_conv])
+        out = self.conv(to_conv, 'application/docbook+xml;charset=utf-8')
         f = StringIO.StringIO()
         out.write(f.write, namespaces=self.output_namespaces, )
         return self.output_re.sub(u'', f.getvalue())
--- a/MoinMoin/converter/_tests/test_mediawiki_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/_tests/test_mediawiki_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -138,7 +138,7 @@
         return self.output_re.sub(u'', buffer.getvalue())
 
     def do(self, input, output, args={}, skip=None):
-        out = self.conv(input.split(u'\n'), **args)
+        out = self.conv(input, 'text/x-mediawiki;charset=utf-8', **args)
         print self.serialize(out)
         assert self.serialize(out) == output
 
--- a/MoinMoin/converter/_tests/test_moinwiki_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/_tests/test_moinwiki_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -313,5 +313,5 @@
     def do(self, input, output, args={}, skip=None):
         if skip:
             py.test.skip(skip)
-        out = self.conv(input.split(u'\n'), **args)
+        out = self.conv(input, 'text/x.moin.wiki;charset=utf-8', **args)
         assert self.serialize(out) == output
--- a/MoinMoin/converter/_tests/test_moinwiki_in_out.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/_tests/test_moinwiki_in_out.py	Sun Jul 17 01:42:53 2011 +0200
@@ -520,7 +520,7 @@
     def do(self, input, output, args={}, skip=None):
         if skip:
             py.test.skip(skip)
-        out = self.conv_in(input.split(u'\n'), **args)
+        out = self.conv_in(input, 'text/x.moin.wiki;charset=utf-8', **args)
         out = self.conv_out(self.handle_input(self.serialize(out)), **args)
         assert self.handle_output(out) == output
 
--- a/MoinMoin/converter/_tests/test_rst_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/_tests/test_rst_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -150,7 +150,7 @@
         return self.output_re.sub(u'', buffer.getvalue())
 
     def do(self, input, output, args={}, skip=None):
-        out = self.conv(input.split(u'\n'), **args)
+        out = self.conv(input, 'text/x-rst;charset=utf-8', **args)
         assert self.serialize(out) == output
 
 coverage_modules = ['MoinMoin.converter.rst_in']
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/converter/_util.py	Sun Jul 17 01:42:53 2011 +0200
@@ -0,0 +1,47 @@
+# Copyright: 2011 MoinMoin:ThomasWaldmann
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - converter utilities
+"""
+
+
+from __future__ import absolute_import, division
+
+from MoinMoin.util.mime import Type
+
+
+def decode_data(data, contenttype=None):
+    """
+    read and decode data, return unicode text
+
+    supported types for data:
+    - rev object or other file-like object
+    - str
+    - unicode
+
+    file-like objects and str need to be either utf-8 (or ascii, which is a subset of utf-8)
+    encoded or contenttype (including a charset parameter) needs to be given.
+    """
+    if hasattr(data, 'read'):
+        # file-like object (e.g. a revision): continue with whatever read() returns
+        data = data.read()
+    if isinstance(data, str):  # byte string (Python 2 str): needs decoding
+        coding = 'utf-8'  # fallback if contenttype is None or has no charset parameter
+        if contenttype is not None:
+            ct = Type(contenttype)
+            coding = ct.parameters.get('charset', coding)
+        data = data.decode(coding)
+    if not isinstance(data, unicode):  # neither unicode input nor a str we decoded above
+        raise TypeError("data must be file-like or str (requires contenttype with charset) or unicode")
+    return data
+
+
+def normalize_split_text(text):
+    """
+    normalize line endings, split text into a list of lines
+    """
+    text = text.replace(u'\r\n', u'\n')  # only CRLF pairs are rewritten; a lone CR is kept
+    lines = text.split(u'\n')  # text ending in a newline yields a final empty string
+    return lines
+
--- a/MoinMoin/converter/archive_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/archive_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -47,7 +47,7 @@
     def process_size(self, size):
         return unicode(size)
 
-    def __call__(self, fileobj):
+    def __call__(self, fileobj, contenttype=None, arguments=None):
         # we get a revision as fileobj
         self.item_name = fileobj.item.name
         try:
--- a/MoinMoin/converter/audio_video_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/audio_video_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -26,7 +26,7 @@
     def __init__(self, input_type):
         self.input_type = input_type
 
-    def __call__(self, rev):
+    def __call__(self, rev, contenttype=None, arguments=None):
         item_name = rev.item.name
         attrib = {
             moin_page.type_: unicode(self.input_type),
--- a/MoinMoin/converter/creole_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/creole_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -33,6 +33,7 @@
 
 from ._args_wiki import parse as parse_arguments
 from ._wiki_macro import ConverterMacro
+from ._util import decode_data, normalize_split_text
 
 class _Iter(object):
     """
@@ -104,8 +105,10 @@
     def factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
-        iter_content = _Iter(content)
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        lines = normalize_split_text(text)
+        iter_content = _Iter(lines)
 
         body = self.parse_block(iter_content, arguments)
         root = moin_page.page(children=[body])
--- a/MoinMoin/converter/docbook_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/docbook_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -27,6 +27,7 @@
 from MoinMoin.util.tree import moin_page, xlink, docbook, xml, html
 
 from ._wiki_macro import ConverterMacro
+from ._util import decode_data, normalize_split_text
 
 class NameSpaceError(Exception):
     pass
@@ -199,11 +200,12 @@
     def _factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, aruments=None):
-        """
-        Function called by the converter to process
-        the conversion.
-        """
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
+        docbook_str = u'\n'.join(content)
+        logging.debug(docbook_str)
+
         # Initalize our attributes
         self.section_depth = 0
         self.heading_level = 0
@@ -215,11 +217,6 @@
         self.standard_attribute = {}
 
         # We will create an element tree from the DocBook content
-        # The content is given to the converter as a list of string,
-        # line per line.
-        # So we will concatenate all in one string.
-        docbook_str = u'\n'.join(content)
-        logging.debug(docbook_str)
         try:
             # XXX: The XML parser need bytestring.
             tree = ET.XML(docbook_str.encode('utf-8'))
--- a/MoinMoin/converter/everything.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/everything.py	Sun Jul 17 01:42:53 2011 +0200
@@ -21,7 +21,7 @@
     def _factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, rev):
+    def __call__(self, rev, contenttype=None, arguments=None):
         item_name = rev.item.name
         attrib = {
             xlink.href: Iri(scheme='wiki', authority='', path='/'+item_name, query='do=get&rev=%d' % rev.revno),
--- a/MoinMoin/converter/html_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/html_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -24,6 +24,7 @@
 from MoinMoin.util.tree import html, moin_page, xlink, xml
 
 from ._wiki_macro import ConverterMacro
+from ._util import decode_data, normalize_split_text
 
 class Converter(object):
     """
@@ -80,13 +81,15 @@
     def _factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
+    def __call__(self, data, contenttype=None, arguments=None):
         """
         Function called by the converter to process the
         conversion.
 
         TODO: Add support for different arguments
         """
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
         # Be sure we have empty string in the base url
         self.base_url = ''
 
--- a/MoinMoin/converter/image_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/image_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -24,7 +24,7 @@
     def __init__(self, input_type):
         self.input_type = input_type
 
-    def __call__(self, rev):
+    def __call__(self, rev, contenttype=None, arguments=None):
         item_name = rev.item.name
         attrib = {
             moin_page.type_: unicode(self.input_type),
--- a/MoinMoin/converter/mediawiki_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/mediawiki_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -27,6 +27,7 @@
 from ._args import Arguments
 from ._args_wiki import parse as parse_arguments
 from ._wiki_macro import ConverterMacro
+from ._util import decode_data, normalize_split_text
 
 class _TableArguments(object):
     rules = r'''
@@ -87,7 +88,9 @@
     def factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
         iter_content = _Iter(content)
         self.preprocessor = self.Mediawiki_preprocessor()
         body = self.parse_block(iter_content, arguments)
--- a/MoinMoin/converter/moinwiki_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/moinwiki_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -25,6 +25,7 @@
 from ._args import Arguments
 from ._args_wiki import parse as parse_arguments
 from ._wiki_macro import ConverterMacro
+from ._util import decode_data, normalize_split_text
 from MoinMoin.i18n import _
 
 
@@ -219,8 +220,10 @@
     def factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
-        iter_content = _Iter(content)
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        lines = normalize_split_text(text)
+        iter_content = _Iter(lines)
 
         body = self.parse_block(iter_content, arguments)
         root = moin_page.page(children=(body, ))
--- a/MoinMoin/converter/nonexistent_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/nonexistent_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -22,7 +22,7 @@
     def _factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, rev):
+    def __call__(self, rev, contenttype=None, arguments=None):
         item_name = rev.item.name
         attrib = {
             xlink.href: Iri(scheme='wiki', authority='', path='/'+item_name, query='do=modify'),
--- a/MoinMoin/converter/pygments_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/pygments_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -23,6 +23,7 @@
 
 from MoinMoin.util.mime import Type, type_moin_document
 from MoinMoin.util.tree import moin_page
+from ._util import decode_data, normalize_split_text
 
 
 if pygments:
@@ -103,7 +104,9 @@
                     lexer = pygments.lexers.get_lexer_for_mimetype('text/plain')
             self.lexer = lexer
 
-        def __call__(self, content, arguments=None):
+        def __call__(self, data, contenttype=None, arguments=None):
+            text = decode_data(data, contenttype)
+            content = normalize_split_text(text)
             content = u'\n'.join(content)
             blockcode = moin_page.blockcode(attrib={moin_page.class_: 'highlight'})
             pygments.highlight(content, self.lexer, TreeFormatter(), blockcode)
--- a/MoinMoin/converter/rst_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/rst_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -28,6 +28,8 @@
 from MoinMoin.util.iri import Iri
 from MoinMoin.util.tree import html, moin_page, xlink
 
+from ._util import decode_data, normalize_split_text
+
 #### TODO: try block (do not crash if we don't have docutils)
 from docutils import nodes, utils, writers, core
 from docutils.parsers.rst import Parser
@@ -746,7 +748,9 @@
     def factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, input, arguments=None):
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        input = normalize_split_text(text)
         parser = MoinDirectives()
         while True:
             input = u'\n'.join(input)
--- a/MoinMoin/converter/text_csv_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/text_csv_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -11,6 +11,7 @@
 import csv
 
 from ._table import TableMixin
+from ._util import decode_data, normalize_split_text
 
 class Converter(TableMixin):
     """
@@ -21,10 +22,9 @@
     def _factory(cls, type_input, type_output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
-        """
-        Parse the CSV text and return DOM tree.
-        """
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
         # as of py 2.6.5 (and in the year 2010), the csv module seems to still
         # have troubles with unicode, thus we encode to utf-8 ...
         content = [line.encode('utf-8') for line in content]
--- a/MoinMoin/converter/text_in.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/converter/text_in.py	Sun Jul 17 01:42:53 2011 +0200
@@ -16,6 +16,7 @@
 from __future__ import absolute_import, division
 
 from MoinMoin.util.tree import moin_page
+from ._util import decode_data, normalize_split_text
 
 class Converter(object):
     """
@@ -26,8 +27,9 @@
     def _factory(cls, type_input, type_output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
-        """Parse the text and return DOM tree."""
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
         blockcode = moin_page.blockcode()
         for line in content:
             if len(blockcode):
--- a/MoinMoin/items/__init__.py	Mon Jul 11 01:10:30 2011 +0200
+++ b/MoinMoin/items/__init__.py	Sun Jul 17 01:42:53 2011 +0200
@@ -218,9 +218,6 @@
         # override this in child classes
         return ''
 
-    def feed_input_conv(self):
-        return self.rev
-
     def internal_representation(self, converters=['smiley']):
         """
         Return the internal representation of a document using a DOM Tree
@@ -249,8 +246,7 @@
 
             # We can process the conversion
             links = Iri(scheme='wiki', authority='', path='/' + self.name)
-            input = self.feed_input_conv()
-            doc = input_conv(input)
+            doc = input_conv(self.rev, self.contenttype)
             # XXX is the following assuming that the top element of the doc tree
             # is a moin_page.page element? if yes, this is the wrong place to do that
             # as not every doc will have that element (e.g. for images, we just get
@@ -1108,9 +1104,6 @@
         """ convert data from storage format to memory format """
         return data.decode(config.charset).replace(u'\r\n', u'\n')
 
-    def feed_input_conv(self):
-        return self.data_storage_to_internal(self.data).split(u'\n')
-
     def _render_data_diff(self, oldrev, newrev):
         from MoinMoin.util.diff_html import diff
         old_text = self.data_storage_to_internal(oldrev.read())
@@ -1216,7 +1209,7 @@
 
         i = Iri(scheme='wiki', authority='', path='/' + self.name)
 
-        doc = input_conv(self.data_storage_to_internal(data).split(u'\n'))
+        doc = input_conv(self.rev, self.contenttype)
         doc.set(moin_page.page_href, unicode(i))
         doc = item_conv(doc)