changeset 492:fa1b9282a3ac

branch merged with main repo
author Akash Sinha <akash2607@gmail.com>
date Mon, 18 Jul 2011 02:06:34 +0530
parents 52605e7140e0 (current diff) e02f3f45ab5d (diff)
children 17132086b9d6
files MoinMoin/items/__init__.py
diffstat 26 files changed, 126 insertions(+), 46 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/converter/_tests/test_creole_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/_tests/test_creole_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -257,6 +257,6 @@
         return self.output_re.sub(u'', buffer.getvalue())
 
     def do(self, input, output, args={}):
-        out = self.conv(input.split(u'\n'), **args)
+        out = self.conv(input, 'text/x.moin.creole;charset=utf-8', **args)
         assert self.serialize(out) == output
 
--- a/MoinMoin/converter/_tests/test_docbook_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/_tests/test_docbook_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -51,7 +51,7 @@
             to_conv = self.handle_input(input)
         elif args['nonamespace']:
             to_conv = input
-        out = self.conv([to_conv])
+        out = self.conv(to_conv, 'application/docbook+xml;charset=utf-8')
         f = StringIO.StringIO()
         out.write(f.write, namespaces=self.output_namespaces, )
         return self.output_re.sub(u'', f.getvalue())
--- a/MoinMoin/converter/_tests/test_mediawiki_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/_tests/test_mediawiki_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -138,7 +138,7 @@
         return self.output_re.sub(u'', buffer.getvalue())
 
     def do(self, input, output, args={}, skip=None):
-        out = self.conv(input.split(u'\n'), **args)
+        out = self.conv(input, 'text/x-mediawiki;charset=utf-8', **args)
         print self.serialize(out)
         assert self.serialize(out) == output
 
--- a/MoinMoin/converter/_tests/test_moinwiki_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/_tests/test_moinwiki_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -313,5 +313,5 @@
     def do(self, input, output, args={}, skip=None):
         if skip:
             py.test.skip(skip)
-        out = self.conv(input.split(u'\n'), **args)
+        out = self.conv(input, 'text/x.moin.wiki;charset=utf-8', **args)
         assert self.serialize(out) == output
--- a/MoinMoin/converter/_tests/test_moinwiki_in_out.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/_tests/test_moinwiki_in_out.py	Mon Jul 18 02:06:34 2011 +0530
@@ -520,7 +520,7 @@
     def do(self, input, output, args={}, skip=None):
         if skip:
             py.test.skip(skip)
-        out = self.conv_in(input.split(u'\n'), **args)
+        out = self.conv_in(input, 'text/x.moin.wiki;charset=utf-8', **args)
         out = self.conv_out(self.handle_input(self.serialize(out)), **args)
         assert self.handle_output(out) == output
 
--- a/MoinMoin/converter/_tests/test_rst_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/_tests/test_rst_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -150,7 +150,7 @@
         return self.output_re.sub(u'', buffer.getvalue())
 
     def do(self, input, output, args={}, skip=None):
-        out = self.conv(input.split(u'\n'), **args)
+        out = self.conv(input, 'text/x-rst;charset=utf-8', **args)
         assert self.serialize(out) == output
 
 coverage_modules = ['MoinMoin.converter.rst_in']
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/converter/_util.py	Mon Jul 18 02:06:34 2011 +0530
@@ -0,0 +1,47 @@
+# Copyright: 2011 MoinMoin:ThomasWaldmann
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - converter utilities
+"""
+
+
+from __future__ import absolute_import, division
+
+from MoinMoin.util.mime import Type
+
+
+def decode_data(data, contenttype=None):
+    """
+    return data as unicode text, reading and decoding it first if needed
+
+    data may be:
+    - a rev object or any other object that has a read() method
+    - a str (decoded using the charset parameter of contenttype, or
+      using utf-8 if contenttype is None or carries no charset)
+    - a unicode object (returned unchanged)
+
+    raises TypeError if the result is not a unicode object.
+    """
+    if hasattr(data, 'read'):
+        data = data.read()  # file-like object: fetch its whole content
+    # whatever read() gave us is handled like directly passed data below
+    if isinstance(data, str):
+        charset = 'utf-8'
+        if contenttype is not None:
+            charset = Type(contenttype).parameters.get('charset', charset)
+        data = data.decode(charset)
+    if isinstance(data, unicode):
+        return data
+    # neither decodable bytes nor unicode text: reject it
+    raise TypeError("data must be file-like or str (requires contenttype with charset) or unicode")
+
+
+def normalize_split_text(text):
+    """
+    turn CRLF line endings into LF, then return the text as a list of
+    lines (split at LF, so a trailing LF yields a final empty string)
+    """
+    unified = text.replace(u'\r\n', u'\n')
+    return unified.split(u'\n')
+
--- a/MoinMoin/converter/archive_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/archive_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -47,7 +47,7 @@
     def process_size(self, size):
         return unicode(size)
 
-    def __call__(self, fileobj):
+    def __call__(self, fileobj, contenttype=None, arguments=None):
         # we get a revision as fileobj
         self.item_name = fileobj.item.name
         try:
--- a/MoinMoin/converter/audio_video_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/audio_video_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -15,6 +15,7 @@
 from MoinMoin.util.iri import Iri
 from MoinMoin.util.tree import moin_page, xlink
 
+
 class Converter(object):
     """
     Convert audio/video to the corresponding <object> in the DOM Tree
@@ -26,7 +27,7 @@
     def __init__(self, input_type):
         self.input_type = input_type
 
-    def __call__(self, rev):
+    def __call__(self, rev, contenttype=None, arguments=None):
         item_name = rev.item.name
         attrib = {
             moin_page.type_: unicode(self.input_type),
--- a/MoinMoin/converter/creole_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/creole_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -33,6 +33,8 @@
 
 from ._args_wiki import parse as parse_arguments
 from ._wiki_macro import ConverterMacro
+from ._util import decode_data, normalize_split_text
+
 
 class _Iter(object):
     """
@@ -104,8 +106,10 @@
     def factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
-        iter_content = _Iter(content)
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        lines = normalize_split_text(text)
+        iter_content = _Iter(lines)
 
         body = self.parse_block(iter_content, arguments)
         root = moin_page.page(children=[body])
--- a/MoinMoin/converter/docbook_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/docbook_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -27,6 +27,8 @@
 from MoinMoin.util.tree import moin_page, xlink, docbook, xml, html
 
 from ._wiki_macro import ConverterMacro
+from ._util import decode_data, normalize_split_text
+
 
 class NameSpaceError(Exception):
     pass
@@ -199,11 +201,12 @@
     def _factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, aruments=None):
-        """
-        Function called by the converter to process
-        the conversion.
-        """
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
+        docbook_str = u'\n'.join(content)
+        logging.debug(docbook_str)
+
         # Initalize our attributes
         self.section_depth = 0
         self.heading_level = 0
@@ -215,11 +218,6 @@
         self.standard_attribute = {}
 
         # We will create an element tree from the DocBook content
-        # The content is given to the converter as a list of string,
-        # line per line.
-        # So we will concatenate all in one string.
-        docbook_str = u'\n'.join(content)
-        logging.debug(docbook_str)
         try:
             # XXX: The XML parser need bytestring.
             tree = ET.XML(docbook_str.encode('utf-8'))
--- a/MoinMoin/converter/docbook_out.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/docbook_out.py	Mon Jul 18 02:06:34 2011 +0530
@@ -15,9 +15,11 @@
 
 from MoinMoin import log
 logging = log.getLogger(__name__)
+
 from MoinMoin import wikiutil
 from MoinMoin.util.tree import html, moin_page, xlink, docbook, xml
 
+
 class Converter(object):
     """
     Converter application/x.moin.document -> application/docbook+xml
--- a/MoinMoin/converter/everything.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/everything.py	Mon Jul 18 02:06:34 2011 +0530
@@ -13,6 +13,7 @@
 from MoinMoin.util.iri import Iri
 from MoinMoin.util.tree import moin_page, xlink
 
+
 class Converter(object):
     """
     Convert a unsupported item to DOM Tree.
@@ -21,7 +22,7 @@
     def _factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, rev):
+    def __call__(self, rev, contenttype=None, arguments=None):
         item_name = rev.item.name
         attrib = {
             xlink.href: Iri(scheme='wiki', authority='', path='/'+item_name, query='do=get&rev=%d' % rev.revno),
--- a/MoinMoin/converter/html_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/html_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -24,6 +24,8 @@
 from MoinMoin.util.tree import html, moin_page, xlink, xml
 
 from ._wiki_macro import ConverterMacro
+from ._util import decode_data, normalize_split_text
+
 
 class Converter(object):
     """
@@ -80,13 +82,15 @@
     def _factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
+    def __call__(self, data, contenttype=None, arguments=None):
         """
         Function called by the converter to process the
         conversion.
 
         TODO: Add support for different arguments
         """
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
         # Be sure we have empty string in the base url
         self.base_url = ''
 
--- a/MoinMoin/converter/image_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/image_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -13,6 +13,7 @@
 from MoinMoin.util.iri import Iri
 from MoinMoin.util.tree import moin_page, xlink
 
+
 class Converter(object):
     """
     Convert an image to the corresponding <object> in the DOM Tree
@@ -24,7 +25,7 @@
     def __init__(self, input_type):
         self.input_type = input_type
 
-    def __call__(self, rev):
+    def __call__(self, rev, contenttype=None, arguments=None):
         item_name = rev.item.name
         attrib = {
             moin_page.type_: unicode(self.input_type),
--- a/MoinMoin/converter/include.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/include.py	Mon Jul 18 02:06:34 2011 +0530
@@ -26,6 +26,8 @@
 from MoinMoin.util.tree import html, moin_page, xinclude, xlink
 
 from MoinMoin.converter.html_out import wrap_object_with_overlay
+
+
 class XPointer(list):
     """
     Simple XPointer parser
@@ -265,6 +267,7 @@
 
         return tree
 
+
 from . import default_registry
 from MoinMoin.util.mime import Type, type_moin_document
 default_registry.register(Converter._factory, type_moin_document, type_moin_document)
--- a/MoinMoin/converter/link.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/link.py	Mon Jul 18 02:06:34 2011 +0530
@@ -19,6 +19,7 @@
 from MoinMoin.util.tree import html, moin_page, xlink, xinclude
 from MoinMoin.wikiutil import AbsItemName
 
+
 class ConverterBase(object):
     _tag_xlink_href = xlink.href
     _tag_xinclude_href = xinclude.href
@@ -194,6 +195,7 @@
 
         elem.set(self._tag_xlink_href, output)
 
+
 class ConverterItemRefs(ConverterBase):
     """
     determine all links and transclusions to other wiki items in this document
--- a/MoinMoin/converter/mediawiki_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/mediawiki_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -27,6 +27,8 @@
 from ._args import Arguments
 from ._args_wiki import parse as parse_arguments
 from ._wiki_macro import ConverterMacro
+from ._util import decode_data, normalize_split_text
+
 
 class _TableArguments(object):
     rules = r'''
@@ -87,7 +89,9 @@
     def factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
         iter_content = _Iter(content)
         self.preprocessor = self.Mediawiki_preprocessor()
         body = self.parse_block(iter_content, arguments)
--- a/MoinMoin/converter/moinwiki_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/moinwiki_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -22,10 +22,12 @@
 from MoinMoin.util.iri import Iri
 from MoinMoin.util.tree import html, moin_page, xlink, xinclude
 from MoinMoin.util.interwiki import resolve_interwiki
+from MoinMoin.i18n import _
+
 from ._args import Arguments
 from ._args_wiki import parse as parse_arguments
 from ._wiki_macro import ConverterMacro
-from MoinMoin.i18n import _
+from ._util import decode_data, normalize_split_text
 
 
 class _Iter(object):
@@ -219,8 +221,10 @@
     def factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
-        iter_content = _Iter(content)
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        lines = normalize_split_text(text)
+        iter_content = _Iter(lines)
 
         body = self.parse_block(iter_content, arguments)
         root = moin_page.page(children=(body, ))
--- a/MoinMoin/converter/nonexistent_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/nonexistent_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -14,6 +14,7 @@
 from MoinMoin.util.iri import Iri
 from MoinMoin.util.tree import moin_page, xlink
 
+
 class Converter(object):
     """
     Convert a non-existing item to DOM Tree.
@@ -22,7 +23,7 @@
     def _factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, rev):
+    def __call__(self, rev, contenttype=None, arguments=None):
         item_name = rev.item.name
         attrib = {
             xlink.href: Iri(scheme='wiki', authority='', path='/'+item_name, query='do=modify'),
--- a/MoinMoin/converter/pygments_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/pygments_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -23,6 +23,7 @@
 
 from MoinMoin.util.mime import Type, type_moin_document
 from MoinMoin.util.tree import moin_page
+from ._util import decode_data, normalize_split_text
 
 
 if pygments:
@@ -103,7 +104,9 @@
                     lexer = pygments.lexers.get_lexer_for_mimetype('text/plain')
             self.lexer = lexer
 
-        def __call__(self, content, arguments=None):
+        def __call__(self, data, contenttype=None, arguments=None):
+            text = decode_data(data, contenttype)
+            content = normalize_split_text(text)
             content = u'\n'.join(content)
             blockcode = moin_page.blockcode(attrib={moin_page.class_: 'highlight'})
             pygments.highlight(content, self.lexer, TreeFormatter(), blockcode)
--- a/MoinMoin/converter/rst_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/rst_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -28,6 +28,8 @@
 from MoinMoin.util.iri import Iri
 from MoinMoin.util.tree import html, moin_page, xlink
 
+from ._util import decode_data, normalize_split_text
+
 #### TODO: try block (do not crash if we don't have docutils)
 from docutils import nodes, utils, writers, core
 from docutils.parsers.rst import Parser
@@ -36,6 +38,7 @@
 from docutils.parsers.rst import directives, roles
 #####
 
+
 class NodeVisitor(object):
     """
     Part of docutils which converts docutils DOM tree to Moin DOM tree
@@ -741,12 +744,13 @@
 
 
 class Converter(object):
-
     @classmethod
     def factory(cls, input, output, **kw):
         return cls()
 
-    def __call__(self, input, arguments=None):
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        input = normalize_split_text(text)
         parser = MoinDirectives()
         while True:
             input = u'\n'.join(input)
@@ -768,6 +772,7 @@
         walkabout(docutils_tree, visitor)
         return visitor.tree()
 
+
 from . import default_registry
 from MoinMoin.util.mime import Type, type_moin_document
 default_registry.register(Converter.factory,
--- a/MoinMoin/converter/smiley.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/smiley.py	Mon Jul 18 02:06:34 2011 +0530
@@ -10,12 +10,15 @@
 """
 
 
+from __future__ import absolute_import, division
+
 import re
 
 from emeraldtree import ElementTree as ET
 
 from MoinMoin.util.tree import moin_page
 
+
 class Converter(object):
     """
     Replace each smiley by the corresponding element in the DOM Tree
--- a/MoinMoin/converter/text_csv_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/text_csv_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -11,6 +11,8 @@
 import csv
 
 from ._table import TableMixin
+from ._util import decode_data, normalize_split_text
+
 
 class Converter(TableMixin):
     """
@@ -21,10 +23,9 @@
     def _factory(cls, type_input, type_output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
-        """
-        Parse the CSV text and return DOM tree.
-        """
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
         # as of py 2.6.5 (and in the year 2010), the csv module seems to still
         # have troubles with unicode, thus we encode to utf-8 ...
         content = [line.encode('utf-8') for line in content]
--- a/MoinMoin/converter/text_in.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/converter/text_in.py	Mon Jul 18 02:06:34 2011 +0530
@@ -16,6 +16,8 @@
 from __future__ import absolute_import, division
 
 from MoinMoin.util.tree import moin_page
+from ._util import decode_data, normalize_split_text
+
 
 class Converter(object):
     """
@@ -26,8 +28,9 @@
     def _factory(cls, type_input, type_output, **kw):
         return cls()
 
-    def __call__(self, content, arguments=None):
-        """Parse the text and return DOM tree."""
+    def __call__(self, data, contenttype=None, arguments=None):
+        text = decode_data(data, contenttype)
+        content = normalize_split_text(text)
         blockcode = moin_page.blockcode()
         for line in content:
             if len(blockcode):
--- a/MoinMoin/items/__init__.py	Mon Jul 18 01:56:06 2011 +0530
+++ b/MoinMoin/items/__init__.py	Mon Jul 18 02:06:34 2011 +0530
@@ -219,9 +219,6 @@
         # override this in child classes
         return ''
 
-    def feed_input_conv(self):
-        return self.rev
-
     def internal_representation(self, converters=['smiley']):
         """
         Return the internal representation of a document using a DOM Tree
@@ -250,8 +247,7 @@
 
             # We can process the conversion
             links = Iri(scheme='wiki', authority='', path='/' + self.name)
-            input = self.feed_input_conv()
-            doc = input_conv(input)
+            doc = input_conv(self.rev, self.contenttype)
             # XXX is the following assuming that the top element of the doc tree
             # is a moin_page.page element? if yes, this is the wrong place to do that
             # as not every doc will have that element (e.g. for images, we just get
@@ -1105,9 +1101,6 @@
         """ convert data from storage format to memory format """
         return data.decode(config.charset).replace(u'\r\n', u'\n')
 
-    def feed_input_conv(self):
-        return self.data_storage_to_internal(self.data).split(u'\n')
-
     def _render_data_diff(self, oldrev, newrev):
         from MoinMoin.util.diff_html import diff
         old_text = self.data_storage_to_internal(oldrev.read())
@@ -1213,7 +1206,7 @@
 
         i = Iri(scheme='wiki', authority='', path='/' + self.name)
 
-        doc = input_conv(self.data_storage_to_internal(data).split(u'\n'))
+        doc = input_conv(self.rev, self.contenttype)
         doc.set(moin_page.page_href, unicode(i))
         doc = item_conv(doc)