changeset 221:faa06b4873dd

More contenttype related changes/fixes, see below. Change CONTENTTYPE constant, fix xml file. Fix PygmentsConverter to only use type/subtype, no params when calling get_lexer_for_mimetype(). Fix default mimetype of existing revisions to be 'application/octet-stream' (this is only used if there is no CONTENTTYPE metadata). Item: use mime.Type objects (not str) to find best-matching item class. For text/* content types, use charset=utf-8 param. Same for application/docbook+xml;charset=utf-8. Fix some calls to use MimeType.content_type() rather than .mime_type(). Rename class attr of Item subclasses from supported_mimetypes to supported_types. Fix strings there so they can be used for Type initialization. Modify tests accordingly. maint/create_item.py: use --content-type.
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Mon, 09 May 2011 19:06:11 +0200
parents 60242b1ba235
children f6b21ea7a2dd
files MoinMoin/config/__init__.py MoinMoin/converter/pygments_in.py MoinMoin/items/__init__.py MoinMoin/items/_tests/test_Item.py MoinMoin/script/maint/create_item.py MoinMoin/storage/backends/fileserver.py MoinMoin/storage/backends/fs19.py contrib/xml/preloaded_items.xml
diffstat 8 files changed, 77 insertions(+), 71 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/config/__init__.py	Sat May 07 20:23:40 2011 +0200
+++ b/MoinMoin/config/__init__.py	Mon May 09 19:06:11 2011 +0200
@@ -96,7 +96,7 @@
 # needs more precise name / use case:
 SOMEDICT = "somedict"
 
-CONTENTTYPE = "mimetype" # XXX change later to "content-type"
+CONTENTTYPE = "contenttype"
 SIZE = "size"
 LANGUAGE = "language"
 ITEMLINKS = "itemlinks"
--- a/MoinMoin/converter/pygments_in.py	Sat May 07 20:23:40 2011 +0200
+++ b/MoinMoin/converter/pygments_in.py	Mon May 09 19:06:11 2011 +0200
@@ -87,14 +87,16 @@
                 lexer = pygments.lexers.find_lexer_class(pygments_name)
                 return cls(lexer())
 
-        def __init__(self, lexer=None, mimetype=None):
+        def __init__(self, lexer=None, contenttype=None):
             """
             Create a Pygments Converter.
 
             :param lexer: pygments lexer instance
-            :param mimetype: mimetype to get a lexer for
+            :param contenttype: contenttype to get a lexer for
             """
-            if lexer is None and mimetype is not None:
+            if lexer is None and contenttype is not None:
+                ct = Type(contenttype)
+                mimetype = '%s/%s' % (ct.type, ct.subtype) # pygments can't process parameters (like e.g. ...;charset=utf-8)
                 try:
                     lexer = pygments.lexers.get_lexer_for_mimetype(mimetype)
                 except pygments.util.ClassNotFound:
--- a/MoinMoin/items/__init__.py	Sat May 07 20:23:40 2011 +0200
+++ b/MoinMoin/items/__init__.py	Mon May 09 19:06:11 2011 +0200
@@ -124,6 +124,7 @@
             logging.debug("Got item: %r" % name)
             try:
                 rev = item.get_revision(rev_no)
+                contenttype = 'application/octet-stream' # it exists
             except NoSuchRevisionError:
                 try:
                     rev = item.get_revision(-1) # fall back to current revision
@@ -133,21 +134,22 @@
                     rev = DummyRev(item, contenttype)
                     logging.debug("Item %r, created dummy revision with contenttype %r" % (name, contenttype))
             logging.debug("Got item %r, revision: %r" % (name, rev_no))
-        contenttype = rev.get(CONTENTTYPE) or contenttype # XXX: Why do we need ... or ... ?
+        contenttype = rev.get(CONTENTTYPE) or contenttype # use contenttype in case our metadata does not provide CONTENTTYPE
         logging.debug("Item %r, got contenttype %r from revision meta" % (name, contenttype))
         logging.debug("Item %r, rev meta dict: %r" % (name, dict(rev)))
 
-        def _find_item_class(contenttype, BaseClass, best_match_len=-1):
+        def _find_item_class(contenttype, BaseClass, best_match_len=-1): # XXX use MoinMoin.util.registry for this?
             #logging.debug("_find_item_class(%r,%r,%r)" % (contenttype, BaseClass, best_match_len))
+            ct = Type(contenttype)
             Class = None
             for ItemClass in BaseClass.__subclasses__():
-                for supported_mimetype in ItemClass.supported_mimetypes:
-                    if contenttype.startswith(supported_mimetype):
-                        match_len = len(supported_mimetype)
+                for supported_type in ItemClass.supported_types:
+                    if Type(supported_type).issupertype(ct): # XXX optimize by directly putting Type(...) into supported_types list
+                        match_len = len(supported_type)
                         if match_len > best_match_len:
                             best_match_len = match_len
                             Class = ItemClass
-                            #logging.debug("_find_item_class: new best match: %r by %r)" % (supported_mimetype, ItemClass))
+                            #logging.debug("_find_item_class: new best match: %r by %r)" % (supported_type, ItemClass))
                 best_match_len, better_Class = _find_item_class(contenttype, ItemClass, best_match_len)
                 if better_Class:
                     Class = better_Class
@@ -371,18 +373,18 @@
     def modify(self):
         # called from modify UI/POST
         data_file = request.files.get('data_file')
-        contenttype = request.values.get('contenttype', 'text/plain') # XXX
+        contenttype = request.values.get('contenttype', 'text/plain;charset=utf-8')
         if data_file and data_file.filename:
             # user selected a file to upload
             data = data_file.stream
-            contenttype = MimeType(filename=data_file.filename).mime_type()
+            contenttype = MimeType(filename=data_file.filename).content_type()
         else:
             # take text from textarea
-            data = request.form.get('data_text', '')
+            data = request.form.get('data_text', u'') # we get unicode from the form
             if data:
                 data = self.data_form_to_internal(data)
                 data = self.data_internal_to_storage(data)
-                contenttype = 'text/plain'
+                contenttype = 'text/plain;charset=utf-8' # XXX is there a way to get the charset of the form?
             else:
                 data = '' # could've been u'' also!
                 contenttype = None
@@ -518,22 +520,22 @@
 
 
 class NonExistent(Item):
-    supported_mimetypes = ['application/x-nonexistent']
+    supported_types = ['application/x-nonexistent', ]
     contenttype_groups = [
         ('markup text items', [
-            ('text/x.moin.wiki', 'Wiki (MoinMoin)'),
-            ('text/x.moin.creole', 'Wiki (Creole)'),
-            ('text/x-mediawiki', 'Wiki (MediaWiki)'),
-            ('text/x-rst', 'ReST'),
-            ('application/docbook+xml', 'DocBook'),
-            ('text/html', 'HTML'),
+            ('text/x.moin.wiki;charset=utf-8', 'Wiki (MoinMoin)'),
+            ('text/x.moin.creole;charset=utf-8', 'Wiki (Creole)'),
+            ('text/x-mediawiki;charset=utf-8', 'Wiki (MediaWiki)'),
+            ('text/x-rst;charset=utf-8', 'ReST'),
+            ('application/docbook+xml;charset=utf-8', 'DocBook'),
+            ('text/html;charset=utf-8', 'HTML'),
         ]),
         ('other text items', [
-            ('text/plain', 'plain text'),
-            ('text/x-diff', 'diff/patch'),
-            ('text/x-python', 'python code'),
-            ('text/csv', 'csv'),
-            ('text/x-irclog', 'IRC log'),
+            ('text/plain;charset=utf-8', 'plain text'),
+            ('text/x-diff;charset=utf-8', 'diff/patch'),
+            ('text/x-python;charset=utf-8', 'python code'),
+            ('text/csv;charset=utf-8', 'csv'),
+            ('text/x-irclog;charset=utf-8', 'IRC log'),
         ]),
         ('image items', [
             ('image/jpeg', 'JPEG'),
@@ -582,7 +584,7 @@
 
 class Binary(Item):
     """ An arbitrary binary item, fallback class for every item mimetype. """
-    supported_mimetypes = [''] # fallback, because every mimetype starts with ''
+    supported_types = ['*/*']
 
     modify_help = """\
 There is no help, you're doomed!
@@ -673,8 +675,9 @@
             try:
                 mimestr = rev[CONTENTTYPE]
             except KeyError:
-                mimestr = mimetypes.guess_type(rev.item.name)[0]
-            mt = MimeType(mimestr=mimestr)
+                mt = MimeType(filename=rev.item.name)
+            else:
+                mt = MimeType(mimestr=mimestr)
             content_disposition = mt.content_disposition(app.cfg)
             content_type = mt.content_type()
             content_length = rev[SIZE]
@@ -692,11 +695,11 @@
 
 class RenderableBinary(Binary):
     """ This is a base class for some binary stuff that renders with a object tag. """
-    supported_mimetypes = []
+    supported_types = []
 
 
 class Application(Binary):
-    supported_mimetypes = []
+    supported_types = []
 
 
 class TarMixin(object):
@@ -768,7 +771,7 @@
 
 
 class ApplicationXTar(TarMixin, Application):
-    supported_mimetypes = ['application/x-tar', 'application/x-gtar']
+    supported_types = ['application/x-tar', 'application/x-gtar', ]
 
     def feed_input_conv(self):
         return self.rev
@@ -802,48 +805,48 @@
 
 
 class ApplicationZip(ZipMixin, Application):
-    supported_mimetypes = ['application/zip']
+    supported_types = ['application/zip', ]
 
     def feed_input_conv(self):
         return self.rev
 
 
 class PDF(Application):
-    supported_mimetypes = ['application/pdf', ]
+    supported_types = ['application/pdf', ]
 
 
 class Video(Binary):
-    supported_mimetypes = ['video/', ]
+    supported_types = ['video/*', ]
 
 
 class Audio(Binary):
-    supported_mimetypes = ['audio/', ]
+    supported_types = ['audio/*', ]
 
 
 class Image(Binary):
     """ Any Image mimetype """
-    supported_mimetypes = ['image/', ]
+    supported_types = ['image/*', ]
 
 
 class RenderableImage(RenderableBinary):
     """ Any Image mimetype """
-    supported_mimetypes = []
+    supported_types = []
 
 
 class SvgImage(RenderableImage):
     """ SVG images use <object> tag mechanism from RenderableBinary base class """
-    supported_mimetypes = ['image/svg+xml']
+    supported_types = ['image/svg+xml', ]
 
 
 class RenderableBitmapImage(RenderableImage):
     """ PNG/JPEG/GIF images use <img> tag (better browser support than <object>) """
-    supported_mimetypes = [] # if mimetype is also transformable, please list
-                             # in TransformableImage ONLY!
+    supported_types = [] # if mimetype is also transformable, please list
+                         # in TransformableImage ONLY!
 
 
 class TransformableBitmapImage(RenderableBitmapImage):
     """ We can transform (resize, rotate, mirror) some image types """
-    supported_mimetypes = ['image/png', 'image/jpeg', 'image/gif', ]
+    supported_types = ['image/png', 'image/jpeg', 'image/gif', ]
 
     def _transform(self, content_type, size=None, transpose_op=None):
         """ resize to new size (optional), transpose according to exif infos,
@@ -983,7 +986,7 @@
 
 class Text(Binary):
     """ Any kind of text """
-    supported_mimetypes = ['text/']
+    supported_types = ['text/*', ]
 
     template = "modify_text.html"
 
@@ -1035,7 +1038,7 @@
         data_text = self.data_storage_to_internal(self.data)
         # TODO: use registry as soon as it is in there
         from MoinMoin.converter.pygments_in import Converter as PygmentsConverter
-        pygments_conv = PygmentsConverter(mimetype=self.contenttype)
+        pygments_conv = PygmentsConverter(contenttype=self.contenttype)
         doc = pygments_conv(data_text.split(u'\n'))
         # TODO: Real output format
         html_conv = reg.get(type_moin_document, Type('application/x-xhtml-moin-page'))
@@ -1102,22 +1105,22 @@
 
 class MoinWiki(MarkupItem):
     """ MoinMoin wiki markup """
-    supported_mimetypes = ['text/x.moin.wiki']
+    supported_types = ['text/x.moin.wiki', ]
 
 
 class CreoleWiki(MarkupItem):
     """ Creole wiki markup """
-    supported_mimetypes = ['text/x.moin.creole']
+    supported_types = ['text/x.moin.creole', ]
 
 
 class MediaWiki(MarkupItem):
     """ MediaWiki markup """
-    supported_mimetypes = ['text/x-mediawiki']
+    supported_types = ['text/x-mediawiki', ]
 
 
 class ReST(MarkupItem):
     """ ReStructured Text markup """
-    supported_mimetypes = ['text/x-rst']
+    supported_types = ['text/x-rst', ]
 
 
 class HTML(Text):
@@ -1130,7 +1133,7 @@
 
     Note: If raw revision data is accessed, unsafe stuff might be present!
     """
-    supported_mimetypes = ['text/html']
+    supported_types = ['text/html', ]
 
     template = "modify_text_html.html"
 
@@ -1160,7 +1163,7 @@
 
 class DocBook(MarkupItem):
     """ DocBook Document """
-    supported_mimetypes = ['application/docbook+xml']
+    supported_types = ['application/docbook+xml', ]
 
     def _convert(self, doc):
         from emeraldtree import ElementTree as ET
@@ -1189,7 +1192,7 @@
         tree.write(file_to_send, namespaces=output_namespaces)
 
         # We determine the different parameters for the reply
-        mt = MimeType(mimestr='application/docbook+xml')
+        mt = MimeType(mimestr='application/docbook+xml;charset=utf-8')
         content_disposition = mt.content_disposition(app.cfg)
         content_type = mt.content_type()
         # After creation of the StringIO, we are at the end of the file
@@ -1210,7 +1213,7 @@
     """
     drawings by TWikiDraw applet. It creates three files which are stored as tar file.
     """
-    supported_mimetypes = ["application/x-twikidraw"]
+    supported_types = ["application/x-twikidraw", ]
     modify_help = ""
     template = "modify_twikidraw.html"
 
@@ -1286,7 +1289,7 @@
     """
     drawings by AnyWikiDraw applet. It creates three files which are stored as tar file.
     """
-    supported_mimetypes = ["application/x-anywikidraw"]
+    supported_types = ["application/x-anywikidraw", ]
     modify_help = ""
     template = "modify_anywikidraw.html"
 
@@ -1362,7 +1365,7 @@
 class SvgDraw(TarMixin, Image):
     """ drawings by svg-edit. It creates two files (svg, png) which are stored as tar file. """
 
-    supported_mimetypes = ['application/x-svgdraw']
+    supported_types = ['application/x-svgdraw', ]
     modify_help = ""
     template = "modify_svg-edit.html"
 
--- a/MoinMoin/items/_tests/test_Item.py	Sat May 07 20:23:40 2011 +0200
+++ b/MoinMoin/items/_tests/test_Item.py	Mon May 09 19:06:11 2011 +0200
@@ -26,6 +26,7 @@
         for contenttype, ExpectedClass in [
                 ('application/x-foobar', Binary),
                 ('text/plain', Text),
+                ('text/plain;charset=utf-8', Text),
                 ('image/tiff', Image),
                 ('image/png', TransformableBitmapImage),
             ]:
@@ -34,7 +35,7 @@
 
     def testCRUD(self):
         name = u'NewItem'
-        contenttype = 'text/plain'
+        contenttype = 'text/plain;charset=utf-8'
         data = 'foobar'
         meta = dict(foo='bar')
         comment = u'saved it'
@@ -83,19 +84,19 @@
         basename = u'Foo'
         for name in ['', '/ab', '/cd/ef', '/gh', '/ij/kl', ]:
             item = Item.create(basename + name)
-            item._save({}, "foo", contenttype='text/plain')
+            item._save({}, "foo", contenttype='text/plain;charset=utf-8')
 
         # check index
         baseitem = Item.create(basename)
         index = baseitem.get_index()
-        assert index == [(u'Foo/ab', u'ab', 'text/plain'),
-                         (u'Foo/cd/ef', u'cd/ef', 'text/plain'),
-                         (u'Foo/gh', u'gh', 'text/plain'),
-                         (u'Foo/ij/kl', u'ij/kl', 'text/plain'),
+        assert index == [(u'Foo/ab', u'ab', 'text/plain;charset=utf-8'),
+                         (u'Foo/cd/ef', u'cd/ef', 'text/plain;charset=utf-8'),
+                         (u'Foo/gh', u'gh', 'text/plain;charset=utf-8'),
+                         (u'Foo/ij/kl', u'ij/kl', 'text/plain;charset=utf-8'),
                         ]
         flat_index = baseitem.flat_index()
-        assert flat_index == [(u'Foo/ab', u'ab', 'text/plain'),
-                              (u'Foo/gh', u'gh', 'text/plain'),
+        assert flat_index == [(u'Foo/ab', u'ab', 'text/plain;charset=utf-8'),
+                              (u'Foo/gh', u'gh', 'text/plain;charset=utf-8'),
                              ]
 
 
--- a/MoinMoin/script/maint/create_item.py	Sat May 07 20:23:40 2011 +0200
+++ b/MoinMoin/script/maint/create_item.py	Mon May 09 19:06:11 2011 +0200
@@ -23,8 +23,8 @@
             help='Name of the item to create'),
         Option('--file', '-f', dest='data_file', type=unicode, required=True,
             help='Filename of file to read in and store as item.'),
-        Option('--contenttype', '-m', dest='contenttype', type=unicode, required=True,
-            help='contenttype of item'),
+        Option('--content-type', '-m', dest='contenttype', type=unicode, required=True,
+            help='content-type of item'),
         Option('--comment', '-c', dest='comment', type=unicode,
             help='comment for item')
     )
--- a/MoinMoin/storage/backends/fileserver.py	Sat May 07 20:23:40 2011 +0200
+++ b/MoinMoin/storage/backends/fileserver.py	Mon May 09 19:06:11 2011 +0200
@@ -196,7 +196,7 @@
     """ A filesystem file """
     def __init__(self, item, revno):
         FileDirRevision.__init__(self, item, revno)
-        contenttype = MimeType(filename=self._fs_data_fname).mime_type()
+        contenttype = MimeType(filename=self._fs_data_fname).content_type()
         self._fs_meta.update({
             CONTENTTYPE: contenttype,
         })
--- a/MoinMoin/storage/backends/fs19.py	Sat May 07 20:23:40 2011 +0200
+++ b/MoinMoin/storage/backends/fs19.py	Mon May 09 19:06:11 2011 +0200
@@ -492,7 +492,7 @@
         # attachments in moin 1.9 were protected by their "parent" page's acl
         if item._fs_parent_acl is not None:
             meta[ACL] = item._fs_parent_acl # XXX not needed for acl_hierarchic
-        meta[CONTENTTYPE] = unicode(MimeType(filename=item._fs_attachname).mime_type())
+        meta[CONTENTTYPE] = unicode(MimeType(filename=item._fs_attachname).content_type())
         with open(attpath, 'rb') as f:
             size, hash_name, hash_digest = hash_hexdigest(f)
         meta[hash_name] = hash_digest
--- a/contrib/xml/preloaded_items.xml	Sat May 07 20:23:40 2011 +0200
+++ b/contrib/xml/preloaded_items.xml	Mon May 09 19:06:11 2011 +0200
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <backend><item name="Home"><meta></meta>
-<revision revno="0"><meta><entry key="mimetype"><str>text/x.moin.wiki</str>
+<revision revno="0"><meta><entry key="contenttype"><str>text/x.moin.wiki;charset=utf-8</str>
 </entry>
 <entry key="sha1"><str>71b13de8c73fe3c3b9dd1cac8848dd1ad2f5d1be</str>
 </entry>
@@ -33,7 +33,7 @@
 </revision>
 </item>
 <item name="rst"><meta></meta>
-<revision revno="0"><meta><entry key="mimetype"><str>text/x-rst</str>
+<revision revno="0"><meta><entry key="contenttype"><str>text/x-rst;charset=utf-8</str>
 </entry>
 <entry key="sha1"><str>f4785f356c041ddfd1e1efbafc73b32141805a50</str>
 </entry>
@@ -62,7 +62,7 @@
 </revision>
 </item>
 <item name="moin"><meta></meta>
-<revision revno="0"><meta><entry key="mimetype"><str>text/x.moin.wiki</str>
+<revision revno="0"><meta><entry key="contenttype"><str>text/x.moin.wiki;charset=utf-8</str>
 </entry>
 <entry key="sha1"><str>7042df8e761faffbbf1f08a87c1404f75f7b3e64</str>
 </entry>
@@ -116,7 +116,7 @@
 </revision>
 </item>
 <item name="creole"><meta></meta>
-<revision revno="0"><meta><entry key="mimetype"><str>text/x.moin.creole</str>
+<revision revno="0"><meta><entry key="contenttype"><str>text/x.moin.creole;charset=utf-8</str>
 </entry>
 <entry key="sha1"><str>342b4fb55122cd53583639792218c501b9f36488</str>
 </entry>
@@ -161,7 +161,7 @@
 </revision>
 </item>
 <item name="docbook"><meta></meta>
-<revision revno="0"><meta><entry key="mimetype"><str>application/docbook+xml</str>
+<revision revno="0"><meta><entry key="contenttype"><str>application/docbook+xml;charset=utf-8</str>
 </entry>
 <entry key="sha1"><str>20187313253eb0ffc0a608fcc73999da61147826</str>
 </entry>