changeset 173:89f50aed143f

move mimetype related code from wikiutil to util.mimetype
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Wed, 06 Apr 2011 16:46:54 +0200
parents 0b2454d84586
children e8f61cbd661b b728fdb660d7
files MoinMoin/items/__init__.py MoinMoin/storage/backends/fileserver.py MoinMoin/storage/backends/fs19.py MoinMoin/util/mimetype.py MoinMoin/util/plugins.py MoinMoin/wikiutil.py
diffstat 6 files changed, 217 insertions(+), 202 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/items/__init__.py	Wed Apr 06 03:49:00 2011 +0200
+++ b/MoinMoin/items/__init__.py	Wed Apr 06 16:46:54 2011 +0200
@@ -20,8 +20,10 @@
 import zipfile
 import tempfile
 from StringIO import StringIO
+
 from MoinMoin.security.textcha import TextCha, TextChaizedForm, TextChaValid
 from MoinMoin.util.forms import make_generator
+from MoinMoin.util.mimetype import MimeType
 
 try:
     import PIL
@@ -371,7 +373,7 @@
         if data_file and data_file.filename:
             # user selected a file to upload
             data = data_file.stream
-            mimetype = wikiutil.MimeType(filename=data_file.filename).mime_type()
+            mimetype = MimeType(filename=data_file.filename).mime_type()
         else:
             # take text from textarea
             data = request.form.get('data_text', '')
@@ -654,7 +656,7 @@
         filename = None
         if member: # content = file contained within a archive item revision
             path, filename = os.path.split(member)
-            mt = wikiutil.MimeType(filename=filename)
+            mt = MimeType(filename=filename)
             content_disposition = mt.content_disposition(app.cfg)
             content_type = mt.content_type()
             content_length = None
@@ -665,7 +667,7 @@
                 mimestr = rev[MIMETYPE]
             except KeyError:
                 mimestr = mimetypes.guess_type(rev.item.name)[0]
-            mt = wikiutil.MimeType(mimestr=mimestr)
+            mt = MimeType(mimestr=mimestr)
             content_disposition = mt.content_disposition(app.cfg)
             content_type = mt.content_type()
             content_length = rev[SIZE]
@@ -1173,7 +1175,7 @@
         tree.write(file_to_send, namespaces=output_namespaces)
 
         # We determine the different parameters for the reply
-        mt = wikiutil.MimeType(mimestr='application/docbook+xml')
+        mt = MimeType(mimestr='application/docbook+xml')
         content_disposition = mt.content_disposition(app.cfg)
         content_type = mt.content_type()
         # After creation of the StringIO, we are at the end of the file
--- a/MoinMoin/storage/backends/fileserver.py	Wed Apr 06 03:49:00 2011 +0200
+++ b/MoinMoin/storage/backends/fileserver.py	Wed Apr 06 16:46:54 2011 +0200
@@ -18,10 +18,11 @@
 from MoinMoin import log
 logging = log.getLogger(__name__)
 
-from MoinMoin import wikiutil, config
+from MoinMoin import config
 
 from MoinMoin.storage import Backend, Item, StoredRevision
 from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError
+from MoinMoin.util.mimetype import MimeType
 
 from MoinMoin.config import ACL, MIMETYPE, ACTION, COMMENT, MTIME, SIZE
 
@@ -193,7 +194,7 @@
     """ A filesystem file """
     def __init__(self, item, revno):
         FileDirRevision.__init__(self, item, revno)
-        mimetype = wikiutil.MimeType(filename=self._fs_data_fname).mime_type()
+        mimetype = MimeType(filename=self._fs_data_fname).mime_type()
         self._fs_meta.update({
             MIMETYPE: mimetype,
         })
--- a/MoinMoin/storage/backends/fs19.py	Wed Apr 06 03:49:00 2011 +0200
+++ b/MoinMoin/storage/backends/fs19.py	Wed Apr 06 16:46:54 2011 +0200
@@ -31,7 +31,7 @@
 from MoinMoin import log
 logging = log.getLogger(__name__)
 
-from MoinMoin import wikiutil, config
+from MoinMoin import config
 from MoinMoin.config import ACL, MIMETYPE, UUID, NAME, NAME_OLD, REVERTED_TO, \
                             ACTION, ADDRESS, HOSTNAME, USERID, MTIME, EXTRA, COMMENT, \
                             IS_SYSITEM, SYSITEM_VERSION, \
@@ -39,10 +39,10 @@
 from MoinMoin.storage import Backend, Item, StoredRevision
 from MoinMoin.storage.backends._fsutils import quoteWikinameFS, unquoteWikiname
 from MoinMoin.storage.backends._flatutils import split_body
+from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError
+from MoinMoin.util.mimetype import MimeType
 
 
-from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError
-
 DELETED_MODE_KEEP = 'keep'
 DELETED_MODE_KILL = 'kill'
 
@@ -482,7 +482,7 @@
         # attachments in moin 1.9 were protected by their "parent" page's acl
         if item._fs_parent_acl is not None:
             meta[ACL] = item._fs_parent_acl # XXX not needed for acl_hierarchic
-        meta[MIMETYPE] = unicode(wikiutil.MimeType(filename=item._fs_attachname).mime_type())
+        meta[MIMETYPE] = unicode(MimeType(filename=item._fs_attachname).mime_type())
         size, hash_name, hash_digest = hash_hexdigest(open(attpath, 'rb'))
         meta[hash_name] = hash_digest
         meta[SIZE] = size
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/util/mimetype.py	Wed Apr 06 16:46:54 2011 +0200
@@ -0,0 +1,201 @@
+# Copyright: 2005-2011 MoinMoin:ThomasWaldmann
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+MoinMoin - mimetype support
+"""
+
+
+from __future__ import absolute_import, division
+
+import mimetypes
+
+from MoinMoin import config
+
+
+MIMETYPES_MORE = {
+ # OpenOffice 2.x & other open document stuff
+ '.odt': 'application/vnd.oasis.opendocument.text',
+ '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
+ '.odp': 'application/vnd.oasis.opendocument.presentation',
+ '.odg': 'application/vnd.oasis.opendocument.graphics',
+ '.odc': 'application/vnd.oasis.opendocument.chart',
+ '.odf': 'application/vnd.oasis.opendocument.formula',
+ '.odb': 'application/vnd.oasis.opendocument.database',
+ '.odi': 'application/vnd.oasis.opendocument.image',
+ '.odm': 'application/vnd.oasis.opendocument.text-master',
+ '.ott': 'application/vnd.oasis.opendocument.text-template',
+ '.ots': 'application/vnd.oasis.opendocument.spreadsheet-template',
+ '.otp': 'application/vnd.oasis.opendocument.presentation-template',
+ '.otg': 'application/vnd.oasis.opendocument.graphics-template',
+ # some systems (like Mac OS X) don't have some of these:
+ '.patch': 'text/x-diff',
+ '.diff': 'text/x-diff',
+ '.py': 'text/x-python',
+ '.cfg': 'text/plain',
+ '.conf': 'text/plain',
+ '.irc': 'text/plain',
+ '.md5': 'text/plain',
+ '.csv': 'text/csv',
+ '.flv': 'video/x-flv',
+ '.wmv': 'video/x-ms-wmv',
+ '.swf': 'application/x-shockwave-flash',
+ '.moin': 'text/x.moin.wiki',
+ '.creole': 'text/x.moin.creole',
+}
+
+# add all mimetype patterns of pygments
+import pygments.lexers
+
+for name, short, patterns, mime in pygments.lexers.get_all_lexers():
+    for pattern in patterns:
+        if pattern.startswith('*.') and mime:
+            MIMETYPES_MORE[pattern[1:]] = mime[0]
+
+[mimetypes.add_type(mimetype, ext, True) for ext, mimetype in MIMETYPES_MORE.items()]
+
+MIMETYPES_sanitize_mapping = {
+    # this stuff is text, but got application/* for unknown reasons
+    ('application', 'docbook+xml'): ('text', 'docbook'),
+    ('application', 'x-latex'): ('text', 'latex'),
+    ('application', 'x-tex'): ('text', 'tex'),
+    ('application', 'javascript'): ('text', 'javascript'),
+}
+
+MIMETYPES_spoil_mapping = {} # inverse mapping of above
+for _key, _value in MIMETYPES_sanitize_mapping.items():
+    MIMETYPES_spoil_mapping[_value] = _key
+
+
+class MimeType(object):
+    """ represents a mimetype like text/plain """
+
+    def __init__(self, mimestr=None, filename=None):
+        self.major = self.minor = None # sanitized mime type and subtype
+        self.params = {} # parameters like "charset" or others
+        self.charset = None # this stays None until we know for sure!
+        self.raw_mimestr = mimestr
+        self.filename = filename
+        if mimestr:
+            self.parse_mimetype(mimestr)
+        elif filename:
+            self.parse_filename(filename)
+
+    def parse_filename(self, filename):
+        mtype, encoding = mimetypes.guess_type(filename)
+        if mtype is None:
+            mtype = 'application/octet-stream'
+        self.parse_mimetype(mtype)
+
+    def parse_mimetype(self, mimestr):
+        """ take a string like used in content-type and parse it into components,
+            alternatively it also can process some abbreviated string like "wiki"
+        """
+        parameters = mimestr.split(";")
+        parameters = [p.strip() for p in parameters]
+        mimetype, parameters = parameters[0], parameters[1:]
+        mimetype = mimetype.split('/')
+        if len(mimetype) >= 2:
+            major, minor = mimetype[:2] # we just ignore more than 2 parts
+        else:
+            major, minor = self.parse_format(mimetype[0])
+        self.major = major.lower()
+        self.minor = minor.lower()
+        for param in parameters:
+            key, value = param.split('=')
+            if value[0] == '"' and value[-1] == '"': # remove quotes
+                value = value[1:-1]
+            self.params[key.lower()] = value
+        if 'charset' in self.params:
+            self.charset = self.params['charset'].lower()
+        self.sanitize()
+
+    def parse_format(self, format):
+        """ maps from what we currently use on-page in a #format xxx processing
+            instruction to a sanitized mimetype major, minor tuple.
+            can also be used later for easier entry by the user, so they can just
+            type "wiki" instead of "text/x.moin.wiki".
+        """
+        format = format.lower()
+        if format in config.parser_text_mimetype:
+            mimetype = 'text', format
+        else:
+            mapping = {
+                'wiki': ('text', 'x.moin.wiki'),
+                'irc': ('text', 'irssi'),
+            }
+            try:
+                mimetype = mapping[format]
+            except KeyError:
+                mimetype = 'text', 'x-%s' % format
+        return mimetype
+
+    def sanitize(self):
+        """ convert to some representation that makes sense - this is not necessarily
+            conformant to /etc/mime.types or IANA listing, but if something is
+            readable text, we will return some ``text/*`` mimetype, not ``application/*``,
+            because we need text/plain as fallback and not application/octet-stream.
+        """
+        self.major, self.minor = MIMETYPES_sanitize_mapping.get((self.major, self.minor), (self.major, self.minor))
+
+    def spoil(self):
+        """ this returns something conformant to /etc/mime.types or IANA as a string,
+            kind of inverse operation of sanitize(), but doesn't change self
+        """
+        major, minor = MIMETYPES_spoil_mapping.get((self.major, self.minor), (self.major, self.minor))
+        return self.content_type(major, minor)
+
+    def content_type(self, major=None, minor=None, charset=None, params=None):
+        """ return a string suitable for Content-Type header
+        """
+        major = major or self.major
+        minor = minor or self.minor
+        params = params or self.params or {}
+        if major == 'text':
+            charset = charset or self.charset or params.get('charset', config.charset)
+            params['charset'] = charset
+        mimestr = "%s/%s" % (major, minor)
+        params = ['%s="%s"' % (key.lower(), value) for key, value in params.items()]
+        params.insert(0, mimestr)
+        return "; ".join(params)
+
+    def mime_type(self):
+        """ return a string major/minor only, no params """
+        return "%s/%s" % (self.major, self.minor)
+
+    def content_disposition(self, cfg):
+        # for dangerous files (like .html), when we are in danger of cross-site-scripting attacks,
+        # we just let the user store them to disk ('attachment').
+        # For safe files, we directly show them inline (this also works better for IE).
+        mime_type = self.mime_type()
+        dangerous = mime_type in cfg.mimetypes_xss_protect
+        content_disposition = dangerous and 'attachment' or 'inline'
+        filename = self.filename
+        if filename is not None:
+            # TODO: fix the encoding here, plain 8 bit is not allowed according to the RFCs
+            # There is no solution that is compatible to IE except stripping non-ascii chars
+            if isinstance(filename, unicode):
+                filename = filename.encode(config.charset)
+            content_disposition += '; filename="%s"' % filename
+        return content_disposition
+
+    def module_name(self):
+        """ convert this mimetype to a string useable as python module name,
+            we yield the exact module name first and then proceed to shorter
+            module names (useful for falling back to them, if the more special
+            module is not found) - e.g. first "text_x_python", next "text_x",
+            then the raw mimestr as given (may be None), then "text". Finally,
+            we yield "application_octet_stream" as the most general mimetype.
+
+            Hint: the fallback handler module for text/* should be implemented
+            in module "text" (not "text_plain")
+        """
+        mimetype = self.mime_type()
+        modname = mimetype.replace("/", "_").replace("-", "_").replace(".", "_")
+        fragments = modname.split('_')
+        for length in range(len(fragments), 1, -1):
+            yield "_".join(fragments[:length])
+        yield self.raw_mimestr
+        yield fragments[0]
+        yield "application_octet_stream"
+
--- a/MoinMoin/util/plugins.py	Wed Apr 06 03:49:00 2011 +0200
+++ b/MoinMoin/util/plugins.py	Wed Apr 06 16:46:54 2011 +0200
@@ -17,6 +17,8 @@
 
 from MoinMoin import error
 from MoinMoin.util import pysupport
+from MoinMoin.util.mimetype import MimeType
+
 
 class PluginError(Exception):
     """ Base class for plugin errors """
--- a/MoinMoin/wikiutil.py	Wed Apr 06 03:49:00 2011 +0200
+++ b/MoinMoin/wikiutil.py	Wed Apr 06 16:46:54 2011 +0200
@@ -30,6 +30,7 @@
 
 from MoinMoin.i18n import _, L_, N_
 from MoinMoin.util import pysupport, lock
+from MoinMoin.util.mimetype import MimeType
 from MoinMoin.storage.error import NoSuchItemError, NoSuchRevisionError
 
 import werkzeug
@@ -209,198 +210,6 @@
 
 
 #############################################################################
-### mimetype support
-#############################################################################
-import mimetypes
-
-MIMETYPES_MORE = {
- # OpenOffice 2.x & other open document stuff
- '.odt': 'application/vnd.oasis.opendocument.text',
- '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
- '.odp': 'application/vnd.oasis.opendocument.presentation',
- '.odg': 'application/vnd.oasis.opendocument.graphics',
- '.odc': 'application/vnd.oasis.opendocument.chart',
- '.odf': 'application/vnd.oasis.opendocument.formula',
- '.odb': 'application/vnd.oasis.opendocument.database',
- '.odi': 'application/vnd.oasis.opendocument.image',
- '.odm': 'application/vnd.oasis.opendocument.text-master',
- '.ott': 'application/vnd.oasis.opendocument.text-template',
- '.ots': 'application/vnd.oasis.opendocument.spreadsheet-template',
- '.otp': 'application/vnd.oasis.opendocument.presentation-template',
- '.otg': 'application/vnd.oasis.opendocument.graphics-template',
- # some systems (like Mac OS X) don't have some of these:
- '.patch': 'text/x-diff',
- '.diff': 'text/x-diff',
- '.py': 'text/x-python',
- '.cfg': 'text/plain',
- '.conf': 'text/plain',
- '.irc': 'text/plain',
- '.md5': 'text/plain',
- '.csv': 'text/csv',
- '.flv': 'video/x-flv',
- '.wmv': 'video/x-ms-wmv',
- '.swf': 'application/x-shockwave-flash',
- '.moin': 'text/x.moin.wiki',
- '.creole': 'text/x.moin.creole',
-}
-
-# add all mimetype patterns of pygments
-import pygments.lexers
-
-for name, short, patterns, mime in pygments.lexers.get_all_lexers():
-    for pattern in patterns:
-        if pattern.startswith('*.') and mime:
-            MIMETYPES_MORE[pattern[1:]] = mime[0]
-
-[mimetypes.add_type(mimetype, ext, True) for ext, mimetype in MIMETYPES_MORE.items()]
-
-MIMETYPES_sanitize_mapping = {
-    # this stuff is text, but got application/* for unknown reasons
-    ('application', 'docbook+xml'): ('text', 'docbook'),
-    ('application', 'x-latex'): ('text', 'latex'),
-    ('application', 'x-tex'): ('text', 'tex'),
-    ('application', 'javascript'): ('text', 'javascript'),
-}
-
-MIMETYPES_spoil_mapping = {} # inverse mapping of above
-for _key, _value in MIMETYPES_sanitize_mapping.items():
-    MIMETYPES_spoil_mapping[_value] = _key
-
-
-class MimeType(object):
-    """ represents a mimetype like text/plain """
-
-    def __init__(self, mimestr=None, filename=None):
-        self.major = self.minor = None # sanitized mime type and subtype
-        self.params = {} # parameters like "charset" or others
-        self.charset = None # this stays None until we know for sure!
-        self.raw_mimestr = mimestr
-        self.filename = filename
-        if mimestr:
-            self.parse_mimetype(mimestr)
-        elif filename:
-            self.parse_filename(filename)
-
-    def parse_filename(self, filename):
-        mtype, encoding = mimetypes.guess_type(filename)
-        if mtype is None:
-            mtype = 'application/octet-stream'
-        self.parse_mimetype(mtype)
-
-    def parse_mimetype(self, mimestr):
-        """ take a string like used in content-type and parse it into components,
-            alternatively it also can process some abbreviated string like "wiki"
-        """
-        parameters = mimestr.split(";")
-        parameters = [p.strip() for p in parameters]
-        mimetype, parameters = parameters[0], parameters[1:]
-        mimetype = mimetype.split('/')
-        if len(mimetype) >= 2:
-            major, minor = mimetype[:2] # we just ignore more than 2 parts
-        else:
-            major, minor = self.parse_format(mimetype[0])
-        self.major = major.lower()
-        self.minor = minor.lower()
-        for param in parameters:
-            key, value = param.split('=')
-            if value[0] == '"' and value[-1] == '"': # remove quotes
-                value = value[1:-1]
-            self.params[key.lower()] = value
-        if 'charset' in self.params:
-            self.charset = self.params['charset'].lower()
-        self.sanitize()
-
-    def parse_format(self, format):
-        """ maps from what we currently use on-page in a #format xxx processing
-            instruction to a sanitized mimetype major, minor tuple.
-            can also be user later for easier entry by the user, so he can just
-            type "wiki" instead of "text/x.moin.wiki".
-        """
-        format = format.lower()
-        if format in config.parser_text_mimetype:
-            mimetype = 'text', format
-        else:
-            mapping = {
-                'wiki': ('text', 'x.moin.wiki'),
-                'irc': ('text', 'irssi'),
-            }
-            try:
-                mimetype = mapping[format]
-            except KeyError:
-                mimetype = 'text', 'x-%s' % format
-        return mimetype
-
-    def sanitize(self):
-        """ convert to some representation that makes sense - this is not necessarily
-            conformant to /etc/mime.types or IANA listing, but if something is
-            readable text, we will return some ``text/*`` mimetype, not ``application/*``,
-            because we need text/plain as fallback and not application/octet-stream.
-        """
-        self.major, self.minor = MIMETYPES_sanitize_mapping.get((self.major, self.minor), (self.major, self.minor))
-
-    def spoil(self):
-        """ this returns something conformant to /etc/mime.type or IANA as a string,
-            kind of inverse operation of sanitize(), but doesn't change self
-        """
-        major, minor = MIMETYPES_spoil_mapping.get((self.major, self.minor), (self.major, self.minor))
-        return self.content_type(major, minor)
-
-    def content_type(self, major=None, minor=None, charset=None, params=None):
-        """ return a string suitable for Content-Type header
-        """
-        major = major or self.major
-        minor = minor or self.minor
-        params = params or self.params or {}
-        if major == 'text':
-            charset = charset or self.charset or params.get('charset', config.charset)
-            params['charset'] = charset
-        mimestr = "%s/%s" % (major, minor)
-        params = ['%s="%s"' % (key.lower(), value) for key, value in params.items()]
-        params.insert(0, mimestr)
-        return "; ".join(params)
-
-    def mime_type(self):
-        """ return a string major/minor only, no params """
-        return "%s/%s" % (self.major, self.minor)
-
-    def content_disposition(self, cfg):
-        # for dangerous files (like .html), when we are in danger of cross-site-scripting attacks,
-        # we just let the user store them to disk ('attachment').
-        # For safe files, we directly show them inline (this also works better for IE).
-        mime_type = self.mime_type()
-        dangerous = mime_type in cfg.mimetypes_xss_protect
-        content_disposition = dangerous and 'attachment' or 'inline'
-        filename = self.filename
-        if filename is not None:
-            # TODO: fix the encoding here, plain 8 bit is not allowed according to the RFCs
-            # There is no solution that is compatible to IE except stripping non-ascii chars
-            if isinstance(filename, unicode):
-                filename = filename.encode(config.charset)
-            content_disposition += '; filename="%s"' % filename
-        return content_disposition
-
-    def module_name(self):
-        """ convert this mimetype to a string useable as python module name,
-            we yield the exact module name first and then proceed to shorter
-            module names (useful for falling back to them, if the more special
-            module is not found) - e.g. first "text_python", next "text".
-            Finally, we yield "application_octet_stream" as the most general
-            mimetype we have.
-
-            Hint: the fallback handler module for text/* should be implemented
-            in module "text" (not "text_plain")
-        """
-        mimetype = self.mime_type()
-        modname = mimetype.replace("/", "_").replace("-", "_").replace(".", "_")
-        fragments = modname.split('_')
-        for length in range(len(fragments), 1, -1):
-            yield "_".join(fragments[:length])
-        yield self.raw_mimestr
-        yield fragments[0]
-        yield "application_octet_stream"
-
-
-#############################################################################
 ### Misc
 #############################################################################