view MoinMoin/util/ @ 1973:286e315935ae

refactor usage of constants don't import from config module, but from constants.* use uppercase for constant names
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 10 Feb 2013 22:50:21 +0100
parents 4ac437141bbe
children 384555088cab
line wrap: on
line source
# Copyright: 2005-2011 MoinMoin:ThomasWaldmann
# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.

""" MoinMoin - mimetype support """

from __future__ import absolute_import, division

import mimetypes

from MoinMoin.constants.contenttypes import PARSER_TEXT_MIMETYPE

# prevents unexpected results on Windows
# see

 # OpenOffice 2.x & other open document stuff
 '.odt': 'application/vnd.oasis.opendocument.text',
 '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
 '.odp': 'application/vnd.oasis.opendocument.presentation',
 '.odg': 'application/',
 '.odc': 'application/vnd.oasis.opendocument.chart',
 '.odf': 'application/vnd.oasis.opendocument.formula',
 '.odb': 'application/vnd.oasis.opendocument.database',
 '.odi': 'application/vnd.oasis.opendocument.image',
 '.odm': 'application/vnd.oasis.opendocument.text-master',
 '.ott': 'application/vnd.oasis.opendocument.text-template',
 '.ots': 'application/vnd.oasis.opendocument.spreadsheet-template',
 '.otp': 'application/vnd.oasis.opendocument.presentation-template',
 '.otg': 'application/',
 # some systems (like Mac OS X) don't have some of these:
 '.patch': 'text/x-diff',
 '.diff': 'text/x-diff',
 '.py': 'text/x-python',
 '.cfg': 'text/plain',
 '.conf': 'text/plain',
 '.irc': 'text/plain',
 '.md5': 'text/plain',
 '.csv': 'text/csv',
 '.rst': 'text/x-rst',
 '.flv': 'video/x-flv',
 '.wmv': 'video/x-ms-wmv',
 '.wma': 'audio/x-ms-wma',
 '.swf': 'application/x-shockwave-flash',
 '.awd': 'application/x-anywikidraw',
 '.twd': 'application/x-twikidraw',
 '.swd': 'application/x-svgdraw',
 '.dbx': 'application/docbook+xml',
 '.moin': 'text/',
 '.creole': 'text/x.moin.creole',
 '.mediawiki': 'text/x-mediawiki',
 '.ico': 'image/x-icon',
 '.svg': 'image/svg+xml'

# add all mimetype patterns of pygments
import pygments.lexers

# Every lexer that declares at least one mimetype contributes its "*.ext"
# filename patterns as ext -> first declared mimetype. Entries already present
# in MIMETYPES_MORE for the same extension are overwritten.
for name, short, patterns, mime in pygments.lexers.get_all_lexers():
    if not mime:
        continue
    for pattern in patterns:
        if pattern.startswith('*.'):
            # strip the leading "*" so the key is the extension including the dot
            MIMETYPES_MORE[pattern[1:]] = mime[0]

# Register everything with the stdlib mimetypes module; the third argument is
# add_type()'s ``strict`` flag (True = add to the official types).
for ext, mimetype in MIMETYPES_MORE.items():
    mimetypes.add_type(mimetype, ext, True)

# Maps (major, minor) tuples as found "in the wild" to the (major, minor)
# tuples used internally.
MIMETYPES_sanitize_mapping = {
    # this stuff is text, but got application/* for unknown reasons
    ('application', 'docbook+xml'): ('text', 'docbook'),
    ('application', 'x-latex'): ('text', 'latex'),
    ('application', 'x-tex'): ('text', 'tex'),
    ('application', 'javascript'): ('text', 'javascript'),
}

# inverse mapping of above: internal (major, minor) -> original (major, minor)
MIMETYPES_spoil_mapping = {}
for _key, _value in MIMETYPES_sanitize_mapping.items():
    MIMETYPES_spoil_mapping[_value] = _key

class MimeType(object):
    """ represents a mimetype like text/plain

        Attributes set by the constructor / the parse methods:

        major, minor -- sanitized mime type and subtype (lowercase)
        params       -- dict of content-type parameters (keys lowercase)
        charset      -- lowercased charset parameter, None if not known
        raw_mimestr  -- the mimestr argument exactly as given (may be None)
        filename     -- the filename argument exactly as given (may be None)
    """

    def __init__(self, mimestr=None, filename=None):
        """ initialize from a mimetype string or, if that is empty, from a filename

            :param mimestr: content-type like string or abbreviated format name
            :param filename: file name used for guessing if no mimestr is given
        """
        self.major = self.minor = None  # sanitized mime type and subtype
        self.params = {}  # parameters like "charset" or others
        self.charset = None  # this stays None until we know for sure!
        self.raw_mimestr = mimestr
        self.filename = filename
        if mimestr:
            self.parse_mimetype(mimestr)
        elif filename:
            self.parse_filename(filename)

    def parse_filename(self, filename):
        """ guess the mimetype from the filename and parse the result,
            unknown file types are treated as application/octet-stream
        """
        mtype = mimetypes.guess_type(filename)[0]
        if mtype is None:
            mtype = 'application/octet-stream'
        self.parse_mimetype(mtype)

    def parse_mimetype(self, mimestr):
        """ take a string like used in content-type and parse it into components,
            alternatively it also can process some abbreviated string like "wiki"

            Sets major, minor, params and charset. Empty parameter segments
            (e.g. from a trailing ";") and segments without "=" are ignored.
        """
        segments = [segment.strip() for segment in mimestr.split(";")]
        mimetype, parameters = segments[0], segments[1:]
        mimetype = mimetype.split('/')
        if len(mimetype) >= 2:
            major, minor = mimetype[:2]  # we just ignore more than 2 parts
        else:
            major, minor = self.parse_format(mimetype[0])
        self.major = major.lower()
        self.minor = minor.lower()
        for param in parameters:
            # partition() splits at the first "=" only, so values that contain
            # "=" themselves stay intact
            key, separator, value = param.partition('=')
            key = key.strip()
            if not separator or not key:
                continue
            value = value.strip()
            if len(value) >= 2 and value[0] == '"' and value[-1] == '"':  # remove quotes
                value = value[1:-1]
            self.params[key.lower()] = value
        if 'charset' in self.params:
            self.charset = self.params['charset'].lower()
        # major / minor are kept in sanitized form
        self.sanitize()

    def parse_format(self, format):
        """ maps from what we currently use on-page in a #format xxx processing
            instruction to a sanitized mimetype major, minor tuple.
            can also be used later for easier entry by the user, so he can just
            type "wiki" instead of the complete mimetype string.

            :param format: format name, compared case-insensitively
            :returns: (major, minor) tuple
        """
        format = format.lower()
        if format in PARSER_TEXT_MIMETYPE:
            return 'text', format
        mapping = {
            # NOTE(review): the minor type for 'wiki' is empty in this copy and
            # looks truncated - confirm the intended value against upstream.
            'wiki': ('text', ''),
            'irc': ('text', 'irssi'),
        }
        try:
            return mapping[format]
        except KeyError:
            # anything unknown becomes an experimental text/x-<format> type
            return 'text', 'x-{0}'.format(format)

    def sanitize(self):
        """ convert to some representation that makes sense - this is not necessarily
            conformant to /etc/mime.types or IANA listing, but if something is
            readable text, we will return some ``text/*`` mimetype, not ``application/*``,
            because we need text/plain as fallback and not application/octet-stream.

            Updates self.major / self.minor in place.
        """
        current = (self.major, self.minor)
        self.major, self.minor = MIMETYPES_sanitize_mapping.get(current, current)

    def spoil(self):
        """ this returns something conformant to /etc/mime.type or IANA as a string,
            kind of inverse operation of sanitize(), but doesn't change self
        """
        current = (self.major, self.minor)
        major, minor = MIMETYPES_spoil_mapping.get(current, current)
        return self.content_type(major, minor)

    def content_type(self, major=None, minor=None, charset=None, params=None):
        """ return a string suitable for Content-Type header

            Every argument that is not given (or empty) falls back to the
            corresponding attribute of self. A charset parameter is only
            added for text/* types. Neither self.params nor a given params
            dict is modified.
        """
        major = major or self.major
        minor = minor or self.minor
        # work on a copy, adding the charset must not leak into self.params
        # or into the dict handed in by the caller
        params = dict(params or self.params or {})
        if major == 'text':
            charset = charset or self.charset
            if charset:
                params['charset'] = charset
        pieces = ["{0}/{1}".format(major, minor)]
        pieces.extend('{0}="{1}"'.format(key.lower(), value) for key, value in params.items())
        return "; ".join(pieces)

    def mime_type(self):
        """ return a string major/minor only, no params """
        return "{0}/{1}".format(self.major, self.minor)

    def as_attachment(self, cfg):
        """ return True if major/minor is listed in cfg.mimetypes_xss_protect """
        # for dangerous files (like .html), when we are in danger of cross-site-scripting attacks,
        # we just let the user store them to disk ('attachment').
        # For safe files, we directly show them inline (this also works better for IE).
        return self.mime_type() in cfg.mimetypes_xss_protect

    def module_name(self):
        """ convert this mimetype to a string useable as python module name,
            we yield the exact module name first and then proceed to shorter
            module names (useful for falling back to them, if the more special
            module is not found) - e.g. first "text_python", next "text".
            Finally, we yield "application_octet_stream" as the most general
            mimetype we have.

            Hint: the fallback handler module for text/* should be implemented
            in module "text" (not "text_plain")
        """
        modname = self.mime_type().replace("/", "_").replace("-", "_").replace(".", "_")
        fragments = modname.split('_')
        # longest name first, stop before the single-fragment name
        for length in range(len(fragments), 1, -1):
            yield "_".join(fragments[:length])
        # the string as given to the constructor (None if built from a filename)
        yield self.raw_mimestr
        yield fragments[0]
        yield "application_octet_stream"