view MoinMoin/script/migration/moin19/ @ 1973:286e315935ae

refactor usage of constants: don't import from config module, but from constants.*; use uppercase for constant names
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Sun, 10 Feb 2013 22:50:21 +0100
parents 4ac437141bbe
line wrap: on
line source
# Copyright: 2010 MoinMoin:ThomasWaldmann
# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.

"""
MoinMoin - helpers for 1.9 migration
"""

import re

from MoinMoin.constants.keys import NAME, ACL, CONTENTTYPE, MTIME, LANGUAGE

# Default charset: used as the default `charset` argument when encoding wiki
# names before quoting and when decoding them after unquoting.
CHARSET = 'utf-8'

# Precompiled patterns for file name [un]quoting
# UNSAFE matches a run of one or more characters other than ASCII letters,
# digits and underscore.
UNSAFE = re.compile(r'[^a-zA-Z0-9_]+')
# QUOTED matches a parenthesized run of hex digits; group 1 is the digits.
QUOTED = re.compile(r'\(([a-fA-F0-9]+)\)')

def split_body(body):
    """
    Extract the processing instructions / acl / etc. at the beginning of a page's body.

    Hint: if you have a Page object p, you already have the result of this function in
          p.meta and (even better) parsed/processed stuff in p.pi.

    Lines are consumed from the start of the body for as long as the body
    starts with '#'. Parsing stops at the first line that does not start
    with '#', or at a line consisting of a lone '#' (an empty, invalid PI),
    which is left in the returned body.

    :param body: page text, possibly starting with '#verb args' lines
    :returns: tuple (pi, body): pi is a dict mapping the lowercased verb to
              its value, body is the remaining text. The value is a tuple of
              all comments for '#' (lines starting with '##'), the
              blank-joined values for 'acl', and the last value seen for
              any other verb.
    """
    pi = {}
    while body.startswith('#'):
        try:
            line, body = body.split('\n', 1)  # extract first line
            line = line.rstrip('\r')
        except ValueError:
            # no newline left: the whole remaining body is the last PI line
            line = body
            body = ''

        # end parsing on empty (invalid) PI
        if line == "#":
            body = line + '\n' + body
            break

        if line[1] == '#':  # two hash marks are a comment
            comment = line[2:]
            if not comment.startswith(' '):
                # we don't require a blank after the ##, so we put one there
                comment = ' ' + comment
                line = '##{0}'.format(comment)

        # the trailing blank guarantees the split yields two parts even for
        # a verb without arguments
        verb, args = (line[1:] + ' ').split(' ', 1)  # split at the first blank
        pi.setdefault(verb.lower(), []).append(args.strip())

    # iterate over a snapshot, as the values are replaced inside the loop
    for key, value in list(pi.items()):
        if key in ['#', ]:
            # transform the lists to tuples:
            pi[key] = tuple(value)
        elif key in ['acl', ]:
            # join the list of values to a single value
            pi[key] = u' '.join(value)
        else:
            # for keys that can't occur multiple times, don't use a list:
            pi[key] = value[-1]  # use the last value to copy 1.9 parsing behaviour

    return pi, body

def add_metadata_to_body(metadata, data):
    """
    Adds the processing instructions to the data.

    Each selected metadata entry is written as a "#key value" line in front
    of the data; list or tuple values produce one line per element.
    Metadata keys that are not in meta_keys are skipped.

    :param metadata: dict mapping metadata keys to values
    :param data: the body text the processing instructions are prepended to
    :returns: the processing instruction lines followed by data
    """
    # only these metadata keys are written out as processing instructions
    meta_keys = [NAME, ACL, CONTENTTYPE, MTIME, LANGUAGE, ]

    pi_lines = []
    for key, value in metadata.items():
        if key not in meta_keys:
            continue
        # special handling for list metadata
        if isinstance(value, (list, tuple)):
            for line in value:
                pi_lines.append("#{0} {1}\n".format(key, line))
        else:
            pi_lines.append("#{0} {1}\n".format(key, value))
    return "".join(pi_lines) + data

def quoteWikinameFS(wikiname, charset=CHARSET):
    """
    Return file system representation of a Unicode WikiName.

    The name is encoded using charset; every run of characters matched by
    UNSAFE is then replaced by a parenthesized group holding two lowercase
    hex digits per encoded byte. Everything else is copied unchanged.

    Warning: will raise UnicodeError if wikiname can not be encoded using
    charset. The default value 'utf-8' can encode any character.

    :param wikiname: wiki name [unicode]
    :param charset: charset to encode string (before quoting)
    :rtype: string
    :returns: quoted name, safe for any file system
    """
    filename = wikiname.encode(charset)

    quoted = []
    location = 0
    for needle in UNSAFE.finditer(filename):
        # append leading safe stuff
        quoted.append(filename[location:needle.start()])
        location = needle.end()
        # Quote and append unsafe stuff
        quoted.append('(')
        for character in needle.group():
            quoted.append("{0:02x}".format(ord(character)))
        quoted.append(')')

    # append rest of string
    quoted.append(filename[location:])
    return ''.join(quoted)

class InvalidFileNameError(Exception):
    """Raised when a quoted file name cannot be unquoted."""
    pass

def unquoteWikiname(filename, charset=CHARSET):
    """
    Return Unicode WikiName from quoted file name.

    Every parenthesized group of hex digits (as matched by QUOTED) is
    replaced by the bytes it encodes, two hex digits per byte; the result
    is then decoded using charset.

    raises an InvalidFileNameError in case of unquoting problems.

    :param filename: quoted wiki name
    :param charset: charset to use for decoding (after unquoting)
    :rtype: unicode
    :returns: WikiName
    """
    # From some places we get called with Unicode strings
    if isinstance(filename, unicode):
        filename = filename.encode(CHARSET)

    parts = []
    start = 0
    for needle in QUOTED.finditer(filename):
        # append leading unquoted stuff
        parts.append(filename[start:needle.start()])
        start = needle.end()
        # Append quoted stuff
        group = needle.group(1)
        # Filter invalid filenames
        if len(group) % 2 != 0:
            # an odd number of hex digits can not be split into bytes
            raise InvalidFileNameError(filename)
        try:
            for i in range(0, len(group), 2):
                byte = group[i:i+2]
                parts.append(chr(int(byte, 16)))
        except ValueError:
            # byte not in hex, e.g 'xy'
            raise InvalidFileNameError(filename)

    # append rest of string; without any quoted group this is all of filename
    parts.append(filename[start:])
    wikiname = ''.join(parts)

    return wikiname.decode(charset)