MoinMoin/wikiutil.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Wed, 11 Feb 2009 02:34:33 +0100
changeset 4569 3caaa8c74c41
parent 4567 6ac8b2f4cdac
child 4607 d8e5e9cfadf1
permissions -rw-r--r--
wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
     1 # -*- coding: iso-8859-1 -*-
     2 """
     3     MoinMoin - Wiki Utility Functions
     4 
     5     @copyright: 2000-2004 Juergen Hermann <jh@web.de>,
     6                 2004 by Florian Festi,
     7                 2006 by Mikko Virkkil,
     8                 2005-2008 MoinMoin:ThomasWaldmann,
     9                 2007 MoinMoin:ReimarBauer
    10     @license: GNU GPL, see COPYING for details.
    11 """
    12 
    13 import cgi
    14 import codecs
    15 import os
    16 import re
    17 import time
    18 import urllib
    19 
    20 from MoinMoin import log
    21 logging = log.getLogger(__name__)
    22 
    23 from MoinMoin import config
    24 from MoinMoin.util import pysupport, lock
    25 from MoinMoin.support.python_compatibility import rsplit
    26 from inspect import getargspec, isfunction, isclass, ismethod
    27 
    28 from MoinMoin import web # needed so that next line works:
    29 import werkzeug.utils
    30 
    31 # Exceptions
    32 class InvalidFileNameError(Exception):
    33     """ Called when we find an invalid file name """
    34     pass
    35 
    36 # constants for page names
# a relative page name starting with this refers to the level above the
# context page (AbsPageName strips one context component per occurrence)
PARENT_PREFIX = "../"
PARENT_PREFIX_LEN = len(PARENT_PREFIX)
# a relative page name starting with this refers to a sub page of the
# context page (see AbsPageName / RelPageName)
CHILD_PREFIX = "/"
CHILD_PREFIX_LEN = len(CHILD_PREFIX)
    41 
    42 #############################################################################
    43 ### Getting data from user/Sending data to user
    44 #############################################################################
    45 
    46 def decodeUnknownInput(text):
    47     """ Decode unknown input, like text attachments
    48 
    49     First we try utf-8 because it has special format, and it will decode
    50     only utf-8 files. Then we try config.charset, then iso-8859-1 using
    51     'replace'. We will never raise an exception, but may return junk
    52     data.
    53 
    54     WARNING: Use this function only for data that you view, not for data
    55     that you save in the wiki.
    56 
    57     @param text: the text to decode, string
    58     @rtype: unicode
    59     @return: decoded text (maybe wrong)
    60     """
    61     # Shortcut for unicode input
    62     if isinstance(text, unicode):
    63         return text
    64 
    65     try:
    66         return unicode(text, 'utf-8')
    67     except UnicodeError:
    68         if config.charset not in ['utf-8', 'iso-8859-1']:
    69             try:
    70                 return unicode(text, config.charset)
    71             except UnicodeError:
    72                 pass
    73         return unicode(text, 'iso-8859-1', 'replace')
    74 
    75 
    76 def decodeUserInput(s, charsets=[config.charset]):
    77     """
    78     Decodes input from the user.
    79 
    80     @param s: the string to unquote
    81     @param charsets: list of charsets to assume the string is in
    82     @rtype: unicode
    83     @return: the unquoted string as unicode
    84     """
    85     for charset in charsets:
    86         try:
    87             return s.decode(charset)
    88         except UnicodeError:
    89             pass
    90     raise UnicodeError('The string %r cannot be decoded.' % s)
    91 
    92 
    93 def url_quote(s, safe='/', want_unicode=None):
    94     """ see werkzeug.utils.url_quote, we use a different safe param default value """
    95     try:
    96         assert want_unicode is None
    97     except AssertionError:
    98         log.exception("call with deprecated want_unicode param, please fix caller")
    99     return werkzeug.utils.url_quote(s, charset=config.charset, safe=safe)
   100 
   101 def url_quote_plus(s, safe='/', want_unicode=None):
   102     """ see werkzeug.utils.url_quote_plus, we use a different safe param default value """
   103     try:
   104         assert want_unicode is None
   105     except AssertionError:
   106         log.exception("call with deprecated want_unicode param, please fix caller")
   107     return werkzeug.utils.url_quote_plus(s, charset=config.charset, safe=safe)
   108 
   109 def url_unquote(s, want_unicode=None):
   110     """ see werkzeug.utils.url_unquote """
   111     try:
   112         assert want_unicode is None
   113     except AssertionError:
   114         log.exception("call with deprecated want_unicode param, please fix caller")
   115     return werkzeug.utils.url_unquote(s, charset=config.charset, errors='fallback:iso-8859-1')
   116 
   117 
   118 def parseQueryString(qstr, want_unicode=None):
   119     """ see werkzeug.utils.url_decode """
   120     try:
   121         assert want_unicode is None
   122     except AssertionError:
   123         log.exception("call with deprecated want_unicode param, please fix caller")
   124     return werkzeug.utils.url_decode(qstr, charset=config.charset, errors='fallback:iso-8859-1',
   125                                      decode_keys=False, include_empty=False)
   126 
   127 def makeQueryString(qstr=None, want_unicode=None, **kw):
   128     """ Make a querystring from arguments.
   129 
   130     kw arguments overide values in qstr.
   131 
   132     If a string is passed in, it's returned verbatim and keyword parameters are ignored.
   133 
   134     See also: werkzeug.utils.url_encode
   135 
   136     @param qstr: dict to format as query string, using either ascii or unicode
   137     @param kw: same as dict when using keywords, using ascii or unicode
   138     @rtype: string
   139     @return: query string ready to use in a url
   140     """
   141     try:
   142         assert want_unicode is None
   143     except AssertionError:
   144         log.exception("call with deprecated want_unicode param, please fix caller")
   145     if qstr is None:
   146         qstr = {}
   147     elif isinstance(qstr, (str, unicode)):
   148         return qstr
   149     if isinstance(qstr, dict):
   150         qstr.update(kw)
   151         return werkzeug.utils.url_encode(qstr, charset=config.charset, encode_keys=True)
   152     else:
   153         raise ValueError("Unsupported argument type, should be dict.")
   154 
   155 
   156 def quoteWikinameURL(pagename, charset=config.charset):
   157     """ Return a url encoding of filename in plain ascii
   158 
   159     Use urllib.quote to quote any character that is not always safe.
   160 
   161     @param pagename: the original pagename (unicode)
   162     @param charset: url text encoding, 'utf-8' recommended. Other charset
   163                     might not be able to encode the page name and raise
   164                     UnicodeError. (default config.charset ('utf-8')).
   165     @rtype: string
   166     @return: the quoted filename, all unsafe characters encoded
   167     """
   168     # XXX please note that urllib.quote and werkzeug.utils.url_quote have
   169     # XXX different defaults for safe=...
   170     return werkzeug.utils.url_quote(pagename, charset=charset, safe='/')
   171 
   172 
# module-level alias, so escape is available as an attribute of this module
escape = werkzeug.utils.escape
   174 
   175 
   176 def clean_input(text, max_len=201):
   177     """ Clean input:
   178         replace CR, LF, TAB by whitespace
   179         delete control chars
   180 
   181         @param text: unicode text to clean
   182         @rtype: unicode
   183         @return: cleaned text
   184     """
   185     # we only have input fields with max 200 chars, but spammers send us more
   186     length = len(text)
   187     if length == 0 or length > max_len:
   188         return u''
   189     else:
   190         return text.translate(config.clean_input_translation_map)
   191 
   192 
   193 def make_breakable(text, maxlen):
   194     """ make a text breakable by inserting spaces into nonbreakable parts
   195     """
   196     text = text.split(" ")
   197     newtext = []
   198     for part in text:
   199         if len(part) > maxlen:
   200             while part:
   201                 newtext.append(part[:maxlen])
   202                 part = part[maxlen:]
   203         else:
   204             newtext.append(part)
   205     return " ".join(newtext)
   206 
   207 ########################################################################
   208 ### Storage
   209 ########################################################################
   210 
   211 # Precompiled patterns for file name [un]quoting
# a run of characters other than ascii letters, digits and underscore
UNSAFE = re.compile(r'[^a-zA-Z0-9_]+')
# a run of hex digits enclosed in parentheses, group 1 is the hex digits
QUOTED = re.compile(r'\(([a-fA-F0-9]+)\)')
   214 
   215 
   216 def quoteWikinameFS(wikiname, charset=config.charset):
   217     """ Return file system representation of a Unicode WikiName.
   218 
   219     Warning: will raise UnicodeError if wikiname can not be encoded using
   220     charset. The default value of config.charset, 'utf-8' can encode any
   221     character.
   222 
   223     @param wikiname: Unicode string possibly containing non-ascii characters
   224     @param charset: charset to encode string
   225     @rtype: string
   226     @return: quoted name, safe for any file system
   227     """
   228     filename = wikiname.encode(charset)
   229 
   230     quoted = []
   231     location = 0
   232     for needle in UNSAFE.finditer(filename):
   233         # append leading safe stuff
   234         quoted.append(filename[location:needle.start()])
   235         location = needle.end()
   236         # Quote and append unsafe stuff
   237         quoted.append('(')
   238         for character in needle.group():
   239             quoted.append('%02x' % ord(character))
   240         quoted.append(')')
   241 
   242     # append rest of string
   243     quoted.append(filename[location:])
   244     return ''.join(quoted)
   245 
   246 
   247 def unquoteWikiname(filename, charsets=[config.charset]):
   248     """ Return Unicode WikiName from quoted file name.
   249 
   250     We raise an InvalidFileNameError if we find an invalid name, so the
   251     wiki could alarm the admin or suggest the user to rename a page.
   252     Invalid file names should never happen in normal use, but are rather
   253     cheap to find.
   254 
   255     This function should be used only to unquote file names, not page
   256     names we receive from the user. These are handled in request by
   257     urllib.unquote, decodePagename and normalizePagename.
   258 
   259     Todo: search clients of unquoteWikiname and check for exceptions.
   260 
   261     @param filename: string using charset and possibly quoted parts
   262     @param charsets: list of charsets used by string
   263     @rtype: Unicode String
   264     @return: WikiName
   265     """
   266     ### Temporary fix start ###
   267     # From some places we get called with Unicode strings
   268     if isinstance(filename, type(u'')):
   269         filename = filename.encode(config.charset)
   270     ### Temporary fix end ###
   271 
   272     parts = []
   273     start = 0
   274     for needle in QUOTED.finditer(filename):
   275         # append leading unquoted stuff
   276         parts.append(filename[start:needle.start()])
   277         start = needle.end()
   278         # Append quoted stuff
   279         group = needle.group(1)
   280         # Filter invalid filenames
   281         if (len(group) % 2 != 0):
   282             raise InvalidFileNameError(filename)
   283         try:
   284             for i in range(0, len(group), 2):
   285                 byte = group[i:i+2]
   286                 character = chr(int(byte, 16))
   287                 parts.append(character)
   288         except ValueError:
   289             # byte not in hex, e.g 'xy'
   290             raise InvalidFileNameError(filename)
   291 
   292     # append rest of string
   293     if start == 0:
   294         wikiname = filename
   295     else:
   296         parts.append(filename[start:len(filename)])
   297         wikiname = ''.join(parts)
   298 
   299     # FIXME: This looks wrong, because at this stage "()" can be both errors
   300     # like open "(" without close ")", or unquoted valid characters in the file name.
   301     # Filter invalid filenames. Any left (xx) must be invalid
   302     #if '(' in wikiname or ')' in wikiname:
   303     #    raise InvalidFileNameError(filename)
   304 
   305     wikiname = decodeUserInput(wikiname, charsets)
   306     return wikiname
   307 
   308 # time scaling
   309 def timestamp2version(ts):
   310     """ Convert UNIX timestamp (may be float or int) to our version
   311         (long) int.
   312         We don't want to use floats, so we just scale by 1e6 to get
   313         an integer in usecs.
   314     """
   315     return long(ts*1000000L) # has to be long for py 2.2.x
   316 
   317 def version2timestamp(v):
   318     """ Convert version number to UNIX timestamp (float).
   319         This must ONLY be used for display purposes.
   320     """
   321     return v / 1000000.0
   322 
   323 
   324 # This is the list of meta attribute names to be treated as integers.
   325 # IMPORTANT: do not use any meta attribute names with "-" (or any other chars
   326 # invalid in python attribute names), use e.g. _ instead.
   327 INTEGER_METAS = ['current', 'revision', # for page storage (moin 2.0)
   328                  'data_format_revision', # for data_dir format spec (use by mig scripts)
   329                 ]
   330 
   331 class MetaDict(dict):
   332     """ store meta informations as a dict.
   333     """
   334     def __init__(self, metafilename, cache_directory):
   335         """ create a MetaDict from metafilename """
   336         dict.__init__(self)
   337         self.metafilename = metafilename
   338         self.dirty = False
   339         lock_dir = os.path.join(cache_directory, '__metalock__')
   340         self.rlock = lock.ReadLock(lock_dir, 60.0)
   341         self.wlock = lock.WriteLock(lock_dir, 60.0)
   342 
   343         if not self.rlock.acquire(3.0):
   344             raise EnvironmentError("Could not lock in MetaDict")
   345         try:
   346             self._get_meta()
   347         finally:
   348             self.rlock.release()
   349 
   350     def _get_meta(self):
   351         """ get the meta dict from an arbitrary filename.
   352             does not keep state, does uncached, direct disk access.
   353             @param metafilename: the name of the file to read
   354             @return: dict with all values or {} if empty or error
   355         """
   356 
   357         try:
   358             metafile = codecs.open(self.metafilename, "r", "utf-8")
   359             meta = metafile.read() # this is much faster than the file's line-by-line iterator
   360             metafile.close()
   361         except IOError:
   362             meta = u''
   363         for line in meta.splitlines():
   364             key, value = line.split(':', 1)
   365             value = value.strip()
   366             if key in INTEGER_METAS:
   367                 value = int(value)
   368             dict.__setitem__(self, key, value)
   369 
   370     def _put_meta(self):
   371         """ put the meta dict into an arbitrary filename.
   372             does not keep or modify state, does uncached, direct disk access.
   373             @param metafilename: the name of the file to write
   374             @param metadata: dict of the data to write to the file
   375         """
   376         meta = []
   377         for key, value in self.items():
   378             if key in INTEGER_METAS:
   379                 value = str(value)
   380             meta.append("%s: %s" % (key, value))
   381         meta = '\r\n'.join(meta)
   382 
   383         metafile = codecs.open(self.metafilename, "w", "utf-8")
   384         metafile.write(meta)
   385         metafile.close()
   386         self.dirty = False
   387 
   388     def sync(self, mtime_usecs=None):
   389         """ No-Op except for that parameter """
   390         if not mtime_usecs is None:
   391             self.__setitem__('mtime', str(mtime_usecs))
   392         # otherwise no-op
   393 
   394     def __getitem__(self, key):
   395         """ We don't care for cache coherency here. """
   396         return dict.__getitem__(self, key)
   397 
   398     def __setitem__(self, key, value):
   399         """ Sets a dictionary entry. """
   400         if not self.wlock.acquire(5.0):
   401             raise EnvironmentError("Could not lock in MetaDict")
   402         try:
   403             self._get_meta() # refresh cache
   404             try:
   405                 oldvalue = dict.__getitem__(self, key)
   406             except KeyError:
   407                 oldvalue = None
   408             if value != oldvalue:
   409                 dict.__setitem__(self, key, value)
   410                 self._put_meta() # sync cache
   411         finally:
   412             self.wlock.release()
   413 
   414 
   415 # Quoting of wiki names, file names, etc. (in the wiki markup) -----------------------------------
   416 
   417 # don't ever change this - DEPRECATED, only needed for 1.5 > 1.6 migration conversion
   418 QUOTE_CHARS = u'"'
   419 
   420 
   421 #############################################################################
   422 ### InterWiki
   423 #############################################################################
# name of the wiki page whose raw body load_wikimap reads in addition to
# the intermap files
INTERWIKI_PAGE = "InterWikiMap"
   425 
   426 def generate_file_list(request):
   427     """ generates a list of all files. for internal use. """
   428 
   429     # order is important here, the local intermap file takes
   430     # precedence over the shared one, and is thus read AFTER
   431     # the shared one
   432     intermap_files = request.cfg.shared_intermap
   433     if not isinstance(intermap_files, list):
   434         intermap_files = [intermap_files]
   435     else:
   436         intermap_files = intermap_files[:]
   437     intermap_files.append(os.path.join(request.cfg.data_dir, "intermap.txt"))
   438     request.cfg.shared_intermap_files = [filename for filename in intermap_files
   439                                          if filename and os.path.isfile(filename)]
   440 
   441 
   442 def get_max_mtime(file_list, page):
   443     """ Returns the highest modification time of the files in file_list and the
   444     page page. """
   445     timestamps = [os.stat(filename).st_mtime for filename in file_list]
   446     if page.exists():
   447         # exists() is cached and thus cheaper than mtime_usecs()
   448         timestamps.append(version2timestamp(page.mtime_usecs()))
   449     if timestamps:
   450         return max(timestamps)
   451     else:
   452         return 0 # no files / pages there
   453 
   454 def load_wikimap(request):
   455     """ load interwiki map (once, and only on demand) """
   456     from MoinMoin.Page import Page
   457 
   458     now = int(time.time())
   459     if getattr(request.cfg, "shared_intermap_files", None) is None:
   460         generate_file_list(request)
   461 
   462     try:
   463         _interwiki_list = request.cfg.cache.interwiki_list
   464         old_mtime = request.cfg.cache.interwiki_mtime
   465         if request.cfg.cache.interwiki_ts + (1*60) < now: # 1 minutes caching time
   466             max_mtime = get_max_mtime(request.cfg.shared_intermap_files, Page(request, INTERWIKI_PAGE))
   467             if max_mtime > old_mtime:
   468                 raise AttributeError # refresh cache
   469             else:
   470                 request.cfg.cache.interwiki_ts = now
   471     except AttributeError:
   472         _interwiki_list = {}
   473         lines = []
   474 
   475         for filename in request.cfg.shared_intermap_files:
   476             f = codecs.open(filename, "r", config.charset)
   477             lines.extend(f.readlines())
   478             f.close()
   479 
   480         # add the contents of the InterWikiMap page
   481         lines += Page(request, INTERWIKI_PAGE).get_raw_body().splitlines()
   482 
   483         for line in lines:
   484             if not line or line[0] == '#':
   485                 continue
   486             try:
   487                 line = "%s %s/InterWiki" % (line, request.script_root)
   488                 wikitag, urlprefix, dummy = line.split(None, 2)
   489             except ValueError:
   490                 pass
   491             else:
   492                 _interwiki_list[wikitag] = urlprefix
   493 
   494         del lines
   495 
   496         # add own wiki as "Self" and by its configured name
   497         _interwiki_list['Self'] = request.script_root + '/'
   498         if request.cfg.interwikiname:
   499             _interwiki_list[request.cfg.interwikiname] = request.script_root + '/'
   500 
   501         # save for later
   502         request.cfg.cache.interwiki_list = _interwiki_list
   503         request.cfg.cache.interwiki_ts = now
   504         request.cfg.cache.interwiki_mtime = get_max_mtime(request.cfg.shared_intermap_files, Page(request, INTERWIKI_PAGE))
   505 
   506     return _interwiki_list
   507 
   508 def split_wiki(wikiurl):
   509     """
   510     Split a wiki url.
   511 
   512     *** DEPRECATED FUNCTION FOR OLD 1.5 SYNTAX - ONLY STILL HERE FOR THE 1.5 -> 1.6 MIGRATION ***
   513     Use split_interwiki(), see below.
   514 
   515     @param wikiurl: the url to split
   516     @rtype: tuple
   517     @return: (tag, tail)
   518     """
   519     # !!! use a regex here!
   520     try:
   521         wikitag, tail = wikiurl.split(":", 1)
   522     except ValueError:
   523         try:
   524             wikitag, tail = wikiurl.split("/", 1)
   525         except ValueError:
   526             wikitag, tail = 'Self', wikiurl
   527     return wikitag, tail
   528 
   529 def split_interwiki(wikiurl):
   530     """ Split a interwiki name, into wikiname and pagename, e.g:
   531 
   532     'MoinMoin:FrontPage' -> "MoinMoin", "FrontPage"
   533     'FrontPage' -> "Self", "FrontPage"
   534     'MoinMoin:Page with blanks' -> "MoinMoin", "Page with blanks"
   535     'MoinMoin:' -> "MoinMoin", ""
   536 
   537     can also be used for:
   538 
   539     'attachment:filename with blanks.txt' -> "attachment", "filename with blanks.txt"
   540 
   541     @param wikiurl: the url to split
   542     @rtype: tuple
   543     @return: (wikiname, pagename)
   544     """
   545     try:
   546         wikiname, pagename = wikiurl.split(":", 1)
   547     except ValueError:
   548         wikiname, pagename = 'Self', wikiurl
   549     return wikiname, pagename
   550 
   551 def resolve_wiki(request, wikiurl):
   552     """
   553     Resolve an interwiki link.
   554 
   555     *** DEPRECATED FUNCTION FOR OLD 1.5 SYNTAX - ONLY STILL HERE FOR THE 1.5 -> 1.6 MIGRATION ***
   556     Use resolve_interwiki(), see below.
   557 
   558     @param request: the request object
   559     @param wikiurl: the InterWiki:PageName link
   560     @rtype: tuple
   561     @return: (wikitag, wikiurl, wikitail, err)
   562     """
   563     _interwiki_list = load_wikimap(request)
   564     # split wiki url
   565     wikiname, pagename = split_wiki(wikiurl)
   566 
   567     # return resolved url
   568     if wikiname in _interwiki_list:
   569         return (wikiname, _interwiki_list[wikiname], pagename, False)
   570     else:
   571         return (wikiname, request.script_root, "/InterWiki", True)
   572 
   573 def resolve_interwiki(request, wikiname, pagename):
   574     """ Resolve an interwiki reference (wikiname:pagename).
   575 
   576     @param request: the request object
   577     @param wikiname: interwiki wiki name
   578     @param pagename: interwiki page name
   579     @rtype: tuple
   580     @return: (wikitag, wikiurl, wikitail, err)
   581     """
   582     _interwiki_list = load_wikimap(request)
   583     if wikiname in _interwiki_list:
   584         return (wikiname, _interwiki_list[wikiname], pagename, False)
   585     else:
   586         return (wikiname, request.script_root, "/InterWiki", True)
   587 
   588 def join_wiki(wikiurl, wikitail):
   589     """
   590     Add a (url_quoted) page name to an interwiki url.
   591 
   592     Note: We can't know what kind of URL quoting a remote wiki expects.
   593           We just use a utf-8 encoded string with standard URL quoting.
   594 
   595     @param wikiurl: wiki url, maybe including a $PAGE placeholder
   596     @param wikitail: page name
   597     @rtype: string
   598     @return: generated URL of the page in the other wiki
   599     """
   600     wikitail = url_quote(wikitail)
   601     if '$PAGE' in wikiurl:
   602         return wikiurl.replace('$PAGE', wikitail)
   603     else:
   604         return wikiurl + wikitail
   605 
   606 
   607 #############################################################################
   608 ### Page types (based on page names)
   609 #############################################################################
   610 
   611 def isSystemPage(request, pagename):
   612     """ Is this a system page? Uses AllSystemPagesGroup internally.
   613 
   614     @param request: the request object
   615     @param pagename: the page name
   616     @rtype: bool
   617     @return: true if page is a system page
   618     """
   619     return (request.dicts.has_member('SystemPagesGroup', pagename) or
   620         isTemplatePage(request, pagename))
   621 
   622 
   623 def isTemplatePage(request, pagename):
   624     """ Is this a template page?
   625 
   626     @param pagename: the page name
   627     @rtype: bool
   628     @return: true if page is a template page
   629     """
   630     return request.cfg.cache.page_template_regexact.search(pagename) is not None
   631 
   632 
   633 def isGroupPage(pagename, cfg):
   634     """ Is this a name of group page?
   635 
   636     @param pagename: the page name
   637     @rtype: bool
   638     @return: true if page is a form page
   639     """
   640     return cfg.cache.page_group_regexact.search(pagename) is not None
   641 
   642 
   643 def filterCategoryPages(request, pagelist):
   644     """ Return category pages in pagelist
   645 
   646     WARNING: DO NOT USE THIS TO FILTER THE FULL PAGE LIST! Use
   647     getPageList with a filter function.
   648 
   649     If you pass a list with a single pagename, either that is returned
   650     or an empty list, thus you can use this function like a `isCategoryPage`
   651     one.
   652 
   653     @param pagelist: a list of pages
   654     @rtype: list
   655     @return: only the category pages of pagelist
   656     """
   657     func = request.cfg.cache.page_category_regexact.search
   658     return [pn for pn in pagelist if func(pn)]
   659 
   660 
   661 def getLocalizedPage(request, pagename): # was: getSysPage
   662     """ Get a system page according to user settings and available translations.
   663 
   664     We include some special treatment for the case that <pagename> is the
   665     currently rendered page, as this is the case for some pages used very
   666     often, like FrontPage, RecentChanges etc. - in that case we reuse the
   667     already existing page object instead creating a new one.
   668 
   669     @param request: the request object
   670     @param pagename: the name of the page
   671     @rtype: Page object
   672     @return: the page object of that system page, using a translated page,
   673              if it exists
   674     """
   675     from MoinMoin.Page import Page
   676     i18n_name = request.getText(pagename)
   677     pageobj = None
   678     if i18n_name != pagename:
   679         if request.page and i18n_name == request.page.page_name:
   680             # do not create new object for current page
   681             i18n_page = request.page
   682             if i18n_page.exists():
   683                 pageobj = i18n_page
   684         else:
   685             i18n_page = Page(request, i18n_name)
   686             if i18n_page.exists():
   687                 pageobj = i18n_page
   688 
   689     # if we failed getting a translated version of <pagename>,
   690     # we fall back to english
   691     if not pageobj:
   692         if request.page and pagename == request.page.page_name:
   693             # do not create new object for current page
   694             pageobj = request.page
   695         else:
   696             pageobj = Page(request, pagename)
   697     return pageobj
   698 
   699 
   700 def getFrontPage(request):
   701     """ Convenience function to get localized front page
   702 
   703     @param request: current request
   704     @rtype: Page object
   705     @return localized page_front_page, if there is a translation
   706     """
   707     return getLocalizedPage(request, request.cfg.page_front_page)
   708 
   709 
   710 def getHomePage(request, username=None):
   711     """
   712     Get a user's homepage, or return None for anon users and
   713     those who have not created a homepage.
   714 
   715     DEPRECATED - try to use getInterwikiHomePage (see below)
   716 
   717     @param request: the request object
   718     @param username: the user's name
   719     @rtype: Page
   720     @return: user's homepage object - or None
   721     """
   722     from MoinMoin.Page import Page
   723     # default to current user
   724     if username is None and request.user.valid:
   725         username = request.user.name
   726 
   727     # known user?
   728     if username:
   729         # Return home page
   730         page = Page(request, username)
   731         if page.exists():
   732             return page
   733 
   734     return None
   735 
   736 
   737 def getInterwikiHomePage(request, username=None):
   738     """
   739     Get a user's homepage.
   740 
   741     cfg.user_homewiki influences behaviour of this:
   742     'Self' does mean we store user homepage in THIS wiki.
   743     When set to our own interwikiname, it behaves like with 'Self'.
   744 
   745     'SomeOtherWiki' means we store user homepages in another wiki.
   746 
   747     @param request: the request object
   748     @param username: the user's name
   749     @rtype: tuple (or None for anon users)
   750     @return: (wikiname, pagename)
   751     """
   752     # default to current user
   753     if username is None and request.user.valid:
   754         username = request.user.name
   755     if not username:
   756         return None # anon user
   757 
   758     homewiki = request.cfg.user_homewiki
   759     if homewiki == request.cfg.interwikiname:
   760         homewiki = u'Self'
   761 
   762     return homewiki, username
   763 
   764 
   765 def AbsPageName(context, pagename):
   766     """
   767     Return the absolute pagename for a (possibly) relative pagename.
   768 
   769     @param context: name of the page where "pagename" appears on
   770     @param pagename: the (possibly relative) page name
   771     @rtype: string
   772     @return: the absolute page name
   773     """
   774     if pagename.startswith(PARENT_PREFIX):
   775         while context and pagename.startswith(PARENT_PREFIX):
   776             context = '/'.join(context.split('/')[:-1])
   777             pagename = pagename[PARENT_PREFIX_LEN:]
   778         pagename = '/'.join(filter(None, [context, pagename, ]))
   779     elif pagename.startswith(CHILD_PREFIX):
   780         if context:
   781             pagename = context + '/' + pagename[CHILD_PREFIX_LEN:]
   782         else:
   783             pagename = pagename[CHILD_PREFIX_LEN:]
   784     return pagename
   785 
   786 def RelPageName(context, pagename):
   787     """
   788     Return the relative pagename for some context.
   789 
   790     @param context: name of the page where "pagename" appears on
   791     @param pagename: the absolute page name
   792     @rtype: string
   793     @return: the relative page name
   794     """
   795     if context == '':
   796         # special case, context is some "virtual root" page with name == ''
   797         # every page is a subpage of this virtual root
   798         return CHILD_PREFIX + pagename
   799     elif pagename.startswith(context + CHILD_PREFIX):
   800         # simple child
   801         return pagename[len(context):]
   802     else:
   803         # some kind of sister/aunt
   804         context_frags = context.split('/')   # A, B, C, D, E
   805         pagename_frags = pagename.split('/') # A, B, C, F
   806         # first throw away common parents:
   807         common = 0
   808         for cf, pf in zip(context_frags, pagename_frags):
   809             if cf == pf:
   810                 common += 1
   811             else:
   812                 break
   813         context_frags = context_frags[common:] # D, E
   814         pagename_frags = pagename_frags[common:] # F
   815         go_up = len(context_frags)
   816         return PARENT_PREFIX * go_up + '/'.join(pagename_frags)
   817 
   818 
   819 def pagelinkmarkup(pagename, text=None):
   820     """ return markup that can be used as link to page <pagename> """
   821     from MoinMoin.parser.text_moin_wiki import Parser
   822     if re.match(Parser.word_rule + "$", pagename, re.U|re.X) and \
   823             (text is None or text == pagename):
   824         return pagename
   825     else:
   826         if text is None or text == pagename:
   827             text = ''
   828         else:
   829             text = '|%s' % text
   830         return u'[[%s%s]]' % (pagename, text)
   831 
   832 #############################################################################
   833 ### mimetype support
   834 #############################################################################
   835 import mimetypes
   836 
   837 MIMETYPES_MORE = {
   838  # OpenOffice 2.x & other open document stuff
   839  '.odt': 'application/vnd.oasis.opendocument.text',
   840  '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
   841  '.odp': 'application/vnd.oasis.opendocument.presentation',
   842  '.odg': 'application/vnd.oasis.opendocument.graphics',
   843  '.odc': 'application/vnd.oasis.opendocument.chart',
   844  '.odf': 'application/vnd.oasis.opendocument.formula',
   845  '.odb': 'application/vnd.oasis.opendocument.database',
   846  '.odi': 'application/vnd.oasis.opendocument.image',
   847  '.odm': 'application/vnd.oasis.opendocument.text-master',
   848  '.ott': 'application/vnd.oasis.opendocument.text-template',
   849  '.ots': 'application/vnd.oasis.opendocument.spreadsheet-template',
   850  '.otp': 'application/vnd.oasis.opendocument.presentation-template',
   851  '.otg': 'application/vnd.oasis.opendocument.graphics-template',
   852  # some systems (like Mac OS X) don't have some of these:
   853  '.patch': 'text/x-diff',
   854  '.diff': 'text/x-diff',
   855  '.py': 'text/x-python',
   856  '.cfg': 'text/plain',
   857  '.conf': 'text/plain',
   858  '.irc': 'text/plain',
   859 }
   860 [mimetypes.add_type(mimetype, ext, True) for ext, mimetype in MIMETYPES_MORE.items()]
   861 
   862 MIMETYPES_sanitize_mapping = {
   863     # this stuff is text, but got application/* for unknown reasons
   864     ('application', 'docbook+xml'): ('text', 'docbook'),
   865     ('application', 'x-latex'): ('text', 'latex'),
   866     ('application', 'x-tex'): ('text', 'tex'),
   867     ('application', 'javascript'): ('text', 'javascript'),
   868 }
   869 
   870 MIMETYPES_spoil_mapping = {} # inverse mapping of above
   871 for _key, _value in MIMETYPES_sanitize_mapping.items():
   872     MIMETYPES_spoil_mapping[_value] = _key
   873 
   874 
   875 class MimeType(object):
   876     """ represents a mimetype like text/plain """
   877 
   878     def __init__(self, mimestr=None, filename=None):
   879         self.major = self.minor = None # sanitized mime type and subtype
   880         self.params = {} # parameters like "charset" or others
   881         self.charset = None # this stays None until we know for sure!
   882         self.raw_mimestr = mimestr
   883 
   884         if mimestr:
   885             self.parse_mimetype(mimestr)
   886         elif filename:
   887             self.parse_filename(filename)
   888 
   889     def parse_filename(self, filename):
   890         mtype, encoding = mimetypes.guess_type(filename)
   891         if mtype is None:
   892             mtype = 'application/octet-stream'
   893         self.parse_mimetype(mtype)
   894 
   895     def parse_mimetype(self, mimestr):
   896         """ take a string like used in content-type and parse it into components,
   897             alternatively it also can process some abbreviated string like "wiki"
   898         """
   899         parameters = mimestr.split(";")
   900         parameters = [p.strip() for p in parameters]
   901         mimetype, parameters = parameters[0], parameters[1:]
   902         mimetype = mimetype.split('/')
   903         if len(mimetype) >= 2:
   904             major, minor = mimetype[:2] # we just ignore more than 2 parts
   905         else:
   906             major, minor = self.parse_format(mimetype[0])
   907         self.major = major.lower()
   908         self.minor = minor.lower()
   909         for param in parameters:
   910             key, value = param.split('=')
   911             if value[0] == '"' and value[-1] == '"': # remove quotes
   912                 value = value[1:-1]
   913             self.params[key.lower()] = value
   914         if 'charset' in self.params:
   915             self.charset = self.params['charset'].lower()
   916         self.sanitize()
   917 
   918     def parse_format(self, format):
   919         """ maps from what we currently use on-page in a #format xxx processing
   920             instruction to a sanitized mimetype major, minor tuple.
   921             can also be user later for easier entry by the user, so he can just
   922             type "wiki" instead of "text/moin-wiki".
   923         """
   924         format = format.lower()
   925         if format in config.parser_text_mimetype:
   926             mimetype = 'text', format
   927         else:
   928             mapping = {
   929                 'wiki': ('text', 'moin-wiki'),
   930                 'irc': ('text', 'irssi'),
   931             }
   932             try:
   933                 mimetype = mapping[format]
   934             except KeyError:
   935                 mimetype = 'text', 'x-%s' % format
   936         return mimetype
   937 
   938     def sanitize(self):
   939         """ convert to some representation that makes sense - this is not necessarily
   940             conformant to /etc/mime.types or IANA listing, but if something is
   941             readable text, we will return some text/* mimetype, not application/*,
   942             because we need text/plain as fallback and not application/octet-stream.
   943         """
   944         self.major, self.minor = MIMETYPES_sanitize_mapping.get((self.major, self.minor), (self.major, self.minor))
   945 
   946     def spoil(self):
   947         """ this returns something conformant to /etc/mime.type or IANA as a string,
   948             kind of inverse operation of sanitize(), but doesn't change self
   949         """
   950         major, minor = MIMETYPES_spoil_mapping.get((self.major, self.minor), (self.major, self.minor))
   951         return self.content_type(major, minor)
   952 
   953     def content_type(self, major=None, minor=None, charset=None, params=None):
   954         """ return a string suitable for Content-Type header
   955         """
   956         major = major or self.major
   957         minor = minor or self.minor
   958         params = params or self.params or {}
   959         if major == 'text':
   960             charset = charset or self.charset or params.get('charset', config.charset)
   961             params['charset'] = charset
   962         mimestr = "%s/%s" % (major, minor)
   963         params = ['%s="%s"' % (key.lower(), value) for key, value in params.items()]
   964         params.insert(0, mimestr)
   965         return "; ".join(params)
   966 
   967     def mime_type(self):
   968         """ return a string major/minor only, no params """
   969         return "%s/%s" % (self.major, self.minor)
   970 
   971     def module_name(self):
   972         """ convert this mimetype to a string useable as python module name,
   973             we yield the exact module name first and then proceed to shorter
   974             module names (useful for falling back to them, if the more special
   975             module is not found) - e.g. first "text_python", next "text".
   976             Finally, we yield "application_octet_stream" as the most general
   977             mimetype we have.
   978             Hint: the fallback handler module for text/* should be implemented
   979                   in module "text" (not "text_plain")
   980         """
   981         mimetype = self.mime_type()
   982         modname = mimetype.replace("/", "_").replace("-", "_").replace(".", "_")
   983         fragments = modname.split('_')
   984         for length in range(len(fragments), 1, -1):
   985             yield "_".join(fragments[:length])
   986         yield self.raw_mimestr
   987         yield fragments[0]
   988         yield "application_octet_stream"
   989 
   990 
   991 #############################################################################
   992 ### Plugins
   993 #############################################################################
   994 
class PluginError(Exception):
    """ Base class for plugin errors """

class PluginMissingError(PluginError):
    """ Raised when a plugin is not found """

class PluginAttributeError(PluginError):
    """ Raised when a plugin does not contain the requested attribute """
  1003 
  1004 
  1005 def importPlugin(cfg, kind, name, function="execute"):
  1006     """ Import wiki or builtin plugin
  1007 
  1008     Returns <function> attr from a plugin module <name>.
  1009     If <function> attr is missing, raise PluginAttributeError.
  1010     If <function> is None, return the whole module object.
  1011 
  1012     If <name> plugin can not be imported, raise PluginMissingError.
  1013 
  1014     kind may be one of 'action', 'formatter', 'macro', 'parser' or any other
  1015     directory that exist in MoinMoin or data/plugin.
  1016 
  1017     Wiki plugins will always override builtin plugins. If you want
  1018     specific plugin, use either importWikiPlugin or importBuiltinPlugin
  1019     directly.
  1020 
  1021     @param cfg: wiki config instance
  1022     @param kind: what kind of module we want to import
  1023     @param name: the name of the module
  1024     @param function: the function name
  1025     @rtype: any object
  1026     @return: "function" of module "name" of kind "kind", or None
  1027     """
  1028     try:
  1029         return importWikiPlugin(cfg, kind, name, function)
  1030     except PluginMissingError:
  1031         return importBuiltinPlugin(kind, name, function)
  1032 
  1033 
  1034 def importWikiPlugin(cfg, kind, name, function="execute"):
  1035     """ Import plugin from the wiki data directory
  1036 
  1037     See importPlugin docstring.
  1038     """
  1039     plugins = wikiPlugins(kind, cfg)
  1040     modname = plugins.get(name, None)
  1041     if modname is None:
  1042         raise PluginMissingError()
  1043     moduleName = '%s.%s' % (modname, name)
  1044     return importNameFromPlugin(moduleName, function)
  1045 
  1046 
  1047 def importBuiltinPlugin(kind, name, function="execute"):
  1048     """ Import builtin plugin from MoinMoin package
  1049 
  1050     See importPlugin docstring.
  1051     """
  1052     if not name in builtinPlugins(kind):
  1053         raise PluginMissingError()
  1054     moduleName = 'MoinMoin.%s.%s' % (kind, name)
  1055     return importNameFromPlugin(moduleName, function)
  1056 
  1057 
  1058 def importNameFromPlugin(moduleName, name):
  1059     """ Return <name> attr from <moduleName> module,
  1060         raise PluginAttributeError if name does not exist.
  1061 
  1062         If name is None, return the <moduleName> module object.
  1063     """
  1064     if name is None:
  1065         fromlist = []
  1066     else:
  1067         fromlist = [name]
  1068     module = __import__(moduleName, globals(), {}, fromlist)
  1069     if fromlist:
  1070         # module has the obj for module <moduleName>
  1071         try:
  1072             return getattr(module, name)
  1073         except AttributeError:
  1074             raise PluginAttributeError
  1075     else:
  1076         # module now has the toplevel module of <moduleName> (see __import__ docs!)
  1077         components = moduleName.split('.')
  1078         for comp in components[1:]:
  1079             module = getattr(module, comp)
  1080         return module
  1081 
  1082 
  1083 def builtinPlugins(kind):
  1084     """ Gets a list of modules in MoinMoin.'kind'
  1085 
  1086     @param kind: what kind of modules we look for
  1087     @rtype: list
  1088     @return: module names
  1089     """
  1090     modulename = "MoinMoin." + kind
  1091     return pysupport.importName(modulename, "modules")
  1092 
  1093 
  1094 def wikiPlugins(kind, cfg):
  1095     """
  1096     Gets a dict containing the names of all plugins of @kind
  1097     as the key and the containing module name as the value.
  1098 
  1099     @param kind: what kind of modules we look for
  1100     @rtype: dict
  1101     @return: plugin name to containing module name mapping
  1102     """
  1103     # short-cut if we've loaded the dict already
  1104     # (or already failed to load it)
  1105     cache = cfg._site_plugin_lists
  1106     if kind in cache:
  1107         result = cache[kind]
  1108     else:
  1109         result = {}
  1110         for modname in cfg._plugin_modules:
  1111             try:
  1112                 module = pysupport.importName(modname, kind)
  1113                 packagepath = os.path.dirname(module.__file__)
  1114                 plugins = pysupport.getPluginModules(packagepath)
  1115                 for p in plugins:
  1116                     if not p in result:
  1117                         result[p] = '%s.%s' % (modname, kind)
  1118             except AttributeError:
  1119                 pass
  1120         cache[kind] = result
  1121     return result
  1122 
  1123 
  1124 def getPlugins(kind, cfg):
  1125     """ Gets a list of plugin names of kind
  1126 
  1127     @param kind: what kind of modules we look for
  1128     @rtype: list
  1129     @return: module names
  1130     """
  1131     # Copy names from builtin plugins - so we dont destroy the value
  1132     all_plugins = builtinPlugins(kind)[:]
  1133 
  1134     # Add extension plugins without duplicates
  1135     for plugin in wikiPlugins(kind, cfg):
  1136         if plugin not in all_plugins:
  1137             all_plugins.append(plugin)
  1138 
  1139     return all_plugins
  1140 
  1141 
  1142 def searchAndImportPlugin(cfg, type, name, what=None):
  1143     type2classname = {"parser": "Parser",
  1144                       "formatter": "Formatter",
  1145     }
  1146     if what is None:
  1147         what = type2classname[type]
  1148     mt = MimeType(name)
  1149     plugin = None
  1150     for module_name in mt.module_name():
  1151         try:
  1152             plugin = importPlugin(cfg, type, module_name, what)
  1153             break
  1154         except PluginMissingError:
  1155             pass
  1156     else:
  1157         raise PluginMissingError("Plugin not found!")
  1158     return plugin
  1159 
  1160 
  1161 #############################################################################
  1162 ### Parsers
  1163 #############################################################################
  1164 
  1165 def getParserForExtension(cfg, extension):
  1166     """
  1167     Returns the Parser class of the parser fit to handle a file
  1168     with the given extension. The extension should be in the same
  1169     format as os.path.splitext returns it (i.e. with the dot).
  1170     Returns None if no parser willing to handle is found.
  1171     The dict of extensions is cached in the config object.
  1172 
  1173     @param cfg: the Config instance for the wiki in question
  1174     @param extension: the filename extension including the dot
  1175     @rtype: class, None
  1176     @returns: the parser class or None
  1177     """
  1178     if not hasattr(cfg.cache, 'EXT_TO_PARSER'):
  1179         etp, etd = {}, None
  1180         for pname in getPlugins('parser', cfg):
  1181             try:
  1182                 Parser = importPlugin(cfg, 'parser', pname, 'Parser')
  1183             except PluginMissingError:
  1184                 continue
  1185             if hasattr(Parser, 'extensions'):
  1186                 exts = Parser.extensions
  1187                 if isinstance(exts, list):
  1188                     for ext in Parser.extensions:
  1189                         etp[ext] = Parser
  1190                 elif str(exts) == '*':
  1191                     etd = Parser
  1192         cfg.cache.EXT_TO_PARSER = etp
  1193         cfg.cache.EXT_TO_PARSER_DEFAULT = etd
  1194 
  1195     return cfg.cache.EXT_TO_PARSER.get(extension, cfg.cache.EXT_TO_PARSER_DEFAULT)
  1196 
  1197 
  1198 #############################################################################
  1199 ### Parameter parsing
  1200 #############################################################################
  1201 
  1202 class BracketError(Exception):
  1203     pass
  1204 
  1205 class BracketUnexpectedCloseError(BracketError):
  1206     def __init__(self, bracket):
  1207         self.bracket = bracket
  1208         BracketError.__init__(self, "Unexpected closing bracket %s" % bracket)
  1209 
  1210 class BracketMissingCloseError(BracketError):
  1211     def __init__(self, bracket):
  1212         self.bracket = bracket
  1213         BracketError.__init__(self, "Missing closing bracket %s" % bracket)
  1214 
  1215 class ParserPrefix:
  1216     """
  1217     Trivial container-class holding a single character for
  1218     the possible prefixes for parse_quoted_separated_ext
  1219     and implementing rich equal comparison.
  1220     """
  1221     def __init__(self, prefix):
  1222         self.prefix = prefix
  1223 
  1224     def __eq__(self, other):
  1225         return isinstance(other, ParserPrefix) and other.prefix == self.prefix
  1226 
  1227     def __repr__(self):
  1228         return '<ParserPrefix(%s)>' % self.prefix.encode('utf-8')
  1229 
def parse_quoted_separated_ext(args, separator=None, name_value_separator=None,
                               brackets=None, seplimit=0, multikey=False,
                               prefixes=None, quotes='"'):
    """
    Parses the given string according to the other parameters.

    Items can be quoted with any character from the quotes parameter
    and each quote can be escaped by doubling it, the separator and
    name_value_separator can both be quoted, when name_value_separator
    is set then the name can also be quoted.

    Values that are not given are returned as None, while the
    empty string as a value can be achieved by quoting it.

    If a name or value does not start with a quote, then the quote
    loses its special meaning for that name or value, unless it
    starts with one of the given prefixes (the parameter is unicode
    containing all allowed prefixes.) The prefixes will be returned
    as ParserPrefix() instances in the first element of the tuple
    for that particular argument.

    If multiple separators follow each other, this is treated as
    having None arguments in between, that is also true for when
    space is used as separators (when separator is None), filter
    them out afterwards.

    The function can also do bracketing, i.e. parse expressions
    that contain things like
        "(a (a b))" to [['(', 'a', ['(', 'a', 'b']]],
    in this case, as in this example, the returned list will
    contain sub-lists and the brackets parameter must be a list
    of opening and closing brackets, e.g.
        brackets = ['()', '<>']
    Each sub-list's first item is the opening bracket used for
    grouping.
    Nesting will be observed between the different types of
    brackets given. If bracketing doesn't match, a BracketError
    instance is raised with a 'bracket' property indicating the
    type of missing or unexpected bracket, the instance will be
    either of the class BracketMissingCloseError or of the class
    BracketUnexpectedCloseError.

    If multikey is True (along with setting name_value_separator),
    then the returned tuples for (key, value) pairs can also have
    multiple keys, e.g.
        "a=b=c" -> ('a', 'b', 'c')

    A TypeError is raised, if args is not unicode.

    @param args: arguments to parse
    @param separator: the argument separator, defaults to None, meaning any
        space separates arguments
    @param name_value_separator: separator for name=value (a single
        character, different from separator), defaults to None,
        name=value keywords not parsed if evaluates to False
    @param brackets: a list of two-character strings giving
        opening and closing brackets
    @param seplimit: limits the number of parsed arguments (after that
        many items, separators are not treated as separators any more),
        0 means no limit
    @param multikey: multiple keys allowed for a single value
    @param prefixes: characters that are recognized as prefix, if they
        are the first character of an item
    @param quotes: characters that can be used for quoting
    @rtype: list
    @returns: list of unicode strings and tuples containing
        unicode strings, or lists containing the same for
        bracketing support
    """
    idx = 0
    assert name_value_separator is None or name_value_separator != separator
    assert name_value_separator is None or len(name_value_separator) == 1
    if not isinstance(args, unicode):
        raise TypeError('args must be unicode')
    max = len(args)
    result = []         # result list
    cur = [None]        # current item
    quoted = None       # we're inside quotes, indicates quote character used
    skipquote = 0       # next quote is a quoted quote
    noquote = False     # no quotes expected because word didn't start with one
    seplimit_reached = False # number of separators exhausted
    separator_count = 0 # number of separators encountered
    # NOTE(review): SPACE and nextitemsep are assigned twice here, the
    # next two lines are redundant (both get assigned again right below)
    SPACE = [' ', '\t', ]
    nextitemsep = [separator]   # used for skipping trailing space
    SPACE = [' ', '\t', ]
    if separator is None:
        nextitemsep = SPACE[:]
        separators = SPACE
    else:
        nextitemsep = [separator]   # used for skipping trailing space
        separators = [separator]
    if name_value_separator:
        nextitemsep.append(name_value_separator)

    # bracketing support
    opening = []            # all opening bracket characters
    closing = []            # all closing bracket characters
    bracketstack = []       # (expected closing bracket, outer result list)
    matchingbracket = {}    # opening bracket -> closing bracket
    if brackets:
        for o, c in brackets:
            assert not o in opening
            opening.append(o)
            assert not c in closing
            closing.append(c)
            matchingbracket[o] = c

    def additem(result, cur, separator_count, nextitemsep):
        # Finish the current item: a single value is added to result as is,
        # multiple values (name/value, prefix/value) are added as a tuple,
        # an empty cur adds nothing. result is modified in place, the new
        # values of the other state variables are returned to the caller.
        if len(cur) == 1:
            result.extend(cur)
        elif cur:
            result.append(tuple(cur))
        cur = [None]
        noquote = False
        separator_count += 1
        seplimit_reached = False
        if seplimit and separator_count >= seplimit:
            seplimit_reached = True
            # from now on, only real separators end an item
            nextitemsep = [n for n in nextitemsep if n in separators]

        return cur, noquote, separator_count, seplimit_reached, nextitemsep

    while idx < max:
        char = args[idx]
        next = None
        if idx + 1 < max:
            next = args[idx+1]
        if skipquote:
            skipquote -= 1
        # space handling, only done if space is not the separator
        if not separator is None and not quoted and char in SPACE:
            spaces = ''
            # accumulate all space
            while char in SPACE and idx < max - 1:
                spaces += char
                idx += 1
                char = args[idx]
            # remove space if args end with it
            if char in SPACE and idx == max - 1:
                break
            # remove space at end of argument
            # (idx is not incremented, so char gets processed next)
            if char in nextitemsep:
                continue
            # step back, so the idx += 1 below gets us to the char after
            # the spaces
            idx -= 1
            # spaces inside an item are kept, leading spaces are dropped
            if len(cur) and cur[-1]:
                cur[-1] = cur[-1] + spaces
        elif not quoted and char == name_value_separator:
            if multikey or len(cur) == 1:
                # begin the next part (value or further key) of the item
                cur.append(None)
            else:
                # we get here only if multikey is false and we already have
                # more than one part, so the separator is part of the value
                if not multikey:
                    if cur[-1] is None:
                        cur[-1] = ''
                    cur[-1] += name_value_separator
                else:
                    # NOTE(review): looks unreachable, multikey is always
                    # false in this branch
                    cur.append(None)
            noquote = False
        elif not quoted and not seplimit_reached and char in separators:
            (cur, noquote, separator_count, seplimit_reached,
             nextitemsep) = additem(result, cur, separator_count, nextitemsep)
        elif not quoted and not noquote and char in quotes:
            # opening quote: begin a (so far empty) quoted string
            if len(cur) and cur[-1] is None:
                del cur[-1]
            cur.append(u'')
            quoted = char
        elif char == quoted and not skipquote:
            if next == quoted:
                # doubled quote: the next quote char is taken literally
                skipquote = 2 # will be decremented right away
            else:
                # closing quote
                quoted = None
        elif not quoted and char in opening:
            while len(cur) and cur[-1] is None:
                del cur[-1]
            (cur, noquote, separator_count, seplimit_reached,
             nextitemsep) = additem(result, cur, separator_count, nextitemsep)
            # remember the outer list and collect into a new sub-list that
            # starts with the opening bracket
            bracketstack.append((matchingbracket[char], result))
            result = [char]
        elif not quoted and char in closing:
            while len(cur) and cur[-1] is None:
                del cur[-1]
            (cur, noquote, separator_count, seplimit_reached,
             nextitemsep) = additem(result, cur, separator_count, nextitemsep)
            cur = []
            if not bracketstack:
                raise BracketUnexpectedCloseError(char)
            expected, oldresult = bracketstack[-1]
            if not expected == char:
                raise BracketUnexpectedCloseError(char)
            del bracketstack[-1]
            # the sub-list is complete, continue with the outer list
            oldresult.append(result)
            result = oldresult
        elif not quoted and prefixes and char in prefixes and cur == [None]:
            # prefix character at the beginning of an item
            cur = [ParserPrefix(char)]
            cur.append(None)
        else:
            # any other character is added to the current item
            if len(cur):
                if cur[-1] is None:
                    cur[-1] = char
                else:
                    cur[-1] += char
            else:
                cur.append(char)
            noquote = True

        idx += 1

    if bracketstack:
        # report the closing bracket we still expected
        raise BracketMissingCloseError(bracketstack[-1][0])

    if quoted:
        # the quote was never closed, so put the quote character back in
        # front of the text
        if len(cur):
            if cur[-1] is None:
                cur[-1] = quoted
            else:
                cur[-1] = quoted + cur[-1]
        else:
            cur.append(quoted)

    # add the last item (return value not needed any more)
    additem(result, cur, separator_count, nextitemsep)

    return result
  1442 
  1443 def parse_quoted_separated(args, separator=',', name_value=True, seplimit=0):
  1444     result = []
  1445     positional = result
  1446     if name_value:
  1447         name_value_separator = '='
  1448         trailing = []
  1449         keywords = {}
  1450     else:
  1451         name_value_separator = None
  1452 
  1453     l = parse_quoted_separated_ext(args, separator=separator,
  1454                                    name_value_separator=name_value_separator,
  1455                                    seplimit=seplimit)
  1456     for item in l:
  1457         if isinstance(item, tuple):
  1458             key, value = item
  1459             if key is None:
  1460                 key = u''
  1461             keywords[key] = value
  1462             positional = trailing
  1463         else:
  1464             positional.append(item)
  1465 
  1466     if name_value:
  1467         return result, keywords, trailing
  1468     return result
  1469 
  1470 def get_bool(request, arg, name=None, default=None):
  1471     """
  1472     For use with values returned from parse_quoted_separated or given
  1473     as macro parameters, return a boolean from a unicode string.
  1474     Valid input is 'true'/'false', 'yes'/'no' and '1'/'0' or None for
  1475     the default value.
  1476 
  1477     @param request: A request instance
  1478     @param arg: The argument, may be None or a unicode string
  1479     @param name: Name of the argument, for error messages
  1480     @param default: default value if arg is None
  1481     @rtype: boolean or None
  1482     @returns: the boolean value of the string according to above rules
  1483               (or default value)
  1484     """
  1485     _ = request.getText
  1486     assert default is None or isinstance(default, bool)
  1487     if arg is None:
  1488         return default
  1489     elif not isinstance(arg, unicode):
  1490         raise TypeError('Argument must be None or unicode')
  1491     arg = arg.lower()
  1492     if arg in [u'0', u'false', u'no']:
  1493         return False
  1494     elif arg in [u'1', u'true', u'yes']:
  1495         return True
  1496     else:
  1497         if name:
  1498             raise ValueError(
  1499                 _('Argument "%s" must be a boolean value, not "%s"') % (
  1500                     name, arg))
  1501         else:
  1502             raise ValueError(
  1503                 _('Argument must be a boolean value, not "%s"') % arg)
  1504 
  1505 
  1506 def get_int(request, arg, name=None, default=None):
  1507     """
  1508     For use with values returned from parse_quoted_separated or given
  1509     as macro parameters, return an integer from a unicode string
  1510     containing the decimal representation of a number.
  1511     None is a valid input and yields the default value.
  1512 
  1513     @param request: A request instance
  1514     @param arg: The argument, may be None or a unicode string
  1515     @param name: Name of the argument, for error messages
  1516     @param default: default value if arg is None
  1517     @rtype: int or None
  1518     @returns: the integer value of the string (or default value)
  1519     """
  1520     _ = request.getText
  1521     assert default is None or isinstance(default, (int, long))
  1522     if arg is None:
  1523         return default
  1524     elif not isinstance(arg, unicode):
  1525         raise TypeError('Argument must be None or unicode')
  1526     try:
  1527         return int(arg)
  1528     except ValueError:
  1529         if name:
  1530             raise ValueError(
  1531                 _('Argument "%s" must be an integer value, not "%s"') % (
  1532                     name, arg))
  1533         else:
  1534             raise ValueError(
  1535                 _('Argument must be an integer value, not "%s"') % arg)
  1536 
  1537 
  1538 def get_float(request, arg, name=None, default=None):
  1539     """
  1540     For use with values returned from parse_quoted_separated or given
  1541     as macro parameters, return a float from a unicode string.
  1542     None is a valid input and yields the default value.
  1543 
  1544     @param request: A request instance
  1545     @param arg: The argument, may be None or a unicode string
  1546     @param name: Name of the argument, for error messages
  1547     @param default: default return value if arg is None
  1548     @rtype: float or None
  1549     @returns: the float value of the string (or default value)
  1550     """
  1551     _ = request.getText
  1552     assert default is None or isinstance(default, (int, long, float))
  1553     if arg is None:
  1554         return default
  1555     elif not isinstance(arg, unicode):
  1556         raise TypeError('Argument must be None or unicode')
  1557     try:
  1558         return float(arg)
  1559     except ValueError:
  1560         if name:
  1561             raise ValueError(
  1562                 _('Argument "%s" must be a floating point value, not "%s"') % (
  1563                     name, arg))
  1564         else:
  1565             raise ValueError(
  1566                 _('Argument must be a floating point value, not "%s"') % arg)
  1567 
  1568 
  1569 def get_complex(request, arg, name=None, default=None):
  1570     """
  1571     For use with values returned from parse_quoted_separated or given
  1572     as macro parameters, return a complex from a unicode string.
  1573     None is a valid input and yields the default value.
  1574 
  1575     @param request: A request instance
  1576     @param arg: The argument, may be None or a unicode string
  1577     @param name: Name of the argument, for error messages
  1578     @param default: default return value if arg is None
  1579     @rtype: complex or None
  1580     @returns: the complex value of the string (or default value)
  1581     """
  1582     _ = request.getText
  1583     assert default is None or isinstance(default, (int, long, float, complex))
  1584     if arg is None:
  1585         return default
  1586     elif not isinstance(arg, unicode):
  1587         raise TypeError('Argument must be None or unicode')
  1588     try:
  1589         # allow writing 'i' instead of 'j'
  1590         arg = arg.replace('i', 'j').replace('I', 'j')
  1591         return complex(arg)
  1592     except ValueError:
  1593         if name:
  1594             raise ValueError(
  1595                 _('Argument "%s" must be a complex value, not "%s"') % (
  1596                     name, arg))
  1597         else:
  1598             raise ValueError(
  1599                 _('Argument must be a complex value, not "%s"') % arg)
  1600 
  1601 
  1602 def get_unicode(request, arg, name=None, default=None):
  1603     """
  1604     For use with values returned from parse_quoted_separated or given
  1605     as macro parameters, return a unicode string from a unicode string.
  1606     None is a valid input and yields the default value.
  1607 
  1608     @param request: A request instance
  1609     @param arg: The argument, may be None or a unicode string
  1610     @param name: Name of the argument, for error messages
  1611     @param default: default return value if arg is None;
  1612     @rtype: unicode or None
  1613     @returns: the unicode string (or default value)
  1614     """
  1615     assert default is None or isinstance(default, unicode)
  1616     if arg is None:
  1617         return default
  1618     elif not isinstance(arg, unicode):
  1619         raise TypeError('Argument must be None or unicode')
  1620 
  1621     return arg
  1622 
  1623 
  1624 def get_choice(request, arg, name=None, choices=[None]):
  1625     """
  1626     For use with values returned from parse_quoted_separated or given
  1627     as macro parameters, return a unicode string that must be in the
  1628     choices given. None is a valid input and yields first of the valid
  1629     choices.
  1630 
  1631     @param request: A request instance
  1632     @param arg: The argument, may be None or a unicode string
  1633     @param name: Name of the argument, for error messages
  1634     @param choices: the possible choices
  1635     @rtype: unicode or None
  1636     @returns: the unicode string (or default value)
  1637     """
  1638     assert isinstance(choices, (tuple, list))
  1639     if arg is None:
  1640         return choices[0]
  1641     elif not isinstance(arg, unicode):
  1642         raise TypeError('Argument must be None or unicode')
  1643     elif not arg in choices:
  1644         _ = request.getText
  1645         if name:
  1646             raise ValueError(
  1647                 _('Argument "%s" must be one of "%s", not "%s"') % (
  1648                     name, '", "'.join(choices), arg))
  1649         else:
  1650             raise ValueError(
  1651                 _('Argument must be one of "%s", not "%s"') % (
  1652                     '", "'.join(choices), arg))
  1653 
  1654     return arg
  1655 
  1656 
  1657 class IEFArgument:
  1658     """
  1659     Base class for new argument parsers for
  1660     invoke_extension_function.
  1661     """
  1662     def __init__(self):
  1663         pass
  1664 
  1665     def parse_argument(self, s):
  1666         """
  1667         Parse the argument given in s (a string) and return
  1668         the argument for the extension function.
  1669         """
  1670         raise NotImplementedError
  1671 
  1672     def get_default(self):
  1673         """
  1674         Return the default for this argument.
  1675         """
  1676         raise NotImplementedError
  1677 
  1678 
  1679 class UnitArgument(IEFArgument):
  1680     """
  1681     Argument class for invoke_extension_function that forces
  1682     having any of the specified units given for a value.
  1683 
  1684     Note that the default unit is "mm".
  1685 
  1686     Use, for example, "UnitArgument('7mm', float, ['%', 'mm'])".
  1687 
  1688     If the defaultunit parameter is given, any argument that
  1689     can be converted into the given argtype is assumed to have
  1690     the default unit. NOTE: This doesn't work with a choice
  1691     (tuple or list) argtype.
  1692     """
  1693     def __init__(self, default, argtype, units=['mm'], defaultunit=None):
  1694         """
  1695         Initialise a UnitArgument giving the default,
  1696         argument type and the permitted units.
  1697         """
  1698         IEFArgument.__init__(self)
  1699         self._units = list(units)
  1700         self._units.sort(lambda x, y: len(y) - len(x))
  1701         self._type = argtype
  1702         self._defaultunit = defaultunit
  1703         assert defaultunit is None or defaultunit in units
  1704         if default is not None:
  1705             self._default = self.parse_argument(default)
  1706         else:
  1707             self._default = None
  1708 
  1709     def parse_argument(self, s):
  1710         for unit in self._units:
  1711             if s.endswith(unit):
  1712                 ret = (self._type(s[:len(s) - len(unit)]), unit)
  1713                 return ret
  1714         if self._defaultunit is not None:
  1715             try:
  1716                 return (self._type(s), self._defaultunit)
  1717             except ValueError:
  1718                 pass
  1719         units = ', '.join(self._units)
  1720         ## XXX: how can we translate this?
  1721         raise ValueError("Invalid unit in value %s (allowed units: %s)" % (s, units))
  1722 
  1723     def get_default(self):
  1724         return self._default
  1725 
  1726 
  1727 class required_arg:
  1728     """
  1729     Wrap a type in this class and give it as default argument
  1730     for a function passed to invoke_extension_function() in
  1731     order to get generic checking that the argument is given.
  1732     """
  1733     def __init__(self, argtype):
  1734         """
  1735         Initialise a required_arg
  1736         @param argtype: the type the argument should have
  1737         """
  1738         if not (argtype in (bool, int, long, float, complex, unicode) or
  1739                 isinstance(argtype, (IEFArgument, tuple, list))):
  1740             raise TypeError("argtype must be a valid type")
  1741         self.argtype = argtype
  1742 
  1743 
  1744 def invoke_extension_function(request, function, args, fixed_args=[]):
  1745     """
  1746     Parses arguments for an extension call and calls the extension
  1747     function with the arguments.
  1748 
  1749     If the macro function has a default value that is a bool,
  1750     int, long, float or unicode object, then the given value
  1751     is converted to the type of that default value before passing
  1752     it to the macro function. That way, macros need not call the
  1753     wikiutil.get_* functions for any arguments that have a default.
  1754 
  1755     @param request: the request object
  1756     @param function: the function to invoke
  1757     @param args: unicode string with arguments (or evaluating to False)
  1758     @param fixed_args: fixed arguments to pass as the first arguments
  1759     @returns: the return value from the function called
  1760     """
  1761 
  1762     def _convert_arg(request, value, default, name=None):
  1763         """
  1764         Using the get_* functions, convert argument to the type of the default
  1765         if that is any of bool, int, long, float or unicode; if the default
  1766         is the type itself then convert to that type (keeps None) or if the
  1767         default is a list require one of the list items.
  1768 
  1769         In other cases return the value itself.
  1770         """
  1771         # if extending this, extend required_arg as well!
  1772         if isinstance(default, bool):
  1773             return get_bool(request, value, name, default)
  1774         elif isinstance(default, (int, long)):
  1775             return get_int(request, value, name, default)
  1776         elif isinstance(default, float):
  1777             return get_float(request, value, name, default)
  1778         elif isinstance(default, complex):
  1779             return get_complex(request, value, name, default)
  1780         elif isinstance(default, unicode):
  1781             return get_unicode(request, value, name, default)
  1782         elif isinstance(default, (tuple, list)):
  1783             return get_choice(request, value, name, default)
  1784         elif default is bool:
  1785             return get_bool(request, value, name)
  1786         elif default is int or default is long:
  1787             return get_int(request, value, name)
  1788         elif default is float:
  1789             return get_float(request, value, name)
  1790         elif default is complex:
  1791             return get_complex(request, value, name)
  1792         elif isinstance(default, IEFArgument):
  1793             # defaults handled later
  1794             if value is None:
  1795                 return None
  1796             return default.parse_argument(value)
  1797         elif isinstance(default, required_arg):
  1798             if isinstance(default.argtype, (tuple, list)):
  1799                 # treat choice specially and return None if no choice
  1800                 # is given in the value
  1801                 choices = [None] + list(default.argtype)
  1802                 return get_choice(request, value, name, choices)
  1803             else:
  1804                 return _convert_arg(request, value, default.argtype, name)
  1805         return value
  1806 
  1807     assert isinstance(fixed_args, (list, tuple))
  1808 
  1809     _ = request.getText
  1810 
  1811     kwargs = {}
  1812     kwargs_to_pass = {}
  1813     trailing_args = []
  1814 
  1815     if args:
  1816         assert isinstance(args, unicode)
  1817 
  1818         positional, keyword, trailing = parse_quoted_separated(args)
  1819 
  1820         for kw in keyword:
  1821             try:
  1822                 kwargs[str(kw)] = keyword[kw]
  1823             except UnicodeEncodeError:
  1824                 kwargs_to_pass[kw] = keyword[kw]
  1825 
  1826         trailing_args.extend(trailing)
  1827 
  1828     else:
  1829         positional = []
  1830 
  1831     if isfunction(function) or ismethod(function):
  1832         argnames, varargs, varkw, defaultlist = getargspec(function)
  1833     elif isclass(function):
  1834         (argnames, varargs,
  1835          varkw, defaultlist) = getargspec(function.__init__.im_func)
  1836     else:
  1837         raise TypeError('function must be a function, method or class')
  1838 
  1839     # self is implicit!
  1840     if ismethod(function) or isclass(function):
  1841         argnames = argnames[1:]
  1842 
  1843     fixed_argc = len(fixed_args)
  1844     argnames = argnames[fixed_argc:]
  1845     argc = len(argnames)
  1846     if not defaultlist:
  1847         defaultlist = []
  1848 
  1849     # if the fixed parameters have defaults too...
  1850     if argc < len(defaultlist):
  1851         defaultlist = defaultlist[fixed_argc:]
  1852     defstart = argc - len(defaultlist)
  1853 
  1854     defaults = {}
  1855     # reverse to be able to pop() things off
  1856     positional.reverse()
  1857     allow_kwargs = False
  1858     allow_trailing = False
  1859     # convert all arguments to keyword arguments,
  1860     # fill all arguments that weren't given with None
  1861     for idx in range(argc):
  1862         argname = argnames[idx]
  1863         if argname == '_kwargs':
  1864             allow_kwargs = True
  1865             continue
  1866         if argname == '_trailing_args':
  1867             allow_trailing = True
  1868             continue
  1869         if positional:
  1870             kwargs[argname] = positional.pop()
  1871         if not argname in kwargs:
  1872             kwargs[argname] = None
  1873         if idx >= defstart:
  1874             defaults[argname] = defaultlist[idx - defstart]
  1875 
  1876     if positional:
  1877         if not allow_trailing:
  1878             raise ValueError(_('Too many arguments'))
  1879         trailing_args.extend(positional)
  1880 
  1881     if trailing_args:
  1882         if not allow_trailing:
  1883             raise ValueError(_('Cannot have arguments without name following'
  1884                                ' named arguments'))
  1885         kwargs['_trailing_args'] = trailing_args
  1886 
  1887     # type-convert all keyword arguments to the type
  1888     # that the default value indicates
  1889     for argname in kwargs.keys()[:]:
  1890         if argname in defaults:
  1891             # the value of 'argname' from kwargs will be put into the
  1892             # macro's 'argname' argument, so convert that giving the
  1893             # name to the converter so the user is told which argument
  1894             # went wrong (if it does)
  1895             kwargs[argname] = _convert_arg(request, kwargs[argname],
  1896                                            defaults[argname], argname)
  1897             if kwargs[argname] is None:
  1898                 if isinstance(defaults[argname], required_arg):
  1899                     raise ValueError(_('Argument "%s" is required') % argname)
  1900                 if isinstance(defaults[argname], IEFArgument):
  1901                     kwargs[argname] = defaults[argname].get_default()
  1902 
  1903         if not argname in argnames:
  1904             # move argname into _kwargs parameter
  1905             kwargs_to_pass[argname] = kwargs[argname]
  1906             del kwargs[argname]
  1907 
  1908     if kwargs_to_pass:
  1909         kwargs['_kwargs'] = kwargs_to_pass
  1910         if not allow_kwargs:
  1911             raise ValueError(_(u'No argument named "%s"') % (
  1912                 kwargs_to_pass.keys()[0]))
  1913 
  1914     return function(*fixed_args, **kwargs)
  1915 
  1916 
  1917 def parseAttributes(request, attrstring, endtoken=None, extension=None):
  1918     """
  1919     Parse a list of attributes and return a dict plus a possible
  1920     error message.
  1921     If extension is passed, it has to be a callable that returns
  1922     a tuple (found_flag, msg). found_flag is whether it did find and process
  1923     something, msg is '' when all was OK or any other string to return an error
  1924     message.
  1925 
  1926     @param request: the request object
  1927     @param attrstring: string containing the attributes to be parsed
  1928     @param endtoken: token terminating parsing
  1929     @param extension: extension function -
  1930                       gets called with the current token, the parser and the dict
  1931     @rtype: dict, msg
  1932     @return: a dict plus a possible error message
  1933     """
  1934     import shlex, StringIO
  1935 
  1936     _ = request.getText
  1937 
  1938     parser = shlex.shlex(StringIO.StringIO(attrstring))
  1939     parser.commenters = ''
  1940     msg = None
  1941     attrs = {}
  1942 
  1943     while not msg:
  1944         try:
  1945             key = parser.get_token()
  1946         except ValueError, err:
  1947             msg = str(err)
  1948             break
  1949         if not key:
  1950             break
  1951         if endtoken and key == endtoken:
  1952             break
  1953 
  1954         # call extension function with the current token, the parser, and the dict
  1955         if extension:
  1956             found_flag, msg = extension(key, parser, attrs)
  1957             #logging.debug("%r = extension(%r, parser, %r)" % (msg, key, attrs))
  1958             if found_flag:
  1959                 continue
  1960             elif msg:
  1961                 break
  1962             #else (we found nothing, but also didn't have an error msg) we just continue below:
  1963 
  1964         try:
  1965             eq = parser.get_token()
  1966         except ValueError, err:
  1967             msg = str(err)
  1968             break
  1969         if eq != "=":
  1970             msg = _('Expected "=" to follow "%(token)s"') % {'token': key}
  1971             break
  1972 
  1973         try:
  1974             val = parser.get_token()
  1975         except ValueError, err:
  1976             msg = str(err)
  1977             break
  1978         if not val:
  1979             msg = _('Expected a value for key "%(token)s"') % {'token': key}
  1980             break
  1981 
  1982         key = escape(key) # make sure nobody cheats
  1983 
  1984         # safely escape and quote value
  1985         if val[0] in ["'", '"']:
  1986             val = escape(val)
  1987         else:
  1988             val = '"%s"' % escape(val, 1)
  1989 
  1990         attrs[key.lower()] = val
  1991 
  1992     return attrs, msg or ''
  1993 
  1994 
  1995 class ParameterParser:
  1996     """ MoinMoin macro parameter parser
  1997 
  1998         Parses a given parameter string, separates the individual parameters
  1999         and detects their type.
  2000 
  2001         Possible parameter types are:
  2002 
  2003         Name      | short  | example
  2004         ----------------------------
  2005          Integer  | i      | -374
  2006          Float    | f      | 234.234 23.345E-23
  2007          String   | s      | 'Stri\'ng'
  2008          Boolean  | b      | 0 1 True false
  2009          Name     |        | case_sensitive | converted to string
  2010 
  2011         So say you want to parse three things, name, age and if the
  2012         person is male or not:
  2013 
  2014         The pattern will be: %(name)s%(age)i%(male)b
  2015 
  2016         As a result, the returned dict will put the first value into
  2017         male, second into age etc. If some argument is missing, it will
  2018         get None as its value. This also means that all the identifiers
  2019         in the pattern will exist in the dict, they will just have the
  2020         value None if they were not specified by the caller.
  2021 
  2022         So if we call it with the parameters as follows:
  2023             ("John Smith", 18)
  2024         this will result in the following dict:
  2025             {"name": "John Smith", "age": 18, "male": None}
  2026 
  2027         Another way of calling would be:
  2028             ("John Smith", male=True)
  2029         this will result in the following dict:
  2030             {"name": "John Smith", "age": None, "male": True}
  2031     """
  2032 
  2033     def __init__(self, pattern):
  2034         # parameter_re = "([^\"',]*(\"[^\"]*\"|'[^']*')?[^\"',]*)[,)]"
  2035         name = "(?P<%s>[a-zA-Z_][a-zA-Z0-9_]*)"
  2036         int_re = r"(?P<int>-?\d+)"
  2037         bool_re = r"(?P<bool>(([10])|([Tt]rue)|([Ff]alse)))"
  2038         float_re = r"(?P<float>-?\d+\.\d+([eE][+-]?\d+)?)"
  2039         string_re = (r"(?P<string>('([^']|(\'))*?')|" +
  2040                                 r'("([^"]|(\"))*?"))')
  2041         name_re = name % "name"
  2042         name_param_re = name % "name_param"
  2043 
  2044         param_re = r"\s*(\s*%s\s*=\s*)?(%s|%s|%s|%s|%s)\s*(,|$)" % (
  2045                    name_re, float_re, int_re, bool_re, string_re, name_param_re)
  2046         self.param_re = re.compile(param_re, re.U)
  2047         self._parse_pattern(pattern)
  2048 
  2049     def _parse_pattern(self, pattern):
  2050         param_re = r"(%(?P<name>\(.*?\))?(?P<type>[ibfs]{1,3}))|\|"
  2051         i = 0
  2052         # TODO: Optionals aren't checked.
  2053         self.optional = []
  2054         named = False
  2055         self.param_list = []
  2056         self.param_dict = {}
  2057 
  2058         for match in re.finditer(param_re, pattern):
  2059             if match.group() == "|":
  2060                 self.optional.append(i)
  2061                 continue
  2062             self.param_list.append(match.group('type'))
  2063             if match.group('name'):
  2064                 named = True
  2065                 self.param_dict[match.group('name')[1:-1]] = i
  2066             elif named:
  2067                 raise ValueError("Named parameter expected")
  2068             i += 1
  2069 
  2070     def __str__(self):
  2071         return "%s, %s, optional:%s" % (self.param_list, self.param_dict,
  2072                                         self.optional)
  2073 
  2074     def parse_parameters(self, params):
  2075         # Default list/dict entries to None
  2076         parameter_list = [None] * len(self.param_list)
  2077         parameter_dict = dict([(key, None) for key in self.param_dict])
  2078         check_list = [0] * len(self.param_list)
  2079 
  2080         i = 0
  2081         start = 0
  2082         fixed_count = 0
  2083         named = False
  2084 
  2085         while start < len(params):
  2086             match = re.match(self.param_re, params[start:])
  2087             if not match:
  2088                 raise ValueError("malformed parameters")
  2089             start += match.end()
  2090             if match.group("int"):
  2091                 pvalue = int(match.group("int"))
  2092                 ptype = 'i'
  2093             elif match.group("bool"):
  2094                 pvalue = (match.group("bool") == "1") or (match.group("bool") == "True") or (match.group("bool") == "true")
  2095                 ptype = 'b'
  2096             elif match.group("float"):
  2097                 pvalue = float(match.group("float"))
  2098                 ptype = 'f'
  2099             elif match.group("string"):
  2100                 pvalue = match.group("string")[1:-1]
  2101                 ptype = 's'
  2102             elif match.group("name_param"):
  2103                 pvalue = match.group("name_param")
  2104                 ptype = 'n'
  2105             else:
  2106                 raise ValueError("Parameter parser code does not fit param_re regex")
  2107 
  2108             name = match.group("name")
  2109             if name:
  2110                 if name not in self.param_dict:
  2111                     # TODO we should think on inheritance of parameters
  2112                     raise ValueError("unknown parameter name '%s'" % name)
  2113                 nr = self.param_dict[name]
  2114                 if check_list[nr]:
  2115                     raise ValueError("parameter '%s' specified twice" % name)
  2116                 else:
  2117                     check_list[nr] = 1
  2118                 pvalue = self._check_type(pvalue, ptype, self.param_list[nr])
  2119                 parameter_dict[name] = pvalue
  2120                 parameter_list[nr] = pvalue
  2121                 named = True
  2122             elif named:
  2123                 raise ValueError("only named parameters allowed after first named parameter")
  2124             else:
  2125                 nr = i
  2126                 if nr not in self.param_dict.values():
  2127                     fixed_count = nr + 1
  2128                 parameter_list[nr] = self._check_type(pvalue, ptype, self.param_list[nr])
  2129 
  2130             # Let's populate and map our dictionary to what's been found
  2131             for name in self.param_dict:
  2132                 tmp = self.param_dict[name]
  2133                 parameter_dict[name] = parameter_list[tmp]
  2134 
  2135             i += 1
  2136 
  2137         for i in range(fixed_count):
  2138             parameter_dict[i] = parameter_list[i]
  2139 
  2140         return fixed_count, parameter_dict
  2141 
  2142     def _check_type(self, pvalue, ptype, format):
  2143         if ptype == 'n' and 's' in format: # n as s
  2144             return pvalue
  2145 
  2146         if ptype in format:
  2147             return pvalue # x -> x
  2148 
  2149         if ptype == 'i':
  2150             if 'f' in format:
  2151                 return float(pvalue) # i -> f
  2152             elif 'b' in format:
  2153                 return pvalue != 0 # i -> b
  2154         elif ptype == 's':
  2155             if 'b' in format:
  2156                 if pvalue.lower() == 'false':
  2157                     return False # s-> b
  2158                 elif pvalue.lower() == 'true':
  2159                     return True # s-> b
  2160                 else:
  2161                     raise ValueError('%r does not match format %r' % (pvalue, format))
  2162 
  2163         if 's' in format: # * -> s
  2164             return str(pvalue)
  2165 
  2166         raise ValueError('%r does not match format %r' % (pvalue, format))
  2167 
  2168 
  2169 #############################################################################
  2170 ### Misc
  2171 #############################################################################
  2172 def normalize_pagename(name, cfg):
  2173     """ Normalize page name
  2174 
  2175     Prevent creating page names with invisible characters or funny
  2176     whitespace that might confuse the users or abuse the wiki, or
  2177     just does not make sense.
  2178 
  2179     Restrict even more group pages, so they can be used inside acl lines.
  2180 
  2181     @param name: page name, unicode
  2182     @rtype: unicode
  2183     @return: decoded and sanitized page name
  2184     """
  2185     # Strip invalid characters
  2186     name = config.page_invalid_chars_regex.sub(u'', name)
  2187 
  2188     # Split to pages and normalize each one
  2189     pages = name.split(u'/')
  2190     normalized = []
  2191     for page in pages:
  2192         # Ignore empty or whitespace only pages
  2193         if not page or page.isspace():
  2194             continue
  2195 
  2196         # Cleanup group pages.
  2197         # Strip non alpha numeric characters, keep white space
  2198         if isGroupPage(page, cfg):
  2199             page = u''.join([c for c in page
  2200                              if c.isalnum() or c.isspace()])
  2201 
  2202         # Normalize white space. Each name can contain multiple
  2203         # words separated with only one space. Split handle all
  2204         # 30 unicode spaces (isspace() == True)
  2205         page = u' '.join(page.split())
  2206 
  2207         normalized.append(page)
  2208 
  2209     # Assemble components into full pagename
  2210     name = u'/'.join(normalized)
  2211     return name
  2212 
  2213 def taintfilename(basename):
  2214     """
  2215     Make a filename that is supposed to be a plain name secure, i.e.
  2216     remove any possible path components that compromise our system.
  2217 
  2218     @param basename: (possibly unsafe) filename
  2219     @rtype: string
  2220     @return: (safer) filename
  2221     """
  2222     for x in (os.pardir, ':', '/', '\\', '<', '>'):
  2223         basename = basename.replace(x, '_')
  2224 
  2225     return basename
  2226 
  2227 
  2228 def mapURL(request, url):
  2229     """
  2230     Map URLs according to 'cfg.url_mappings'.
  2231 
  2232     @param url: a URL
  2233     @rtype: string
  2234     @return: mapped URL
  2235     """
  2236     # check whether we have to map URLs
  2237     if request.cfg.url_mappings:
  2238         # check URL for the configured prefixes
  2239         for prefix in request.cfg.url_mappings:
  2240             if url.startswith(prefix):
  2241                 # substitute prefix with replacement value
  2242                 return request.cfg.url_mappings[prefix] + url[len(prefix):]
  2243 
  2244     # return unchanged url
  2245     return url
  2246 
  2247 
  2248 def getUnicodeIndexGroup(name):
  2249     """
  2250     Return a group letter for `name`, which must be a unicode string.
  2251     Currently supported: Hangul Syllables (U+AC00 - U+D7AF)
  2252 
  2253     @param name: a string
  2254     @rtype: string
  2255     @return: group letter or None
  2256     """
  2257     c = name[0]
  2258     if u'\uAC00' <= c <= u'\uD7AF': # Hangul Syllables
  2259         return unichr(0xac00 + (int(ord(c) - 0xac00) / 588) * 588)
  2260     else:
  2261         return c.upper() # we put lower and upper case words into the same index group
  2262 
  2263 
  2264 def isStrictWikiname(name, word_re=re.compile(ur"^(?:[%(u)s][%(l)s]+){2,}$" % {'u': config.chars_upper, 'l': config.chars_lower})):
  2265     """
  2266     Check whether this is NOT an extended name.
  2267 
  2268     @param name: the wikiname in question
  2269     @rtype: bool
  2270     @return: true if name matches the word_re
  2271     """
  2272     return word_re.match(name)
  2273 
  2274 
  2275 def is_URL(arg, schemas=config.url_schemas):
  2276     """ Return True if arg is a URL (with a schema given in the schemas list).
  2277 
  2278         Note: there are not that many requirements for generic URLs, basically
  2279         the only mandatory requirement is the ':' between schema and rest.
  2280         Schema itself could be anything, also the rest (but we only support some
  2281         schemas, as given in config.url_schemas, so it is a bit less ambiguous).
  2282     """
  2283     if ':' not in arg:
  2284         return False
  2285     for schema in schemas:
  2286         if arg.startswith(schema + ':'):
  2287             return True
  2288     return False
  2289 
  2290 
  2291 def isPicture(url):
  2292     """
  2293     Is this a picture's url?
  2294 
  2295     @param url: the url in question
  2296     @rtype: bool
  2297     @return: true if url points to a picture
  2298     """
  2299     extpos = url.rfind(".") + 1
  2300     return extpos > 1 and url[extpos:].lower() in config.browser_supported_images
  2301 
  2302 
  2303 def link_tag(request, params, text=None, formatter=None, on=None, **kw):
  2304     """ Create a link.
  2305 
  2306     TODO: cleanup css_class
  2307 
  2308     @param request: the request object
  2309     @param params: parameter string appended to the URL after the scriptname/
  2310     @param text: text / inner part of the <a>...</a> link - does NOT get
  2311                  escaped, so you can give HTML here and it will be used verbatim
  2312     @param formatter: the formatter object to use
  2313     @param on: opening/closing tag only
  2314     @keyword attrs: additional attrs (HTMLified string) (removed in 1.5.3)
  2315     @rtype: string
  2316     @return: formatted link tag
  2317     """
  2318     if formatter is None:
  2319         formatter = request.html_formatter
  2320     if 'css_class' in kw:
  2321         css_class = kw['css_class']
  2322         del kw['css_class'] # one time is enough
  2323     else:
  2324         css_class = None
  2325     id = kw.get('id', None)
  2326     name = kw.get('name', None)
  2327     if text is None:
  2328         text = params # default
  2329     if formatter:
  2330         url = "%s/%s" % (request.script_root, params)
  2331         # formatter.url will escape the url part
  2332         if on is not None:
  2333             tag = formatter.url(on, url, css_class, **kw)
  2334         else:
  2335             tag = (formatter.url(1, url, css_class, **kw) +
  2336                 formatter.rawHTML(text) +
  2337                 formatter.url(0))
  2338     else: # this shouldn't be used any more:
  2339         if on is not None and not on:
  2340             tag = '</a>'
  2341         else:
  2342             attrs = ''
  2343             if css_class:
  2344                 attrs += ' class="%s"' % css_class
  2345             if id:
  2346                 attrs += ' id="%s"' % id
  2347             if name:
  2348                 attrs += ' name="%s"' % name
  2349             tag = '<a%s href="%s/%s">' % (attrs, request.script_root, params)
  2350             if not on:
  2351                 tag = "%s%s</a>" % (tag, text)
  2352         logging.warning("wikiutil.link_tag called without formatter and without request.html_formatter. tag=%r" % (tag, ))
  2353     return tag
  2354 
  2355 def containsConflictMarker(text):
  2356     """ Returns true if there is a conflict marker in the text. """
  2357     return "/!\\ '''Edit conflict" in text
  2358 
  2359 def pagediff(request, pagename1, rev1, pagename2, rev2, **kw):
  2360     """
  2361     Calculate the "diff" between two page contents.
  2362 
  2363     @param pagename1: name of first page
  2364     @param rev1: revision of first page
  2365     @param pagename2: name of second page
  2366     @param rev2: revision of second page
  2367     @keyword ignorews: if 1: ignore pure-whitespace changes.
  2368     @rtype: list
  2369     @return: lines of diff output
  2370     """
  2371     from MoinMoin.Page import Page
  2372     from MoinMoin.util import diff_text
  2373     lines1 = Page(request, pagename1, rev=rev1).getlines()
  2374     lines2 = Page(request, pagename2, rev=rev2).getlines()
  2375 
  2376     lines = diff_text.diff(lines1, lines2, **kw)
  2377     return lines
  2378 
  2379 def anchor_name_from_text(text):
  2380     '''
  2381     Generate an anchor name from the given text.
  2382     This function generates valid HTML IDs matching: [A-Za-z][A-Za-z0-9:_.-]*
  2383     Note: this transformation has a special feature: when you feed it with a
  2384           valid ID/name, it will return it without modification (identity
  2385           transformation).
  2386     '''
  2387     quoted = urllib.quote_plus(text.encode('utf-7'), safe=':')
  2388     res = quoted.replace('%', '.').replace('+', '_')
  2389     if not res[:1].isalpha():
  2390         return 'A%s' % res
  2391     return res
  2392 
  2393 def split_anchor(pagename):
  2394     """
  2395     Split a pagename that (optionally) has an anchor into the real pagename
  2396     and the anchor part. If there is no anchor, it returns an empty string
  2397     for the anchor.
  2398 
  2399     Note: if pagename contains a # (as part of the pagename, not as anchor),
  2400           you can use a trick to make it work nevertheless: just append a
  2401           # at the end:
  2402           "C##" returns ("C#", "")
  2403           "Problem #1#" returns ("Problem #1", "")
  2404 
  2405     TODO: We shouldn't deal with composite pagename#anchor strings, but keep
  2406           it separate.
  2407           Current approach: [[pagename#anchor|label|attr=val,&qarg=qval]]
  2408           Future approach:  [[pagename|label|attr=val,&qarg=qval,#anchor]]
  2409           The future approach will avoid problems when there is a # in the
  2410           pagename part (and no anchor). Also, we need to append #anchor
  2411           at the END of the generated URL (AFTER the query string).
  2412     """
  2413     parts = rsplit(pagename, '#', 1)
  2414     if len(parts) == 2:
  2415         return parts
  2416     else:
  2417         return pagename, ""
  2418 
  2419 ########################################################################
  2420 ### Tickets - used by RenamePage and DeletePage
  2421 ########################################################################
  2422 
  2423 def createTicket(request, tm=None, action=None):
  2424     """ Create a ticket using a configured secret
  2425 
  2426         @param tm: unix timestamp (optional, uses current time if not given)
  2427         @param action: action name (optional, uses current action if not given)
  2428                        Note: if you create a ticket for a form that calls another
  2429                              action than the current one, you MUST specify the
  2430                              action you call when posting the form.
  2431     """
  2432 
  2433     from MoinMoin.support.python_compatibility import hash_new
  2434     if tm is None:
  2435         tm = "%010x" % time.time()
  2436 
  2437     # make the ticket specific to the page and action:
  2438     try:
  2439         pagename = quoteWikinameURL(request.page.page_name)
  2440     except:
  2441         pagename = 'None'
  2442 
  2443     if action is None:
  2444         try:
  2445             action = request.action
  2446         except:
  2447             action = 'None'
  2448 
  2449     secret = request.cfg.secrets['wikiutil/tickets']
  2450     digest = hash_new('sha1', secret)
  2451 
  2452     ticket = "%s.%s.%s" % (tm, pagename, action)
  2453     digest.update(ticket)
  2454 
  2455     return "%s.%s" % (ticket, digest.hexdigest())
  2456 
  2457 
  2458 def checkTicket(request, ticket):
  2459     """Check validity of a previously created ticket"""
  2460     try:
  2461         timestamp_str = ticket.split('.')[0]
  2462         timestamp = int(timestamp_str, 16)
  2463     except ValueError:
  2464         # invalid or empty ticket
  2465         logging.debug("checkTicket: invalid or empty ticket %r" % ticket)
  2466         return False
  2467     now = time.time()
  2468     if timestamp < now - 10 * 3600:
  2469         # we don't accept tickets older than 10h
  2470         logging.debug("checkTicket: too old ticket, timestamp %r" % timestamp)
  2471         return False
  2472     ourticket = createTicket(request, timestamp_str)
  2473     logging.debug("checkTicket: returning %r, got %r, expected %r" % (ticket == ourticket, ticket, ourticket))
  2474     return ticket == ourticket
  2475 
  2476 
  2477 def renderText(request, Parser, text):
  2478     """executes raw wiki markup with all page elements"""
  2479     import StringIO
  2480     out = StringIO.StringIO()
  2481     request.redirect(out)
  2482     wikiizer = Parser(text, request)
  2483     wikiizer.format(request.formatter, inhibit_p=True)
  2484     result = out.getvalue()
  2485     request.redirect()
  2486     del out
  2487     return result
  2488 
  2489 def get_processing_instructions(body):
  2490     """ Extract the processing instructions / acl / etc. at the beginning of a page's body.
  2491 
  2492         Hint: if you have a Page object p, you already have the result of this function in
  2493               p.meta and (even better) parsed/processed stuff in p.pi.
  2494 
  2495         Returns a list of (pi, restofline) tuples and a string with the rest of the body.
  2496     """
  2497     pi = []
  2498     while body.startswith('#'):
  2499         try:
  2500             line, body = body.split('\n', 1) # extract first line
  2501         except ValueError:
  2502             line = body
  2503             body = ''
  2504 
  2505         # end parsing on empty (invalid) PI
  2506         if line == "#":
  2507             body = line + '\n' + body
  2508             break
  2509 
  2510         if line[1] == '#':# two hash marks are a comment
  2511             comment = line[2:]
  2512             if not comment.startswith(' '):
  2513                 # we don't require a blank after the ##, so we put one there
  2514                 comment = ' ' + comment
  2515                 line = '##%s' % comment
  2516 
  2517         verb, args = (line[1:] + ' ').split(' ', 1) # split at the first blank
  2518         pi.append((verb.lower(), args.strip()))
  2519 
  2520     return pi, body
  2521