MoinMoin/wikiutil.py
author Thomas Waldmann <tw AT waldmann-edv DOT de>
Mon, 28 Nov 2011 21:54:40 +0100
changeset 4525 42d282096e80
parent 4477 af66afbc9a31
parent 4488 1f638ed400a0
permissions -rw-r--r--
tagged release 1.8.9
     1 # -*- coding: iso-8859-1 -*-
     2 """
     3     MoinMoin - Wiki Utility Functions
     4 
     5     @copyright: 2000-2004 Juergen Hermann <jh@web.de>,
     6                 2004 by Florian Festi,
     7                 2006 by Mikko Virkkil,
     8                 2005-2008 MoinMoin:ThomasWaldmann,
     9                 2007 MoinMoin:ReimarBauer
    10     @license: GNU GPL, see COPYING for details.
    11 """
    12 
    13 import cgi
    14 import codecs
    15 import os
    16 import re
    17 import time
    18 import urllib
    19 
    20 from MoinMoin import log
    21 logging = log.getLogger(__name__)
    22 
    23 from MoinMoin import config
    24 from MoinMoin.util import pysupport, lock
    25 from MoinMoin.support.python_compatibility import rsplit
    26 from inspect import getargspec, isfunction, isclass, ismethod
    27 
    28 
    29 # Exceptions
    30 class InvalidFileNameError(Exception):
    31     """ Called when we find an invalid file name """
    32     pass
    33 
    34 # constants for page names
    35 PARENT_PREFIX = "../"
    36 PARENT_PREFIX_LEN = len(PARENT_PREFIX)
    37 CHILD_PREFIX = "/"
    38 CHILD_PREFIX_LEN = len(CHILD_PREFIX)
    39 
    40 #############################################################################
    41 ### Getting data from user/Sending data to user
    42 #############################################################################
    43 
    44 def decodeWindowsPath(text):
    45     """ Decode Windows path names correctly. This is needed because many CGI
    46     servers follow the RFC recommendation and re-encode the path_info variable
    47     according to the file system semantics.
    48 
    49     @param text: the text to decode, string
    50     @rtype: unicode
    51     @return: decoded text
    52     """
    53 
    54     import locale
    55     cur_charset = locale.getdefaultlocale()[1]
    56     try:
    57         return unicode(text, 'utf-8')
    58     except UnicodeError:
    59         try:
    60             return unicode(text, cur_charset, 'replace')
    61         except LookupError:
    62             return unicode(text, 'iso-8859-1', 'replace')
    63 
    64 def decodeUnknownInput(text):
    65     """ Decode unknown input, like text attachments
    66 
    67     First we try utf-8 because it has special format, and it will decode
    68     only utf-8 files. Then we try config.charset, then iso-8859-1 using
    69     'replace'. We will never raise an exception, but may return junk
    70     data.
    71 
    72     WARNING: Use this function only for data that you view, not for data
    73     that you save in the wiki.
    74 
    75     @param text: the text to decode, string
    76     @rtype: unicode
    77     @return: decoded text (maybe wrong)
    78     """
    79     # Shortcut for unicode input
    80     if isinstance(text, unicode):
    81         return text
    82 
    83     try:
    84         return unicode(text, 'utf-8')
    85     except UnicodeError:
    86         if config.charset not in ['utf-8', 'iso-8859-1']:
    87             try:
    88                 return unicode(text, config.charset)
    89             except UnicodeError:
    90                 pass
    91         return unicode(text, 'iso-8859-1', 'replace')
    92 
    93 
    94 def decodeUserInput(s, charsets=[config.charset]):
    95     """
    96     Decodes input from the user.
    97 
    98     @param s: the string to unquote
    99     @param charsets: list of charsets to assume the string is in
   100     @rtype: unicode
   101     @return: the unquoted string as unicode
   102     """
   103     for charset in charsets:
   104         try:
   105             return s.decode(charset)
   106         except UnicodeError:
   107             pass
   108     raise UnicodeError('The string %r cannot be decoded.' % s)
   109 
   110 
   111 # this is a thin wrapper around urllib (urllib only handles str, not unicode)
   112 # with py <= 2.4.1, it would give incorrect results with unicode
   113 # with py == 2.4.2, it crashes with unicode, if it contains non-ASCII chars
   114 def url_quote(s, safe='/', want_unicode=False):
   115     """
   116     Wrapper around urllib.quote doing the encoding/decoding as usually wanted:
   117 
   118     @param s: the string to quote (can be str or unicode, if it is unicode,
   119               config.charset is used to encode it before calling urllib)
   120     @param safe: just passed through to urllib
   121     @param want_unicode: for the less usual case that you want to get back
   122                          unicode and not str, set this to True
   123                          Default is False.
   124     """
   125     if isinstance(s, unicode):
   126         s = s.encode(config.charset)
   127     elif not isinstance(s, str):
   128         s = str(s)
   129     s = urllib.quote(s, safe)
   130     if want_unicode:
   131         s = s.decode(config.charset) # ascii would also work
   132     return s
   133 
   134 def url_quote_plus(s, safe='/', want_unicode=False):
   135     """
   136     Wrapper around urllib.quote_plus doing the encoding/decoding as usually wanted:
   137 
   138     @param s: the string to quote (can be str or unicode, if it is unicode,
   139               config.charset is used to encode it before calling urllib)
   140     @param safe: just passed through to urllib
   141     @param want_unicode: for the less usual case that you want to get back
   142                          unicode and not str, set this to True
   143                          Default is False.
   144     """
   145     if isinstance(s, unicode):
   146         s = s.encode(config.charset)
   147     elif not isinstance(s, str):
   148         s = str(s)
   149     s = urllib.quote_plus(s, safe)
   150     if want_unicode:
   151         s = s.decode(config.charset) # ascii would also work
   152     return s
   153 
   154 def url_unquote(s, want_unicode=True):
   155     """
   156     Wrapper around urllib.unquote doing the encoding/decoding as usually wanted:
   157 
   158     @param s: the string to unquote (can be str or unicode, if it is unicode,
   159               config.charset is used to encode it before calling urllib)
   160     @param want_unicode: for the less usual case that you want to get back
   161                          str and not unicode, set this to False.
   162                          Default is True.
   163     """
   164     if isinstance(s, unicode):
   165         s = s.encode(config.charset) # ascii would also work
   166     s = urllib.unquote(s)
   167     if want_unicode:
   168         try:
   169             s = decodeUserInput(s, [config.charset, 'iso-8859-1', ]) # try hard
   170         except UnicodeError:
   171             s = s.decode('ascii', 'replace') # better than crashing
   172     return s
   173 
   174 def parseQueryString(qstr, want_unicode=True):
   175     """ Parse a querystring "key=value&..." into a dict.
   176     """
   177     is_unicode = isinstance(qstr, unicode)
   178     if is_unicode:
   179         qstr = qstr.encode(config.charset)
   180     values = {}
   181     for key, value in cgi.parse_qs(qstr).items():
   182         if len(value) < 2:
   183             v = ''.join(value)
   184             if want_unicode:
   185                 try:
   186                     v = unicode(v, config.charset)
   187                 except UnicodeDecodeError:
   188                     v = unicode(v, 'iso-8859-1', 'replace')
   189             values[key] = v
   190     return values
   191 
   192 def makeQueryString(qstr=None, want_unicode=False, **kw):
   193     """ Make a querystring from arguments.
   194 
   195     kw arguments overide values in qstr.
   196 
   197     If a string is passed in, it's returned verbatim and
   198     keyword parameters are ignored.
   199 
   200     @param qstr: dict to format as query string, using either ascii or unicode
   201     @param kw: same as dict when using keywords, using ascii or unicode
   202     @rtype: string
   203     @return: query string ready to use in a url
   204     """
   205     if qstr is None:
   206         qstr = {}
   207     if isinstance(qstr, dict):
   208         qstr.update(kw)
   209         items = ['%s=%s' % (url_quote_plus(key, want_unicode=want_unicode), url_quote_plus(value, want_unicode=want_unicode)) for key, value in qstr.items()]
   210         qstr = '&'.join(items)
   211     return qstr
   212 
   213 
   214 def quoteWikinameURL(pagename, charset=config.charset):
   215     """ Return a url encoding of filename in plain ascii
   216 
   217     Use urllib.quote to quote any character that is not always safe.
   218 
   219     @param pagename: the original pagename (unicode)
   220     @param charset: url text encoding, 'utf-8' recommended. Other charset
   221                     might not be able to encode the page name and raise
   222                     UnicodeError. (default config.charset ('utf-8')).
   223     @rtype: string
   224     @return: the quoted filename, all unsafe characters encoded
   225     """
   226     pagename = pagename.encode(charset)
   227     return urllib.quote(pagename)
   228 
   229 
   230 def escape(s, quote=0):
   231     """ Escape possible html tags
   232 
   233     Replace special characters '&', '<' and '>' by SGML entities.
   234     (taken from cgi.escape so we don't have to include that, even if we
   235     don't use cgi at all)
   236 
   237     @param s: (unicode) string to escape
   238     @param quote: bool, should transform '\"' to '&quot;'
   239     @rtype: when called with a unicode object, return unicode object - otherwise return string object
   240     @return: escaped version of s
   241     """
   242     if not isinstance(s, (str, unicode)):
   243         s = str(s)
   244 
   245     # Must first replace &
   246     s = s.replace("&", "&amp;")
   247 
   248     # Then other...
   249     s = s.replace("<", "&lt;")
   250     s = s.replace(">", "&gt;")
   251     if quote:
   252         s = s.replace('"', "&quot;")
   253         s = s.replace("'", "&#x27;")
   254     return s
   255 
   256 def clean_input(text, max_len=201):
   257     """ Clean input:
   258         replace CR, LF, TAB by whitespace
   259         delete control chars
   260 
   261         @param text: unicode text to clean (if we get str, we decode)
   262         @rtype: unicode
   263         @return: cleaned text
   264     """
   265     # we only have input fields with max 200 chars, but spammers send us more
   266     length = len(text)
   267     if length == 0 or length > max_len:
   268         return u''
   269     else:
   270         if isinstance(text, str):
   271             # the translate() below can ONLY process unicode, thus, if we get
   272             # str, we try to decode it using the usual coding:
   273             text = text.decode(config.charset)
   274         return text.translate(config.clean_input_translation_map)
   275 
   276 
   277 def make_breakable(text, maxlen):
   278     """ make a text breakable by inserting spaces into nonbreakable parts
   279     """
   280     text = text.split(" ")
   281     newtext = []
   282     for part in text:
   283         if len(part) > maxlen:
   284             while part:
   285                 newtext.append(part[:maxlen])
   286                 part = part[maxlen:]
   287         else:
   288             newtext.append(part)
   289     return " ".join(newtext)
   290 
   291 ########################################################################
   292 ### Storage
   293 ########################################################################
   294 
   295 # Precompiled patterns for file name [un]quoting
   296 UNSAFE = re.compile(r'[^a-zA-Z0-9_]+')
   297 QUOTED = re.compile(r'\(([a-fA-F0-9]+)\)')
   298 
   299 
   300 def quoteWikinameFS(wikiname, charset=config.charset):
   301     """ Return file system representation of a Unicode WikiName.
   302 
   303     Warning: will raise UnicodeError if wikiname can not be encoded using
   304     charset. The default value of config.charset, 'utf-8' can encode any
   305     character.
   306 
   307     @param wikiname: Unicode string possibly containing non-ascii characters
   308     @param charset: charset to encode string
   309     @rtype: string
   310     @return: quoted name, safe for any file system
   311     """
   312     filename = wikiname.encode(charset)
   313 
   314     quoted = []
   315     location = 0
   316     for needle in UNSAFE.finditer(filename):
   317         # append leading safe stuff
   318         quoted.append(filename[location:needle.start()])
   319         location = needle.end()
   320         # Quote and append unsafe stuff
   321         quoted.append('(')
   322         for character in needle.group():
   323             quoted.append('%02x' % ord(character))
   324         quoted.append(')')
   325 
   326     # append rest of string
   327     quoted.append(filename[location:])
   328     return ''.join(quoted)
   329 
   330 
   331 def unquoteWikiname(filename, charsets=[config.charset]):
   332     """ Return Unicode WikiName from quoted file name.
   333 
   334     We raise an InvalidFileNameError if we find an invalid name, so the
   335     wiki could alarm the admin or suggest the user to rename a page.
   336     Invalid file names should never happen in normal use, but are rather
   337     cheap to find.
   338 
   339     This function should be used only to unquote file names, not page
   340     names we receive from the user. These are handled in request by
   341     urllib.unquote, decodePagename and normalizePagename.
   342 
   343     Todo: search clients of unquoteWikiname and check for exceptions.
   344 
   345     @param filename: string using charset and possibly quoted parts
   346     @param charsets: list of charsets used by string
   347     @rtype: Unicode String
   348     @return: WikiName
   349     """
   350     ### Temporary fix start ###
   351     # From some places we get called with Unicode strings
   352     if isinstance(filename, type(u'')):
   353         filename = filename.encode(config.charset)
   354     ### Temporary fix end ###
   355 
   356     parts = []
   357     start = 0
   358     for needle in QUOTED.finditer(filename):
   359         # append leading unquoted stuff
   360         parts.append(filename[start:needle.start()])
   361         start = needle.end()
   362         # Append quoted stuff
   363         group = needle.group(1)
   364         # Filter invalid filenames
   365         if (len(group) % 2 != 0):
   366             raise InvalidFileNameError(filename)
   367         try:
   368             for i in range(0, len(group), 2):
   369                 byte = group[i:i+2]
   370                 character = chr(int(byte, 16))
   371                 parts.append(character)
   372         except ValueError:
   373             # byte not in hex, e.g 'xy'
   374             raise InvalidFileNameError(filename)
   375 
   376     # append rest of string
   377     if start == 0:
   378         wikiname = filename
   379     else:
   380         parts.append(filename[start:len(filename)])
   381         wikiname = ''.join(parts)
   382 
   383     # FIXME: This looks wrong, because at this stage "()" can be both errors
   384     # like open "(" without close ")", or unquoted valid characters in the file name.
   385     # Filter invalid filenames. Any left (xx) must be invalid
   386     #if '(' in wikiname or ')' in wikiname:
   387     #    raise InvalidFileNameError(filename)
   388 
   389     wikiname = decodeUserInput(wikiname, charsets)
   390     return wikiname
   391 
   392 # time scaling
   393 def timestamp2version(ts):
   394     """ Convert UNIX timestamp (may be float or int) to our version
   395         (long) int.
   396         We don't want to use floats, so we just scale by 1e6 to get
   397         an integer in usecs.
   398     """
   399     return long(ts*1000000L) # has to be long for py 2.2.x
   400 
   401 def version2timestamp(v):
   402     """ Convert version number to UNIX timestamp (float).
   403         This must ONLY be used for display purposes.
   404     """
   405     return v / 1000000.0
   406 
   407 
   408 # This is the list of meta attribute names to be treated as integers.
   409 # IMPORTANT: do not use any meta attribute names with "-" (or any other chars
   410 # invalid in python attribute names), use e.g. _ instead.
   411 INTEGER_METAS = ['current', 'revision', # for page storage (moin 2.0)
   412                  'data_format_revision', # for data_dir format spec (use by mig scripts)
   413                 ]
   414 
   415 class MetaDict(dict):
   416     """ store meta informations as a dict.
   417     """
   418     def __init__(self, metafilename, cache_directory):
   419         """ create a MetaDict from metafilename """
   420         dict.__init__(self)
   421         self.metafilename = metafilename
   422         self.dirty = False
   423         lock_dir = os.path.join(cache_directory, '__metalock__')
   424         self.rlock = lock.ReadLock(lock_dir, 60.0)
   425         self.wlock = lock.WriteLock(lock_dir, 60.0)
   426 
   427         if not self.rlock.acquire(3.0):
   428             raise EnvironmentError("Could not lock in MetaDict")
   429         try:
   430             self._get_meta()
   431         finally:
   432             self.rlock.release()
   433 
   434     def _get_meta(self):
   435         """ get the meta dict from an arbitrary filename.
   436             does not keep state, does uncached, direct disk access.
   437             @param metafilename: the name of the file to read
   438             @return: dict with all values or {} if empty or error
   439         """
   440 
   441         try:
   442             metafile = codecs.open(self.metafilename, "r", "utf-8")
   443             meta = metafile.read() # this is much faster than the file's line-by-line iterator
   444             metafile.close()
   445         except IOError:
   446             meta = u''
   447         for line in meta.splitlines():
   448             key, value = line.split(':', 1)
   449             value = value.strip()
   450             if key in INTEGER_METAS:
   451                 value = int(value)
   452             dict.__setitem__(self, key, value)
   453 
   454     def _put_meta(self):
   455         """ put the meta dict into an arbitrary filename.
   456             does not keep or modify state, does uncached, direct disk access.
   457             @param metafilename: the name of the file to write
   458             @param metadata: dict of the data to write to the file
   459         """
   460         meta = []
   461         for key, value in self.items():
   462             if key in INTEGER_METAS:
   463                 value = str(value)
   464             meta.append("%s: %s" % (key, value))
   465         meta = '\r\n'.join(meta)
   466 
   467         metafile = codecs.open(self.metafilename, "w", "utf-8")
   468         metafile.write(meta)
   469         metafile.close()
   470         self.dirty = False
   471 
   472     def sync(self, mtime_usecs=None):
   473         """ No-Op except for that parameter """
   474         if not mtime_usecs is None:
   475             self.__setitem__('mtime', str(mtime_usecs))
   476         # otherwise no-op
   477 
   478     def __getitem__(self, key):
   479         """ We don't care for cache coherency here. """
   480         return dict.__getitem__(self, key)
   481 
   482     def __setitem__(self, key, value):
   483         """ Sets a dictionary entry. """
   484         if not self.wlock.acquire(5.0):
   485             raise EnvironmentError("Could not lock in MetaDict")
   486         try:
   487             self._get_meta() # refresh cache
   488             try:
   489                 oldvalue = dict.__getitem__(self, key)
   490             except KeyError:
   491                 oldvalue = None
   492             if value != oldvalue:
   493                 dict.__setitem__(self, key, value)
   494                 self._put_meta() # sync cache
   495         finally:
   496             self.wlock.release()
   497 
   498 
   499 # Quoting of wiki names, file names, etc. (in the wiki markup) -----------------------------------
   500 
   501 # don't ever change this - DEPRECATED, only needed for 1.5 > 1.6 migration conversion
   502 QUOTE_CHARS = u'"'
   503 
   504 
   505 #############################################################################
   506 ### InterWiki
   507 #############################################################################
   508 INTERWIKI_PAGE = "InterWikiMap"
   509 
   510 def generate_file_list(request):
   511     """ generates a list of all files. for internal use. """
   512 
   513     # order is important here, the local intermap file takes
   514     # precedence over the shared one, and is thus read AFTER
   515     # the shared one
   516     intermap_files = request.cfg.shared_intermap
   517     if not isinstance(intermap_files, list):
   518         intermap_files = [intermap_files]
   519     else:
   520         intermap_files = intermap_files[:]
   521     intermap_files.append(os.path.join(request.cfg.data_dir, "intermap.txt"))
   522     request.cfg.shared_intermap_files = [filename for filename in intermap_files
   523                                          if filename and os.path.isfile(filename)]
   524 
   525 
   526 def get_max_mtime(file_list, page):
   527     """ Returns the highest modification time of the files in file_list and the
   528     page page. """
   529     timestamps = [os.stat(filename).st_mtime for filename in file_list]
   530     if page.exists():
   531         # exists() is cached and thus cheaper than mtime_usecs()
   532         timestamps.append(version2timestamp(page.mtime_usecs()))
   533     if timestamps:
   534         return max(timestamps)
   535     else:
   536         return 0 # no files / pages there
   537 
   538 def load_wikimap(request):
   539     """ load interwiki map (once, and only on demand) """
   540     from MoinMoin.Page import Page
   541 
   542     now = int(time.time())
   543     if getattr(request.cfg, "shared_intermap_files", None) is None:
   544         generate_file_list(request)
   545 
   546     try:
   547         _interwiki_list = request.cfg.cache.interwiki_list
   548         old_mtime = request.cfg.cache.interwiki_mtime
   549         if request.cfg.cache.interwiki_ts + (1*60) < now: # 1 minutes caching time
   550             max_mtime = get_max_mtime(request.cfg.shared_intermap_files, Page(request, INTERWIKI_PAGE))
   551             if max_mtime > old_mtime:
   552                 raise AttributeError # refresh cache
   553             else:
   554                 request.cfg.cache.interwiki_ts = now
   555     except AttributeError:
   556         _interwiki_list = {}
   557         lines = []
   558 
   559         for filename in request.cfg.shared_intermap_files:
   560             f = codecs.open(filename, "r", config.charset)
   561             lines.extend(f.readlines())
   562             f.close()
   563 
   564         # add the contents of the InterWikiMap page
   565         lines += Page(request, INTERWIKI_PAGE).get_raw_body().splitlines()
   566 
   567         for line in lines:
   568             if not line or line[0] == '#':
   569                 continue
   570             try:
   571                 line = "%s %s/InterWiki" % (line, request.getScriptname())
   572                 wikitag, urlprefix, dummy = line.split(None, 2)
   573             except ValueError:
   574                 pass
   575             else:
   576                 _interwiki_list[wikitag] = urlprefix
   577 
   578         del lines
   579 
   580         # add own wiki as "Self" and by its configured name
   581         _interwiki_list['Self'] = request.getScriptname() + '/'
   582         if request.cfg.interwikiname:
   583             _interwiki_list[request.cfg.interwikiname] = request.getScriptname() + '/'
   584 
   585         # save for later
   586         request.cfg.cache.interwiki_list = _interwiki_list
   587         request.cfg.cache.interwiki_ts = now
   588         request.cfg.cache.interwiki_mtime = get_max_mtime(request.cfg.shared_intermap_files, Page(request, INTERWIKI_PAGE))
   589 
   590     return _interwiki_list
   591 
   592 def split_wiki(wikiurl):
   593     """
   594     Split a wiki url.
   595 
   596     *** DEPRECATED FUNCTION FOR OLD 1.5 SYNTAX - ONLY STILL HERE FOR THE 1.5 -> 1.6 MIGRATION ***
   597     Use split_interwiki(), see below.
   598 
   599     @param wikiurl: the url to split
   600     @rtype: tuple
   601     @return: (tag, tail)
   602     """
   603     # !!! use a regex here!
   604     try:
   605         wikitag, tail = wikiurl.split(":", 1)
   606     except ValueError:
   607         try:
   608             wikitag, tail = wikiurl.split("/", 1)
   609         except ValueError:
   610             wikitag, tail = 'Self', wikiurl
   611     return wikitag, tail
   612 
   613 def split_interwiki(wikiurl):
   614     """ Split a interwiki name, into wikiname and pagename, e.g:
   615 
   616     'MoinMoin:FrontPage' -> "MoinMoin", "FrontPage"
   617     'FrontPage' -> "Self", "FrontPage"
   618     'MoinMoin:Page with blanks' -> "MoinMoin", "Page with blanks"
   619     'MoinMoin:' -> "MoinMoin", ""
   620 
   621     can also be used for:
   622 
   623     'attachment:filename with blanks.txt' -> "attachment", "filename with blanks.txt"
   624 
   625     @param wikiurl: the url to split
   626     @rtype: tuple
   627     @return: (wikiname, pagename)
   628     """
   629     try:
   630         wikiname, pagename = wikiurl.split(":", 1)
   631     except ValueError:
   632         wikiname, pagename = 'Self', wikiurl
   633     return wikiname, pagename
   634 
   635 def resolve_wiki(request, wikiurl):
   636     """
   637     Resolve an interwiki link.
   638 
   639     *** DEPRECATED FUNCTION FOR OLD 1.5 SYNTAX - ONLY STILL HERE FOR THE 1.5 -> 1.6 MIGRATION ***
   640     Use resolve_interwiki(), see below.
   641 
   642     @param request: the request object
   643     @param wikiurl: the InterWiki:PageName link
   644     @rtype: tuple
   645     @return: (wikitag, wikiurl, wikitail, err)
   646     """
   647     _interwiki_list = load_wikimap(request)
   648     # split wiki url
   649     wikiname, pagename = split_wiki(wikiurl)
   650 
   651     # return resolved url
   652     if wikiname in _interwiki_list:
   653         return (wikiname, _interwiki_list[wikiname], pagename, False)
   654     else:
   655         return (wikiname, request.getScriptname(), "/InterWiki", True)
   656 
   657 def resolve_interwiki(request, wikiname, pagename):
   658     """ Resolve an interwiki reference (wikiname:pagename).
   659 
   660     @param request: the request object
   661     @param wikiname: interwiki wiki name
   662     @param pagename: interwiki page name
   663     @rtype: tuple
   664     @return: (wikitag, wikiurl, wikitail, err)
   665     """
   666     _interwiki_list = load_wikimap(request)
   667     if wikiname in _interwiki_list:
   668         return (wikiname, _interwiki_list[wikiname], pagename, False)
   669     else:
   670         return (wikiname, request.getScriptname(), "/InterWiki", True)
   671 
   672 def join_wiki(wikiurl, wikitail):
   673     """
   674     Add a (url_quoted) page name to an interwiki url.
   675 
   676     Note: We can't know what kind of URL quoting a remote wiki expects.
   677           We just use a utf-8 encoded string with standard URL quoting.
   678 
   679     @param wikiurl: wiki url, maybe including a $PAGE placeholder
   680     @param wikitail: page name
   681     @rtype: string
   682     @return: generated URL of the page in the other wiki
   683     """
   684     wikitail = url_quote(wikitail)
   685     if '$PAGE' in wikiurl:
   686         return wikiurl.replace('$PAGE', wikitail)
   687     else:
   688         return wikiurl + wikitail
   689 
   690 
   691 #############################################################################
   692 ### Page types (based on page names)
   693 #############################################################################
   694 
   695 def isSystemPage(request, pagename):
   696     """ Is this a system page? Uses AllSystemPagesGroup internally.
   697 
   698     @param request: the request object
   699     @param pagename: the page name
   700     @rtype: bool
   701     @return: true if page is a system page
   702     """
   703     return (request.dicts.has_member('SystemPagesGroup', pagename) or
   704         isTemplatePage(request, pagename))
   705 
   706 
   707 def isTemplatePage(request, pagename):
   708     """ Is this a template page?
   709 
   710     @param pagename: the page name
   711     @rtype: bool
   712     @return: true if page is a template page
   713     """
   714     return request.cfg.cache.page_template_regexact.search(pagename) is not None
   715 
   716 
   717 def isGroupPage(request, pagename):
   718     """ Is this a name of group page?
   719 
   720     @param pagename: the page name
   721     @rtype: bool
   722     @return: true if page is a form page
   723     """
   724     return request.cfg.cache.page_group_regexact.search(pagename) is not None
   725 
   726 
   727 def filterCategoryPages(request, pagelist):
   728     """ Return category pages in pagelist
   729 
   730     WARNING: DO NOT USE THIS TO FILTER THE FULL PAGE LIST! Use
   731     getPageList with a filter function.
   732 
   733     If you pass a list with a single pagename, either that is returned
   734     or an empty list, thus you can use this function like a `isCategoryPage`
   735     one.
   736 
   737     @param pagelist: a list of pages
   738     @rtype: list
   739     @return: only the category pages of pagelist
   740     """
   741     func = request.cfg.cache.page_category_regexact.search
   742     return [pn for pn in pagelist if func(pn)]
   743 
   744 
   745 def getLocalizedPage(request, pagename): # was: getSysPage
   746     """ Get a system page according to user settings and available translations.
   747 
   748     We include some special treatment for the case that <pagename> is the
   749     currently rendered page, as this is the case for some pages used very
   750     often, like FrontPage, RecentChanges etc. - in that case we reuse the
   751     already existing page object instead creating a new one.
   752 
   753     @param request: the request object
   754     @param pagename: the name of the page
   755     @rtype: Page object
   756     @return: the page object of that system page, using a translated page,
   757              if it exists
   758     """
   759     from MoinMoin.Page import Page
   760     i18n_name = request.getText(pagename)
   761     pageobj = None
   762     if i18n_name != pagename:
   763         if request.page and i18n_name == request.page.page_name:
   764             # do not create new object for current page
   765             i18n_page = request.page
   766             if i18n_page.exists():
   767                 pageobj = i18n_page
   768         else:
   769             i18n_page = Page(request, i18n_name)
   770             if i18n_page.exists():
   771                 pageobj = i18n_page
   772 
   773     # if we failed getting a translated version of <pagename>,
   774     # we fall back to english
   775     if not pageobj:
   776         if request.page and pagename == request.page.page_name:
   777             # do not create new object for current page
   778             pageobj = request.page
   779         else:
   780             pageobj = Page(request, pagename)
   781     return pageobj
   782 
   783 
   784 def getFrontPage(request):
   785     """ Convenience function to get localized front page
   786 
   787     @param request: current request
   788     @rtype: Page object
   789     @return localized page_front_page, if there is a translation
   790     """
   791     return getLocalizedPage(request, request.cfg.page_front_page)
   792 
   793 
   794 def getHomePage(request, username=None):
   795     """
   796     Get a user's homepage, or return None for anon users and
   797     those who have not created a homepage.
   798 
   799     DEPRECATED - try to use getInterwikiHomePage (see below)
   800 
   801     @param request: the request object
   802     @param username: the user's name
   803     @rtype: Page
   804     @return: user's homepage object - or None
   805     """
   806     from MoinMoin.Page import Page
   807     # default to current user
   808     if username is None and request.user.valid:
   809         username = request.user.name
   810 
   811     # known user?
   812     if username:
   813         # Return home page
   814         page = Page(request, username)
   815         if page.exists():
   816             return page
   817 
   818     return None
   819 
   820 
   821 def getInterwikiHomePage(request, username=None):
   822     """
   823     Get a user's homepage.
   824 
   825     cfg.user_homewiki influences behaviour of this:
   826     'Self' does mean we store user homepage in THIS wiki.
   827     When set to our own interwikiname, it behaves like with 'Self'.
   828 
   829     'SomeOtherWiki' means we store user homepages in another wiki.
   830 
   831     @param request: the request object
   832     @param username: the user's name
   833     @rtype: tuple (or None for anon users)
   834     @return: (wikiname, pagename)
   835     """
   836     # default to current user
   837     if username is None and request.user.valid:
   838         username = request.user.name
   839     if not username:
   840         return None # anon user
   841 
   842     homewiki = request.cfg.user_homewiki
   843     if homewiki == request.cfg.interwikiname:
   844         homewiki = u'Self'
   845 
   846     return homewiki, username
   847 
   848 
   849 def AbsPageName(context, pagename):
   850     """
   851     Return the absolute pagename for a (possibly) relative pagename.
   852 
   853     @param context: name of the page where "pagename" appears on
   854     @param pagename: the (possibly relative) page name
   855     @rtype: string
   856     @return: the absolute page name
   857     """
   858     if pagename.startswith(PARENT_PREFIX):
   859         while context and pagename.startswith(PARENT_PREFIX):
   860             context = '/'.join(context.split('/')[:-1])
   861             pagename = pagename[PARENT_PREFIX_LEN:]
   862         pagename = '/'.join(filter(None, [context, pagename, ]))
   863     elif pagename.startswith(CHILD_PREFIX):
   864         if context:
   865             pagename = context + '/' + pagename[CHILD_PREFIX_LEN:]
   866         else:
   867             pagename = pagename[CHILD_PREFIX_LEN:]
   868     return pagename
   869 
   870 def RelPageName(context, pagename):
   871     """
   872     Return the relative pagename for some context.
   873 
   874     @param context: name of the page where "pagename" appears on
   875     @param pagename: the absolute page name
   876     @rtype: string
   877     @return: the relative page name
   878     """
   879     if context == '':
   880         # special case, context is some "virtual root" page with name == ''
   881         # every page is a subpage of this virtual root
   882         return CHILD_PREFIX + pagename
   883     elif pagename.startswith(context + CHILD_PREFIX):
   884         # simple child
   885         return pagename[len(context):]
   886     else:
   887         # some kind of sister/aunt
   888         context_frags = context.split('/')   # A, B, C, D, E
   889         pagename_frags = pagename.split('/') # A, B, C, F
   890         # first throw away common parents:
   891         common = 0
   892         for cf, pf in zip(context_frags, pagename_frags):
   893             if cf == pf:
   894                 common += 1
   895             else:
   896                 break
   897         context_frags = context_frags[common:] # D, E
   898         pagename_frags = pagename_frags[common:] # F
   899         go_up = len(context_frags)
   900         return PARENT_PREFIX * go_up + '/'.join(pagename_frags)
   901 
   902 
   903 def pagelinkmarkup(pagename, text=None):
   904     """ return markup that can be used as link to page <pagename> """
   905     from MoinMoin.parser.text_moin_wiki import Parser
   906     if re.match(Parser.word_rule + "$", pagename, re.U|re.X) and \
   907             (text is None or text == pagename):
   908         return pagename
   909     else:
   910         if text is None or text == pagename:
   911             text = ''
   912         else:
   913             text = '|%s' % text
   914         return u'[[%s%s]]' % (pagename, text)
   915 
   916 #############################################################################
   917 ### mimetype support
   918 #############################################################################
   919 import mimetypes
   920 
   921 MIMETYPES_MORE = {
   922  # OpenOffice 2.x & other open document stuff
   923  '.odt': 'application/vnd.oasis.opendocument.text',
   924  '.ods': 'application/vnd.oasis.opendocument.spreadsheet',
   925  '.odp': 'application/vnd.oasis.opendocument.presentation',
   926  '.odg': 'application/vnd.oasis.opendocument.graphics',
   927  '.odc': 'application/vnd.oasis.opendocument.chart',
   928  '.odf': 'application/vnd.oasis.opendocument.formula',
   929  '.odb': 'application/vnd.oasis.opendocument.database',
   930  '.odi': 'application/vnd.oasis.opendocument.image',
   931  '.odm': 'application/vnd.oasis.opendocument.text-master',
   932  '.ott': 'application/vnd.oasis.opendocument.text-template',
   933  '.ots': 'application/vnd.oasis.opendocument.spreadsheet-template',
   934  '.otp': 'application/vnd.oasis.opendocument.presentation-template',
   935  '.otg': 'application/vnd.oasis.opendocument.graphics-template',
   936  # some systems (like Mac OS X) don't have some of these:
   937  '.patch': 'text/x-diff',
   938  '.diff': 'text/x-diff',
   939  '.py': 'text/x-python',
   940  '.cfg': 'text/plain',
   941  '.conf': 'text/plain',
   942  '.irc': 'text/plain',
   943  '.md5': 'text/plain',
   944  '.csv': 'text/csv',
   945  '.flv': 'video/x-flv',
   946  '.wmv': 'video/x-ms-wmv',
   947  '.swf': 'application/x-shockwave-flash',
   948 }
   949 [mimetypes.add_type(mimetype, ext, True) for ext, mimetype in MIMETYPES_MORE.items()]
   950 
   951 MIMETYPES_sanitize_mapping = {
   952     # this stuff is text, but got application/* for unknown reasons
   953     ('application', 'docbook+xml'): ('text', 'docbook'),
   954     ('application', 'x-latex'): ('text', 'latex'),
   955     ('application', 'x-tex'): ('text', 'tex'),
   956     ('application', 'javascript'): ('text', 'javascript'),
   957 }
   958 
   959 MIMETYPES_spoil_mapping = {} # inverse mapping of above
   960 for _key, _value in MIMETYPES_sanitize_mapping.items():
   961     MIMETYPES_spoil_mapping[_value] = _key
   962 
   963 
   964 class MimeType(object):
   965     """ represents a mimetype like text/plain """
   966 
   967     def __init__(self, mimestr=None, filename=None):
   968         self.major = self.minor = None # sanitized mime type and subtype
   969         self.params = {} # parameters like "charset" or others
   970         self.charset = None # this stays None until we know for sure!
   971         self.raw_mimestr = mimestr
   972 
   973         if mimestr:
   974             self.parse_mimetype(mimestr)
   975         elif filename:
   976             self.parse_filename(filename)
   977 
   978     def parse_filename(self, filename):
   979         mtype, encoding = mimetypes.guess_type(filename)
   980         if mtype is None:
   981             mtype = 'application/octet-stream'
   982         self.parse_mimetype(mtype)
   983 
   984     def parse_mimetype(self, mimestr):
   985         """ take a string like used in content-type and parse it into components,
   986             alternatively it also can process some abbreviated string like "wiki"
   987         """
   988         parameters = mimestr.split(";")
   989         parameters = [p.strip() for p in parameters]
   990         mimetype, parameters = parameters[0], parameters[1:]
   991         mimetype = mimetype.split('/')
   992         if len(mimetype) >= 2:
   993             major, minor = mimetype[:2] # we just ignore more than 2 parts
   994         else:
   995             major, minor = self.parse_format(mimetype[0])
   996         self.major = major.lower()
   997         self.minor = minor.lower()
   998         for param in parameters:
   999             key, value = param.split('=')
  1000             if value[0] == '"' and value[-1] == '"': # remove quotes
  1001                 value = value[1:-1]
  1002             self.params[key.lower()] = value
  1003         if 'charset' in self.params:
  1004             self.charset = self.params['charset'].lower()
  1005         self.sanitize()
  1006 
  1007     def parse_format(self, format):
  1008         """ maps from what we currently use on-page in a #format xxx processing
  1009             instruction to a sanitized mimetype major, minor tuple.
  1010             can also be user later for easier entry by the user, so he can just
  1011             type "wiki" instead of "text/moin-wiki".
  1012         """
  1013         format = format.lower()
  1014         if format in config.parser_text_mimetype:
  1015             mimetype = 'text', format
  1016         else:
  1017             mapping = {
  1018                 'wiki': ('text', 'moin-wiki'),
  1019                 'irc': ('text', 'irssi'),
  1020             }
  1021             try:
  1022                 mimetype = mapping[format]
  1023             except KeyError:
  1024                 mimetype = 'text', 'x-%s' % format
  1025         return mimetype
  1026 
  1027     def sanitize(self):
  1028         """ convert to some representation that makes sense - this is not necessarily
  1029             conformant to /etc/mime.types or IANA listing, but if something is
  1030             readable text, we will return some text/* mimetype, not application/*,
  1031             because we need text/plain as fallback and not application/octet-stream.
  1032         """
  1033         self.major, self.minor = MIMETYPES_sanitize_mapping.get((self.major, self.minor), (self.major, self.minor))
  1034 
  1035     def spoil(self):
  1036         """ this returns something conformant to /etc/mime.type or IANA as a string,
  1037             kind of inverse operation of sanitize(), but doesn't change self
  1038         """
  1039         major, minor = MIMETYPES_spoil_mapping.get((self.major, self.minor), (self.major, self.minor))
  1040         return self.content_type(major, minor)
  1041 
  1042     def content_type(self, major=None, minor=None, charset=None, params=None):
  1043         """ return a string suitable for Content-Type header
  1044         """
  1045         major = major or self.major
  1046         minor = minor or self.minor
  1047         params = params or self.params or {}
  1048         if major == 'text':
  1049             charset = charset or self.charset or params.get('charset', config.charset)
  1050             params['charset'] = charset
  1051         mimestr = "%s/%s" % (major, minor)
  1052         params = ['%s="%s"' % (key.lower(), value) for key, value in params.items()]
  1053         params.insert(0, mimestr)
  1054         return "; ".join(params)
  1055 
  1056     def mime_type(self):
  1057         """ return a string major/minor only, no params """
  1058         return "%s/%s" % (self.major, self.minor)
  1059 
  1060     def module_name(self):
  1061         """ convert this mimetype to a string useable as python module name,
  1062             we yield the exact module name first and then proceed to shorter
  1063             module names (useful for falling back to them, if the more special
  1064             module is not found) - e.g. first "text_python", next "text".
  1065             Finally, we yield "application_octet_stream" as the most general
  1066             mimetype we have.
  1067             Hint: the fallback handler module for text/* should be implemented
  1068                   in module "text" (not "text_plain")
  1069         """
  1070         mimetype = self.mime_type()
  1071         modname = mimetype.replace("/", "_").replace("-", "_").replace(".", "_")
  1072         fragments = modname.split('_')
  1073         for length in range(len(fragments), 1, -1):
  1074             yield "_".join(fragments[:length])
  1075         yield self.raw_mimestr
  1076         yield fragments[0]
  1077         yield "application_octet_stream"
  1078 
  1079 
  1080 #############################################################################
  1081 ### Plugins
  1082 #############################################################################
  1083 
  1084 class PluginError(Exception):
  1085     """ Base class for plugin errors """
  1086 
  1087 class PluginMissingError(PluginError):
  1088     """ Raised when a plugin is not found """
  1089 
  1090 class PluginAttributeError(PluginError):
  1091     """ Raised when plugin does not contain an attribtue """
  1092 
  1093 
  1094 def importPlugin(cfg, kind, name, function="execute"):
  1095     """ Import wiki or builtin plugin
  1096 
  1097     Returns <function> attr from a plugin module <name>.
  1098     If <function> attr is missing, raise PluginAttributeError.
  1099     If <function> is None, return the whole module object.
  1100 
  1101     If <name> plugin can not be imported, raise PluginMissingError.
  1102 
  1103     kind may be one of 'action', 'formatter', 'macro', 'parser' or any other
  1104     directory that exist in MoinMoin or data/plugin.
  1105 
  1106     Wiki plugins will always override builtin plugins. If you want
  1107     specific plugin, use either importWikiPlugin or importBuiltinPlugin
  1108     directly.
  1109 
  1110     @param cfg: wiki config instance
  1111     @param kind: what kind of module we want to import
  1112     @param name: the name of the module
  1113     @param function: the function name
  1114     @rtype: any object
  1115     @return: "function" of module "name" of kind "kind", or None
  1116     """
  1117     try:
  1118         return importWikiPlugin(cfg, kind, name, function)
  1119     except PluginMissingError:
  1120         return importBuiltinPlugin(kind, name, function)
  1121 
  1122 
  1123 def importWikiPlugin(cfg, kind, name, function="execute"):
  1124     """ Import plugin from the wiki data directory
  1125 
  1126     See importPlugin docstring.
  1127     """
  1128     plugins = wikiPlugins(kind, cfg)
  1129     modname = plugins.get(name, None)
  1130     if modname is None:
  1131         raise PluginMissingError()
  1132     moduleName = '%s.%s' % (modname, name)
  1133     return importNameFromPlugin(moduleName, function)
  1134 
  1135 
  1136 def importBuiltinPlugin(kind, name, function="execute"):
  1137     """ Import builtin plugin from MoinMoin package
  1138 
  1139     See importPlugin docstring.
  1140     """
  1141     if not name in builtinPlugins(kind):
  1142         raise PluginMissingError()
  1143     moduleName = 'MoinMoin.%s.%s' % (kind, name)
  1144     return importNameFromPlugin(moduleName, function)
  1145 
  1146 
  1147 def importNameFromPlugin(moduleName, name):
  1148     """ Return <name> attr from <moduleName> module,
  1149         raise PluginAttributeError if name does not exist.
  1150 
  1151         If name is None, return the <moduleName> module object.
  1152     """
  1153     if name is None:
  1154         fromlist = []
  1155     else:
  1156         fromlist = [name]
  1157     module = __import__(moduleName, globals(), {}, fromlist)
  1158     if fromlist:
  1159         # module has the obj for module <moduleName>
  1160         try:
  1161             return getattr(module, name)
  1162         except AttributeError:
  1163             raise PluginAttributeError
  1164     else:
  1165         # module now has the toplevel module of <moduleName> (see __import__ docs!)
  1166         components = moduleName.split('.')
  1167         for comp in components[1:]:
  1168             module = getattr(module, comp)
  1169         return module
  1170 
  1171 
  1172 def builtinPlugins(kind):
  1173     """ Gets a list of modules in MoinMoin.'kind'
  1174 
  1175     @param kind: what kind of modules we look for
  1176     @rtype: list
  1177     @return: module names
  1178     """
  1179     modulename = "MoinMoin." + kind
  1180     return pysupport.importName(modulename, "modules")
  1181 
  1182 
  1183 def wikiPlugins(kind, cfg):
  1184     """
  1185     Gets a dict containing the names of all plugins of @kind
  1186     as the key and the containing module name as the value.
  1187 
  1188     @param kind: what kind of modules we look for
  1189     @rtype: dict
  1190     @return: plugin name to containing module name mapping
  1191     """
  1192     # short-cut if we've loaded the dict already
  1193     # (or already failed to load it)
  1194     cache = cfg._site_plugin_lists
  1195     if kind in cache:
  1196         result = cache[kind]
  1197     else:
  1198         result = {}
  1199         for modname in cfg._plugin_modules:
  1200             try:
  1201                 module = pysupport.importName(modname, kind)
  1202                 packagepath = os.path.dirname(module.__file__)
  1203                 plugins = pysupport.getPluginModules(packagepath)
  1204                 for p in plugins:
  1205                     if not p in result:
  1206                         result[p] = '%s.%s' % (modname, kind)
  1207             except AttributeError:
  1208                 pass
  1209         cache[kind] = result
  1210     return result
  1211 
  1212 
  1213 def getPlugins(kind, cfg):
  1214     """ Gets a list of plugin names of kind
  1215 
  1216     @param kind: what kind of modules we look for
  1217     @rtype: list
  1218     @return: module names
  1219     """
  1220     # Copy names from builtin plugins - so we dont destroy the value
  1221     all_plugins = builtinPlugins(kind)[:]
  1222 
  1223     # Add extension plugins without duplicates
  1224     for plugin in wikiPlugins(kind, cfg):
  1225         if plugin not in all_plugins:
  1226             all_plugins.append(plugin)
  1227 
  1228     return all_plugins
  1229 
  1230 
  1231 def searchAndImportPlugin(cfg, type, name, what=None):
  1232     type2classname = {"parser": "Parser",
  1233                       "formatter": "Formatter",
  1234     }
  1235     if what is None:
  1236         what = type2classname[type]
  1237     mt = MimeType(name)
  1238     plugin = None
  1239     for module_name in mt.module_name():
  1240         try:
  1241             plugin = importPlugin(cfg, type, module_name, what)
  1242             break
  1243         except PluginMissingError:
  1244             pass
  1245     else:
  1246         raise PluginMissingError("Plugin not found!")
  1247     return plugin
  1248 
  1249 
  1250 #############################################################################
  1251 ### Parsers
  1252 #############################################################################
  1253 
  1254 def getParserForExtension(cfg, extension):
  1255     """
  1256     Returns the Parser class of the parser fit to handle a file
  1257     with the given extension. The extension should be in the same
  1258     format as os.path.splitext returns it (i.e. with the dot).
  1259     Returns None if no parser willing to handle is found.
  1260     The dict of extensions is cached in the config object.
  1261 
  1262     @param cfg: the Config instance for the wiki in question
  1263     @param extension: the filename extension including the dot
  1264     @rtype: class, None
  1265     @returns: the parser class or None
  1266     """
  1267     if not hasattr(cfg.cache, 'EXT_TO_PARSER'):
  1268         etp, etd = {}, None
  1269         for pname in getPlugins('parser', cfg):
  1270             try:
  1271                 Parser = importPlugin(cfg, 'parser', pname, 'Parser')
  1272             except PluginMissingError:
  1273                 continue
  1274             if hasattr(Parser, 'extensions'):
  1275                 exts = Parser.extensions
  1276                 if isinstance(exts, list):
  1277                     for ext in Parser.extensions:
  1278                         etp[ext] = Parser
  1279                 elif str(exts) == '*':
  1280                     etd = Parser
  1281         cfg.cache.EXT_TO_PARSER = etp
  1282         cfg.cache.EXT_TO_PARSER_DEFAULT = etd
  1283 
  1284     return cfg.cache.EXT_TO_PARSER.get(extension, cfg.cache.EXT_TO_PARSER_DEFAULT)
  1285 
  1286 
  1287 #############################################################################
  1288 ### Parameter parsing
  1289 #############################################################################
  1290 
  1291 class BracketError(Exception):
  1292     pass
  1293 
  1294 class BracketUnexpectedCloseError(BracketError):
  1295     def __init__(self, bracket):
  1296         self.bracket = bracket
  1297         BracketError.__init__(self, "Unexpected closing bracket %s" % bracket)
  1298 
  1299 class BracketMissingCloseError(BracketError):
  1300     def __init__(self, bracket):
  1301         self.bracket = bracket
  1302         BracketError.__init__(self, "Missing closing bracket %s" % bracket)
  1303 
  1304 class ParserPrefix:
  1305     """
  1306     Trivial container-class holding a single character for
  1307     the possible prefixes for parse_quoted_separated_ext
  1308     and implementing rich equal comparison.
  1309     """
  1310     def __init__(self, prefix):
  1311         self.prefix = prefix
  1312 
  1313     def __eq__(self, other):
  1314         return isinstance(other, ParserPrefix) and other.prefix == self.prefix
  1315 
  1316     def __repr__(self):
  1317         return '<ParserPrefix(%s)>' % self.prefix.encode('utf-8')
  1318 
  1319 def parse_quoted_separated_ext(args, separator=None, name_value_separator=None,
  1320                                brackets=None, seplimit=0, multikey=False,
  1321                                prefixes=None, quotes='"'):
  1322     """
  1323     Parses the given string according to the other parameters.
  1324 
  1325     Items can be quoted with any character from the quotes parameter
  1326     and each quote can be escaped by doubling it, the separator and
  1327     name_value_separator can both be quoted, when name_value_separator
  1328     is set then the name can also be quoted.
  1329 
  1330     Values that are not given are returned as None, while the
  1331     empty string as a value can be achieved by quoting it.
  1332 
  1333     If a name or value does not start with a quote, then the quote
  1334     looses its special meaning for that name or value, unless it
  1335     starts with one of the given prefixes (the parameter is unicode
  1336     containing all allowed prefixes.) The prefixes will be returned
  1337     as ParserPrefix() instances in the first element of the tuple
  1338     for that particular argument.
  1339 
  1340     If multiple separators follow each other, this is treated as
  1341     having None arguments inbetween, that is also true for when
  1342     space is used as separators (when separator is None), filter
  1343     them out afterwards.
  1344 
  1345     The function can also do bracketing, i.e. parse expressions
  1346     that contain things like
  1347         "(a (a b))" to ['(', 'a', ['(', 'a', 'b']],
  1348     in this case, as in this example, the returned list will
  1349     contain sub-lists and the brackets parameter must be a list
  1350     of opening and closing brackets, e.g.
  1351         brackets = ['()', '<>']
  1352     Each sub-list's first item is the opening bracket used for
  1353     grouping.
  1354     Nesting will be observed between the different types of
  1355     brackets given. If bracketing doesn't match, a BracketError
  1356     instance is raised with a 'bracket' property indicating the
  1357     type of missing or unexpected bracket, the instance will be
  1358     either of the class BracketMissingCloseError or of the class
  1359     BracketUnexpectedCloseError.
  1360 
  1361     If multikey is True (along with setting name_value_separator),
  1362     then the returned tuples for (key, value) pairs can also have
  1363     multiple keys, e.g.
  1364         "a=b=c" -> ('a', 'b', 'c')
  1365 
  1366     @param args: arguments to parse
  1367     @param separator: the argument separator, defaults to None, meaning any
  1368         space separates arguments
  1369     @param name_value_separator: separator for name=value, default '=',
  1370         name=value keywords not parsed if evaluates to False
  1371     @param brackets: a list of two-character strings giving
  1372         opening and closing brackets
  1373     @param seplimit: limits the number of parsed arguments
  1374     @param multikey: multiple keys allowed for a single value
  1375     @rtype: list
  1376     @returns: list of unicode strings and tuples containing
  1377         unicode strings, or lists containing the same for
  1378         bracketing support
  1379     """
  1380     idx = 0
  1381     assert name_value_separator is None or name_value_separator != separator
  1382     assert name_value_separator is None or len(name_value_separator) == 1
  1383     if not isinstance(args, unicode):
  1384         raise TypeError('args must be unicode')
  1385     max = len(args)
  1386     result = []         # result list
  1387     cur = [None]        # current item
  1388     quoted = None       # we're inside quotes, indicates quote character used
  1389     skipquote = 0       # next quote is a quoted quote
  1390     noquote = False     # no quotes expected because word didn't start with one
  1391     seplimit_reached = False # number of separators exhausted
  1392     separator_count = 0 # number of separators encountered
  1393     SPACE = [' ', '\t', ]
  1394     nextitemsep = [separator]   # used for skipping trailing space
  1395     SPACE = [' ', '\t', ]
  1396     if separator is None:
  1397         nextitemsep = SPACE[:]
  1398         separators = SPACE
  1399     else:
  1400         nextitemsep = [separator]   # used for skipping trailing space
  1401         separators = [separator]
  1402     if name_value_separator:
  1403         nextitemsep.append(name_value_separator)
  1404 
  1405     # bracketing support
  1406     opening = []
  1407     closing = []
  1408     bracketstack = []
  1409     matchingbracket = {}
  1410     if brackets:
  1411         for o, c in brackets:
  1412             assert not o in opening
  1413             opening.append(o)
  1414             assert not c in closing
  1415             closing.append(c)
  1416             matchingbracket[o] = c
  1417 
  1418     def additem(result, cur, separator_count, nextitemsep):
  1419         if len(cur) == 1:
  1420             result.extend(cur)
  1421         elif cur:
  1422             result.append(tuple(cur))
  1423         cur = [None]
  1424         noquote = False
  1425         separator_count += 1
  1426         seplimit_reached = False
  1427         if seplimit and separator_count >= seplimit:
  1428             seplimit_reached = True
  1429             nextitemsep = [n for n in nextitemsep if n in separators]
  1430 
  1431         return cur, noquote, separator_count, seplimit_reached, nextitemsep
  1432 
  1433     while idx < max:
  1434         char = args[idx]
  1435         next = None
  1436         if idx + 1 < max:
  1437             next = args[idx+1]
  1438         if skipquote:
  1439             skipquote -= 1
  1440         if not separator is None and not quoted and char in SPACE:
  1441             spaces = ''
  1442             # accumulate all space
  1443             while char in SPACE and idx < max - 1:
  1444                 spaces += char
  1445                 idx += 1
  1446                 char = args[idx]
  1447             # remove space if args end with it
  1448             if char in SPACE and idx == max - 1:
  1449                 break
  1450             # remove space at end of argument
  1451             if char in nextitemsep:
  1452                 continue
  1453             idx -= 1
  1454             if len(cur) and cur[-1]:
  1455                 cur[-1] = cur[-1] + spaces
  1456         elif not quoted and char == name_value_separator:
  1457             if multikey or len(cur) == 1:
  1458                 cur.append(None)
  1459             else:
  1460                 if not multikey:
  1461                     if cur[-1] is None:
  1462                         cur[-1] = ''
  1463                     cur[-1] += name_value_separator
  1464                 else:
  1465                     cur.append(None)
  1466             noquote = False
  1467         elif not quoted and not seplimit_reached and char in separators:
  1468             (cur, noquote, separator_count, seplimit_reached,
  1469              nextitemsep) = additem(result, cur, separator_count, nextitemsep)
  1470         elif not quoted and not noquote and char in quotes:
  1471             if len(cur) and cur[-1] is None:
  1472                 del cur[-1]
  1473             cur.append(u'')
  1474             quoted = char
  1475         elif char == quoted and not skipquote:
  1476             if next == quoted:
  1477                 skipquote = 2 # will be decremented right away
  1478             else:
  1479                 quoted = None
  1480         elif not quoted and char in opening:
  1481             while len(cur) and cur[-1] is None:
  1482                 del cur[-1]
  1483             (cur, noquote, separator_count, seplimit_reached,
  1484              nextitemsep) = additem(result, cur, separator_count, nextitemsep)
  1485             bracketstack.append((matchingbracket[char], result))
  1486             result = [char]
  1487         elif not quoted and char in closing:
  1488             while len(cur) and cur[-1] is None:
  1489                 del cur[-1]
  1490             (cur, noquote, separator_count, seplimit_reached,
  1491              nextitemsep) = additem(result, cur, separator_count, nextitemsep)
  1492             cur = []
  1493             if not bracketstack:
  1494                 raise BracketUnexpectedCloseError(char)
  1495             expected, oldresult = bracketstack[-1]
  1496             if not expected == char:
  1497                 raise BracketUnexpectedCloseError(char)
  1498             del bracketstack[-1]
  1499             oldresult.append(result)
  1500             result = oldresult
  1501         elif not quoted and prefixes and char in prefixes and cur == [None]:
  1502             cur = [ParserPrefix(char)]
  1503             cur.append(None)
  1504         else:
  1505             if len(cur):
  1506                 if cur[-1] is None:
  1507                     cur[-1] = char
  1508                 else:
  1509                     cur[-1] += char
  1510             else:
  1511                 cur.append(char)
  1512             noquote = True
  1513 
  1514         idx += 1
  1515 
  1516     if bracketstack:
  1517         raise BracketMissingCloseError(bracketstack[-1][0])
  1518 
  1519     if quoted:
  1520         if len(cur):
  1521             if cur[-1] is None:
  1522                 cur[-1] = quoted
  1523             else:
  1524                 cur[-1] = quoted + cur[-1]
  1525         else:
  1526             cur.append(quoted)
  1527 
  1528     additem(result, cur, separator_count, nextitemsep)
  1529 
  1530     return result
  1531 
  1532 def parse_quoted_separated(args, separator=',', name_value=True, seplimit=0):
  1533     result = []
  1534     positional = result
  1535     if name_value:
  1536         name_value_separator = '='
  1537         trailing = []
  1538         keywords = {}
  1539     else:
  1540         name_value_separator = None
  1541 
  1542     l = parse_quoted_separated_ext(args, separator=separator,
  1543                                    name_value_separator=name_value_separator,
  1544                                    seplimit=seplimit)
  1545     for item in l:
  1546         if isinstance(item, tuple):
  1547             key, value = item
  1548             if key is None:
  1549                 key = u''
  1550             keywords[key] = value
  1551             positional = trailing
  1552         else:
  1553             positional.append(item)
  1554 
  1555     if name_value:
  1556         return result, keywords, trailing
  1557     return result
  1558 
  1559 def get_bool(request, arg, name=None, default=None):
  1560     """
  1561     For use with values returned from parse_quoted_separated or given
  1562     as macro parameters, return a boolean from a unicode string.
  1563     Valid input is 'true'/'false', 'yes'/'no' and '1'/'0' or None for
  1564     the default value.
  1565 
  1566     @param request: A request instance
  1567     @param arg: The argument, may be None or a unicode string
  1568     @param name: Name of the argument, for error messages
  1569     @param default: default value if arg is None
  1570     @rtype: boolean or None
  1571     @returns: the boolean value of the string according to above rules
  1572               (or default value)
  1573     """
  1574     _ = request.getText
  1575     assert default is None or isinstance(default, bool)
  1576     if arg is None:
  1577         return default
  1578     elif not isinstance(arg, unicode):
  1579         raise TypeError('Argument must be None or unicode')
  1580     arg = arg.lower()
  1581     if arg in [u'0', u'false', u'no']:
  1582         return False
  1583     elif arg in [u'1', u'true', u'yes']:
  1584         return True
  1585     else:
  1586         if name:
  1587             raise ValueError(
  1588                 _('Argument "%s" must be a boolean value, not "%s"') % (
  1589                     name, arg))
  1590         else:
  1591             raise ValueError(
  1592                 _('Argument must be a boolean value, not "%s"') % arg)
  1593 
  1594 
  1595 def get_int(request, arg, name=None, default=None):
  1596     """
  1597     For use with values returned from parse_quoted_separated or given
  1598     as macro parameters, return an integer from a unicode string
  1599     containing the decimal representation of a number.
  1600     None is a valid input and yields the default value.
  1601 
  1602     @param request: A request instance
  1603     @param arg: The argument, may be None or a unicode string
  1604     @param name: Name of the argument, for error messages
  1605     @param default: default value if arg is None
  1606     @rtype: int or None
  1607     @returns: the integer value of the string (or default value)
  1608     """
  1609     _ = request.getText
  1610     assert default is None or isinstance(default, (int, long))
  1611     if arg is None:
  1612         return default
  1613     elif not isinstance(arg, unicode):
  1614         raise TypeError('Argument must be None or unicode')
  1615     try:
  1616         return int(arg)
  1617     except ValueError:
  1618         if name:
  1619             raise ValueError(
  1620                 _('Argument "%s" must be an integer value, not "%s"') % (
  1621                     name, arg))
  1622         else:
  1623             raise ValueError(
  1624                 _('Argument must be an integer value, not "%s"') % arg)
  1625 
  1626 
  1627 def get_float(request, arg, name=None, default=None):
  1628     """
  1629     For use with values returned from parse_quoted_separated or given
  1630     as macro parameters, return a float from a unicode string.
  1631     None is a valid input and yields the default value.
  1632 
  1633     @param request: A request instance
  1634     @param arg: The argument, may be None or a unicode string
  1635     @param name: Name of the argument, for error messages
  1636     @param default: default return value if arg is None
  1637     @rtype: float or None
  1638     @returns: the float value of the string (or default value)
  1639     """
  1640     _ = request.getText
  1641     assert default is None or isinstance(default, (int, long, float))
  1642     if arg is None:
  1643         return default
  1644     elif not isinstance(arg, unicode):
  1645         raise TypeError('Argument must be None or unicode')
  1646     try:
  1647         return float(arg)
  1648     except ValueError:
  1649         if name:
  1650             raise ValueError(
  1651                 _('Argument "%s" must be a floating point value, not "%s"') % (
  1652                     name, arg))
  1653         else:
  1654             raise ValueError(
  1655                 _('Argument must be a floating point value, not "%s"') % arg)
  1656 
  1657 
  1658 def get_complex(request, arg, name=None, default=None):
  1659     """
  1660     For use with values returned from parse_quoted_separated or given
  1661     as macro parameters, return a complex from a unicode string.
  1662     None is a valid input and yields the default value.
  1663 
  1664     @param request: A request instance
  1665     @param arg: The argument, may be None or a unicode string
  1666     @param name: Name of the argument, for error messages
  1667     @param default: default return value if arg is None
  1668     @rtype: complex or None
  1669     @returns: the complex value of the string (or default value)
  1670     """
  1671     _ = request.getText
  1672     assert default is None or isinstance(default, (int, long, float, complex))
  1673     if arg is None:
  1674         return default
  1675     elif not isinstance(arg, unicode):
  1676         raise TypeError('Argument must be None or unicode')
  1677     try:
  1678         # allow writing 'i' instead of 'j'
  1679         arg = arg.replace('i', 'j').replace('I', 'j')
  1680         return complex(arg)
  1681     except ValueError:
  1682         if name:
  1683             raise ValueError(
  1684                 _('Argument "%s" must be a complex value, not "%s"') % (
  1685                     name, arg))
  1686         else:
  1687             raise ValueError(
  1688                 _('Argument must be a complex value, not "%s"') % arg)
  1689 
  1690 
  1691 def get_unicode(request, arg, name=None, default=None):
  1692     """
  1693     For use with values returned from parse_quoted_separated or given
  1694     as macro parameters, return a unicode string from a unicode string.
  1695     None is a valid input and yields the default value.
  1696 
  1697     @param request: A request instance
  1698     @param arg: The argument, may be None or a unicode string
  1699     @param name: Name of the argument, for error messages
  1700     @param default: default return value if arg is None;
  1701     @rtype: unicode or None
  1702     @returns: the unicode string (or default value)
  1703     """
  1704     assert default is None or isinstance(default, unicode)
  1705     if arg is None:
  1706         return default
  1707     elif not isinstance(arg, unicode):
  1708         raise TypeError('Argument must be None or unicode')
  1709 
  1710     return arg
  1711 
  1712 
  1713 def get_choice(request, arg, name=None, choices=[None]):
  1714     """
  1715     For use with values returned from parse_quoted_separated or given
  1716     as macro parameters, return a unicode string that must be in the
  1717     choices given. None is a valid input and yields first of the valid
  1718     choices.
  1719 
  1720     @param request: A request instance
  1721     @param arg: The argument, may be None or a unicode string
  1722     @param name: Name of the argument, for error messages
  1723     @param choices: the possible choices
  1724     @rtype: unicode or None
  1725     @returns: the unicode string (or default value)
  1726     """
  1727     assert isinstance(choices, (tuple, list))
  1728     if arg is None:
  1729         return choices[0]
  1730     elif not isinstance(arg, unicode):
  1731         raise TypeError('Argument must be None or unicode')
  1732     elif not arg in choices:
  1733         _ = request.getText
  1734         if name:
  1735             raise ValueError(
  1736                 _('Argument "%s" must be one of "%s", not "%s"') % (
  1737                     name, '", "'.join(choices), arg))
  1738         else:
  1739             raise ValueError(
  1740                 _('Argument must be one of "%s", not "%s"') % (
  1741                     '", "'.join(choices), arg))
  1742 
  1743     return arg
  1744 
  1745 
  1746 class IEFArgument:
  1747     """
  1748     Base class for new argument parsers for
  1749     invoke_extension_function.
  1750     """
  1751     def __init__(self):
  1752         pass
  1753 
  1754     def parse_argument(self, s):
  1755         """
  1756         Parse the argument given in s (a string) and return
  1757         the argument for the extension function.
  1758         """
  1759         raise NotImplementedError
  1760 
  1761     def get_default(self):
  1762         """
  1763         Return the default for this argument.
  1764         """
  1765         raise NotImplementedError
  1766 
  1767 
  1768 class UnitArgument(IEFArgument):
  1769     """
  1770     Argument class for invoke_extension_function that forces
  1771     having any of the specified units given for a value.
  1772 
  1773     Note that the default unit is "mm".
  1774 
  1775     Use, for example, "UnitArgument('7mm', float, ['%', 'mm'])".
  1776 
  1777     If the defaultunit parameter is given, any argument that
  1778     can be converted into the given argtype is assumed to have
  1779     the default unit. NOTE: This doesn't work with a choice
  1780     (tuple or list) argtype.
  1781     """
  1782     def __init__(self, default, argtype, units=['mm'], defaultunit=None):
  1783         """
  1784         Initialise a UnitArgument giving the default,
  1785         argument type and the permitted units.
  1786         """
  1787         IEFArgument.__init__(self)
  1788         self._units = list(units)
  1789         self._units.sort(lambda x, y: len(y) - len(x))
  1790         self._type = argtype
  1791         self._defaultunit = defaultunit
  1792         assert defaultunit is None or defaultunit in units
  1793         if default is not None:
  1794             self._default = self.parse_argument(default)
  1795         else:
  1796             self._default = None
  1797 
  1798     def parse_argument(self, s):
  1799         for unit in self._units:
  1800             if s.endswith(unit):
  1801                 ret = (self._type(s[:len(s) - len(unit)]), unit)
  1802                 return ret
  1803         if self._defaultunit is not None:
  1804             try:
  1805                 return (self._type(s), self._defaultunit)
  1806             except ValueError:
  1807                 pass
  1808         units = ', '.join(self._units)
  1809         ## XXX: how can we translate this?
  1810         raise ValueError("Invalid unit in value %s (allowed units: %s)" % (s, units))
  1811 
  1812     def get_default(self):
  1813         return self._default
  1814 
  1815 
  1816 class required_arg:
  1817     """
  1818     Wrap a type in this class and give it as default argument
  1819     for a function passed to invoke_extension_function() in
  1820     order to get generic checking that the argument is given.
  1821     """
  1822     def __init__(self, argtype):
  1823         """
  1824         Initialise a required_arg
  1825         @param argtype: the type the argument should have
  1826         """
  1827         if not (argtype in (bool, int, long, float, complex, unicode) or
  1828                 isinstance(argtype, (IEFArgument, tuple, list))):
  1829             raise TypeError("argtype must be a valid type")
  1830         self.argtype = argtype
  1831 
  1832 
  1833 def invoke_extension_function(request, function, args, fixed_args=[]):
  1834     """
  1835     Parses arguments for an extension call and calls the extension
  1836     function with the arguments.
  1837 
  1838     If the macro function has a default value that is a bool,
  1839     int, long, float or unicode object, then the given value
  1840     is converted to the type of that default value before passing
  1841     it to the macro function. That way, macros need not call the
  1842     wikiutil.get_* functions for any arguments that have a default.
  1843 
  1844     @param request: the request object
  1845     @param function: the function to invoke
  1846     @param args: unicode string with arguments (or evaluating to False)
  1847     @param fixed_args: fixed arguments to pass as the first arguments
  1848     @returns: the return value from the function called
  1849     """
  1850 
  1851     def _convert_arg(request, value, default, name=None):
  1852         """
  1853         Using the get_* functions, convert argument to the type of the default
  1854         if that is any of bool, int, long, float or unicode; if the default
  1855         is the type itself then convert to that type (keeps None) or if the
  1856         default is a list require one of the list items.
  1857 
  1858         In other cases return the value itself.
  1859         """
  1860         # if extending this, extend required_arg as well!
  1861         if isinstance(default, bool):
  1862             return get_bool(request, value, name, default)
  1863         elif isinstance(default, (int, long)):
  1864             return get_int(request, value, name, default)
  1865         elif isinstance(default, float):
  1866             return get_float(request, value, name, default)
  1867         elif isinstance(default, complex):
  1868             return get_complex(request, value, name, default)
  1869         elif isinstance(default, unicode):
  1870             return get_unicode(request, value, name, default)
  1871         elif isinstance(default, (tuple, list)):
  1872             return get_choice(request, value, name, default)
  1873         elif default is bool:
  1874             return get_bool(request, value, name)
  1875         elif default is int or default is long:
  1876             return get_int(request, value, name)
  1877         elif default is float:
  1878             return get_float(request, value, name)
  1879         elif default is complex:
  1880             return get_complex(request, value, name)
  1881         elif isinstance(default, IEFArgument):
  1882             # defaults handled later
  1883             if value is None:
  1884                 return None
  1885             return default.parse_argument(value)
  1886         elif isinstance(default, required_arg):
  1887             if isinstance(default.argtype, (tuple, list)):
  1888                 # treat choice specially and return None if no choice
  1889                 # is given in the value
  1890                 choices = [None] + list(default.argtype)
  1891                 return get_choice(request, value, name, choices)
  1892             else:
  1893                 return _convert_arg(request, value, default.argtype, name)
  1894         return value
  1895 
  1896     assert isinstance(fixed_args, (list, tuple))
  1897 
  1898     _ = request.getText
  1899 
  1900     kwargs = {}
  1901     kwargs_to_pass = {}
  1902     trailing_args = []
  1903 
  1904     if args:
  1905         assert isinstance(args, unicode)
  1906 
  1907         positional, keyword, trailing = parse_quoted_separated(args)
  1908 
  1909         for kw in keyword:
  1910             try:
  1911                 kwargs[str(kw)] = keyword[kw]
  1912             except UnicodeEncodeError:
  1913                 kwargs_to_pass[kw] = keyword[kw]
  1914 
  1915         trailing_args.extend(trailing)
  1916 
  1917     else:
  1918         positional = []
  1919 
  1920     if isfunction(function) or ismethod(function):
  1921         argnames, varargs, varkw, defaultlist = getargspec(function)
  1922     elif isclass(function):
  1923         (argnames, varargs,
  1924          varkw, defaultlist) = getargspec(function.__init__.im_func)
  1925     else:
  1926         raise TypeError('function must be a function, method or class')
  1927 
  1928     # self is implicit!
  1929     if ismethod(function) or isclass(function):
  1930         argnames = argnames[1:]
  1931 
  1932     fixed_argc = len(fixed_args)
  1933     argnames = argnames[fixed_argc:]
  1934     argc = len(argnames)
  1935     if not defaultlist:
  1936         defaultlist = []
  1937 
  1938     # if the fixed parameters have defaults too...
  1939     if argc < len(defaultlist):
  1940         defaultlist = defaultlist[fixed_argc:]
  1941     defstart = argc - len(defaultlist)
  1942 
  1943     defaults = {}
  1944     # reverse to be able to pop() things off
  1945     positional.reverse()
  1946     allow_kwargs = False
  1947     allow_trailing = False
  1948     # convert all arguments to keyword arguments,
  1949     # fill all arguments that weren't given with None
  1950     for idx in range(argc):
  1951         argname = argnames[idx]
  1952         if argname == '_kwargs':
  1953             allow_kwargs = True
  1954             continue
  1955         if argname == '_trailing_args':
  1956             allow_trailing = True
  1957             continue
  1958         if positional:
  1959             kwargs[argname] = positional.pop()
  1960         if not argname in kwargs:
  1961             kwargs[argname] = None
  1962         if idx >= defstart:
  1963             defaults[argname] = defaultlist[idx - defstart]
  1964 
  1965     if positional:
  1966         if not allow_trailing:
  1967             raise ValueError(_('Too many arguments'))
  1968         trailing_args.extend(positional)
  1969 
  1970     if trailing_args:
  1971         if not allow_trailing:
  1972             raise ValueError(_('Cannot have arguments without name following'
  1973                                ' named arguments'))
  1974         kwargs['_trailing_args'] = trailing_args
  1975 
  1976     # type-convert all keyword arguments to the type
  1977     # that the default value indicates
  1978     for argname in kwargs.keys()[:]:
  1979         if argname in defaults:
  1980             # the value of 'argname' from kwargs will be put into the
  1981             # macro's 'argname' argument, so convert that giving the
  1982             # name to the converter so the user is told which argument
  1983             # went wrong (if it does)
  1984             kwargs[argname] = _convert_arg(request, kwargs[argname],
  1985                                            defaults[argname], argname)
  1986             if kwargs[argname] is None:
  1987                 if isinstance(defaults[argname], required_arg):
  1988                     raise ValueError(_('Argument "%s" is required') % argname)
  1989                 if isinstance(defaults[argname], IEFArgument):
  1990                     kwargs[argname] = defaults[argname].get_default()
  1991 
  1992         if not argname in argnames:
  1993             # move argname into _kwargs parameter
  1994             kwargs_to_pass[argname] = kwargs[argname]
  1995             del kwargs[argname]
  1996 
  1997     if kwargs_to_pass:
  1998         kwargs['_kwargs'] = kwargs_to_pass
  1999         if not allow_kwargs:
  2000             raise ValueError(_(u'No argument named "%s"') % (
  2001                 kwargs_to_pass.keys()[0]))
  2002 
  2003     return function(*fixed_args, **kwargs)
  2004 
  2005 
  2006 def parseAttributes(request, attrstring, endtoken=None, extension=None):
  2007     """
  2008     Parse a list of attributes and return a dict plus a possible
  2009     error message.
  2010     If extension is passed, it has to be a callable that returns
  2011     a tuple (found_flag, msg). found_flag is whether it did find and process
  2012     something, msg is '' when all was OK or any other string to return an error
  2013     message.
  2014 
  2015     @param request: the request object
  2016     @param attrstring: string containing the attributes to be parsed
  2017     @param endtoken: token terminating parsing
  2018     @param extension: extension function -
  2019                       gets called with the current token, the parser and the dict
  2020     @rtype: dict, msg
  2021     @return: a dict plus a possible error message
  2022     """
  2023     import shlex, StringIO
  2024 
  2025     _ = request.getText
  2026 
  2027     parser = shlex.shlex(StringIO.StringIO(attrstring))
  2028     parser.commenters = ''
  2029     msg = None
  2030     attrs = {}
  2031 
  2032     while not msg:
  2033         try:
  2034             key = parser.get_token()
  2035         except ValueError, err:
  2036             msg = str(err)
  2037             break
  2038         if not key:
  2039             break
  2040         if endtoken and key == endtoken:
  2041             break
  2042 
  2043         # call extension function with the current token, the parser, and the dict
  2044         if extension:
  2045             found_flag, msg = extension(key, parser, attrs)
  2046             #logging.debug("%r = extension(%r, parser, %r)" % (msg, key, attrs))
  2047             if found_flag:
  2048                 continue
  2049             elif msg:
  2050                 break
  2051             #else (we found nothing, but also didn't have an error msg) we just continue below:
  2052 
  2053         try:
  2054             eq = parser.get_token()
  2055         except ValueError, err:
  2056             msg = str(err)
  2057             break
  2058         if eq != "=":
  2059             msg = _('Expected "=" to follow "%(token)s"') % {'token': key}
  2060             break
  2061 
  2062         try:
  2063             val = parser.get_token()
  2064         except ValueError, err:
  2065             msg = str(err)
  2066             break
  2067         if not val:
  2068             msg = _('Expected a value for key "%(token)s"') % {'token': key}
  2069             break
  2070 
  2071         key = escape(key) # make sure nobody cheats
  2072 
  2073         # safely escape and quote value
  2074         if val[0] in ["'", '"']:
  2075             val = escape(val)
  2076         else:
  2077             val = '"%s"' % escape(val, 1)
  2078 
  2079         attrs[key.lower()] = val
  2080 
  2081     return attrs, msg or ''
  2082 
  2083 
  2084 class ParameterParser:
  2085     """ MoinMoin macro parameter parser
  2086 
  2087         Parses a given parameter string, separates the individual parameters
  2088         and detects their type.
  2089 
  2090         Possible parameter types are:
  2091 
  2092         Name      | short  | example
  2093         ----------------------------
  2094          Integer  | i      | -374
  2095          Float    | f      | 234.234 23.345E-23
  2096          String   | s      | 'Stri\'ng'
  2097          Boolean  | b      | 0 1 True false
  2098          Name     |        | case_sensitive | converted to string
  2099 
  2100         So say you want to parse three things, name, age and if the
  2101         person is male or not:
  2102 
  2103         The pattern will be: %(name)s%(age)i%(male)b
  2104 
  2105         As a result, the returned dict will put the first value into
  2106         male, second into age etc. If some argument is missing, it will
  2107         get None as its value. This also means that all the identifiers
  2108         in the pattern will exist in the dict, they will just have the
  2109         value None if they were not specified by the caller.
  2110 
  2111         So if we call it with the parameters as follows:
  2112             ("John Smith", 18)
  2113         this will result in the following dict:
  2114             {"name": "John Smith", "age": 18, "male": None}
  2115 
  2116         Another way of calling would be:
  2117             ("John Smith", male=True)
  2118         this will result in the following dict:
  2119             {"name": "John Smith", "age": None, "male": True}
  2120     """
  2121 
  2122     def __init__(self, pattern):
  2123         # parameter_re = "([^\"',]*(\"[^\"]*\"|'[^']*')?[^\"',]*)[,)]"
  2124         name = "(?P<%s>[a-zA-Z_][a-zA-Z0-9_]*)"
  2125         int_re = r"(?P<int>-?\d+)"
  2126         bool_re = r"(?P<bool>(([10])|([Tt]rue)|([Ff]alse)))"
  2127         float_re = r"(?P<float>-?\d+\.\d+([eE][+-]?\d+)?)"
  2128         string_re = (r"(?P<string>('([^']|(\'))*?')|" +
  2129                                 r'("([^"]|(\"))*?"))')
  2130         name_re = name % "name"
  2131         name_param_re = name % "name_param"
  2132 
  2133         param_re = r"\s*(\s*%s\s*=\s*)?(%s|%s|%s|%s|%s)\s*(,|$)" % (
  2134                    name_re, float_re, int_re, bool_re, string_re, name_param_re)
  2135         self.param_re = re.compile(param_re, re.U)
  2136         self._parse_pattern(pattern)
  2137 
  2138     def _parse_pattern(self, pattern):
  2139         param_re = r"(%(?P<name>\(.*?\))?(?P<type>[ibfs]{1,3}))|\|"
  2140         i = 0
  2141         # TODO: Optionals aren't checked.
  2142         self.optional = []
  2143         named = False
  2144         self.param_list = []
  2145         self.param_dict = {}
  2146 
  2147         for match in re.finditer(param_re, pattern):
  2148             if match.group() == "|":
  2149                 self.optional.append(i)
  2150                 continue
  2151             self.param_list.append(match.group('type'))
  2152             if match.group('name'):
  2153                 named = True
  2154                 self.param_dict[match.group('name')[1:-1]] = i
  2155             elif named:
  2156                 raise ValueError("Named parameter expected")
  2157             i += 1
  2158 
  2159     def __str__(self):
  2160         return "%s, %s, optional:%s" % (self.param_list, self.param_dict,
  2161                                         self.optional)
  2162 
  2163     def parse_parameters(self, params):
  2164         # Default list/dict entries to None
  2165         parameter_list = [None] * len(self.param_list)
  2166         parameter_dict = dict([(key, None) for key in self.param_dict])
  2167         check_list = [0] * len(self.param_list)
  2168 
  2169         i = 0
  2170         start = 0
  2171         fixed_count = 0
  2172         named = False
  2173 
  2174         while start < len(params):
  2175             match = re.match(self.param_re, params[start:])
  2176             if not match:
  2177                 raise ValueError("malformed parameters")
  2178             start += match.end()
  2179             if match.group("int"):
  2180                 pvalue = int(match.group("int"))
  2181                 ptype = 'i'
  2182             elif match.group("bool"):
  2183                 pvalue = (match.group("bool") == "1") or (match.group("bool") == "True") or (match.group("bool") == "true")
  2184                 ptype = 'b'
  2185             elif match.group("float"):
  2186                 pvalue = float(match.group("float"))
  2187                 ptype = 'f'
  2188             elif match.group("string"):
  2189                 pvalue = match.group("string")[1:-1]
  2190                 ptype = 's'
  2191             elif match.group("name_param"):
  2192                 pvalue = match.group("name_param")
  2193                 ptype = 'n'
  2194             else:
  2195                 raise ValueError("Parameter parser code does not fit param_re regex")
  2196 
  2197             name = match.group("name")
  2198             if name:
  2199                 if name not in self.param_dict:
  2200                     # TODO we should think on inheritance of parameters
  2201                     raise ValueError("unknown parameter name '%s'" % name)
  2202                 nr = self.param_dict[name]
  2203                 if check_list[nr]:
  2204                     raise ValueError("parameter '%s' specified twice" % name)
  2205                 else:
  2206                     check_list[nr] = 1
  2207                 pvalue = self._check_type(pvalue, ptype, self.param_list[nr])
  2208                 parameter_dict[name] = pvalue
  2209                 parameter_list[nr] = pvalue
  2210                 named = True
  2211             elif named:
  2212                 raise ValueError("only named parameters allowed after first named parameter")
  2213             else:
  2214                 nr = i
  2215                 if nr not in self.param_dict.values():
  2216                     fixed_count = nr + 1
  2217                 parameter_list[nr] = self._check_type(pvalue, ptype, self.param_list[nr])
  2218 
  2219             # Let's populate and map our dictionary to what's been found
  2220             for name in self.param_dict:
  2221                 tmp = self.param_dict[name]
  2222                 parameter_dict[name] = parameter_list[tmp]
  2223 
  2224             i += 1
  2225 
  2226         for i in range(fixed_count):
  2227             parameter_dict[i] = parameter_list[i]
  2228 
  2229         return fixed_count, parameter_dict
  2230 
  2231     def _check_type(self, pvalue, ptype, format):
  2232         if ptype == 'n' and 's' in format: # n as s
  2233             return pvalue
  2234 
  2235         if ptype in format:
  2236             return pvalue # x -> x
  2237 
  2238         if ptype == 'i':
  2239             if 'f' in format:
  2240                 return float(pvalue) # i -> f
  2241             elif 'b' in format:
  2242                 return pvalue != 0 # i -> b
  2243         elif ptype == 's':
  2244             if 'b' in format:
  2245                 if pvalue.lower() == 'false':
  2246                     return False # s-> b
  2247                 elif pvalue.lower() == 'true':
  2248                     return True # s-> b
  2249                 else:
  2250                     raise ValueError('%r does not match format %r' % (pvalue, format))
  2251 
  2252         if 's' in format: # * -> s
  2253             return str(pvalue)
  2254 
  2255         raise ValueError('%r does not match format %r' % (pvalue, format))
  2256 
  2257 
  2258 #############################################################################
  2259 ### Misc
  2260 #############################################################################
  2261 def taintfilename(basename):
  2262     """
  2263     Make a filename that is supposed to be a plain name secure, i.e.
  2264     remove any possible path components that compromise our system.
  2265 
  2266     @param basename: (possibly unsafe) filename
  2267     @rtype: string
  2268     @return: (safer) filename
  2269     """
  2270     for x in (os.pardir, ':', '/', '\\', '<', '>'):
  2271         basename = basename.replace(x, '_')
  2272 
  2273     return basename
  2274 
  2275 
  2276 def mapURL(request, url):
  2277     """
  2278     Map URLs according to 'cfg.url_mappings'.
  2279 
  2280     @param url: a URL
  2281     @rtype: string
  2282     @return: mapped URL
  2283     """
  2284     # check whether we have to map URLs
  2285     if request.cfg.url_mappings:
  2286         # check URL for the configured prefixes
  2287         for prefix in request.cfg.url_mappings:
  2288             if url.startswith(prefix):
  2289                 # substitute prefix with replacement value
  2290                 return request.cfg.url_mappings[prefix] + url[len(prefix):]
  2291 
  2292     # return unchanged url
  2293     return url
  2294 
  2295 
  2296 def getUnicodeIndexGroup(name):
  2297     """
  2298     Return a group letter for `name`, which must be a unicode string.
  2299     Currently supported: Hangul Syllables (U+AC00 - U+D7AF)
  2300 
  2301     @param name: a string
  2302     @rtype: string
  2303     @return: group letter or None
  2304     """
  2305     c = name[0]
  2306     if u'\uAC00' <= c <= u'\uD7AF': # Hangul Syllables
  2307         return unichr(0xac00 + (int(ord(c) - 0xac00) / 588) * 588)
  2308     else:
  2309         return c.upper() # we put lower and upper case words into the same index group
  2310 
  2311 
  2312 def isStrictWikiname(name, word_re=re.compile(ur"^(?:[%(u)s][%(l)s]+){2,}$" % {'u': config.chars_upper, 'l': config.chars_lower})):
  2313     """
  2314     Check whether this is NOT an extended name.
  2315 
  2316     @param name: the wikiname in question
  2317     @rtype: bool
  2318     @return: true if name matches the word_re
  2319     """
  2320     return word_re.match(name)
  2321 
  2322 
  2323 def is_URL(arg, schemas=config.url_schemas):
  2324     """ Return True if arg is a URL (with a schema given in the schemas list).
  2325 
  2326         Note: there are not that many requirements for generic URLs, basically
  2327         the only mandatory requirement is the ':' between schema and rest.
  2328         Schema itself could be anything, also the rest (but we only support some
  2329         schemas, as given in config.url_schemas, so it is a bit less ambiguous).
  2330     """
  2331     if ':' not in arg:
  2332         return False
  2333     for schema in schemas:
  2334         if arg.startswith(schema + ':'):
  2335             return True
  2336     return False
  2337 
  2338 
  2339 def isPicture(url):
  2340     """
  2341     Is this a picture's url?
  2342 
  2343     @param url: the url in question
  2344     @rtype: bool
  2345     @return: true if url points to a picture
  2346     """
  2347     extpos = url.rfind(".") + 1
  2348     return extpos > 1 and url[extpos:].lower() in config.browser_supported_images
  2349 
  2350 
  2351 def link_tag(request, params, text=None, formatter=None, on=None, **kw):
  2352     """ Create a link.
  2353 
  2354     TODO: cleanup css_class
  2355 
  2356     @param request: the request object
  2357     @param params: parameter string appended to the URL after the scriptname/
  2358     @param text: text / inner part of the <a>...</a> link - does NOT get
  2359                  escaped, so you can give HTML here and it will be used verbatim
  2360     @param formatter: the formatter object to use
  2361     @param on: opening/closing tag only
  2362     @keyword attrs: additional attrs (HTMLified string) (removed in 1.5.3)
  2363     @rtype: string
  2364     @return: formatted link tag
  2365     """
  2366     if formatter is None:
  2367         formatter = request.html_formatter
  2368     if 'css_class' in kw:
  2369         css_class = kw['css_class']
  2370         del kw['css_class'] # one time is enough
  2371     else:
  2372         css_class = None
  2373     id = kw.get('id', None)
  2374     name = kw.get('name', None)
  2375     if text is None:
  2376         text = params # default
  2377     if formatter:
  2378         url = "%s/%s" % (request.getScriptname(), params)
  2379         # formatter.url will escape the url part
  2380         if on is not None:
  2381             tag = formatter.url(on, url, css_class, **kw)
  2382         else:
  2383             tag = (formatter.url(1, url, css_class, **kw) +
  2384                 formatter.rawHTML(text) +
  2385                 formatter.url(0))
  2386     else: # this shouldn't be used any more:
  2387         if on is not None and not on:
  2388             tag = '</a>'
  2389         else:
  2390             attrs = ''
  2391             if css_class:
  2392                 attrs += ' class="%s"' % css_class
  2393             if id:
  2394                 attrs += ' id="%s"' % id
  2395             if name:
  2396                 attrs += ' name="%s"' % name
  2397             tag = '<a%s href="%s/%s">' % (attrs, request.getScriptname(), params)
  2398             if not on:
  2399                 tag = "%s%s</a>" % (tag, text)
  2400         logging.warning("wikiutil.link_tag called without formatter and without request.html_formatter. tag=%r" % (tag, ))
  2401     return tag
  2402 
  2403 def containsConflictMarker(text):
  2404     """ Returns true if there is a conflict marker in the text. """
  2405     return "/!\\ '''Edit conflict" in text
  2406 
  2407 def pagediff(request, pagename1, rev1, pagename2, rev2, **kw):
  2408     """
  2409     Calculate the "diff" between two page contents.
  2410 
  2411     @param pagename1: name of first page
  2412     @param rev1: revision of first page
  2413     @param pagename2: name of second page
  2414     @param rev2: revision of second page
  2415     @keyword ignorews: if 1: ignore pure-whitespace changes.
  2416     @rtype: list
  2417     @return: lines of diff output
  2418     """
  2419     from MoinMoin.Page import Page
  2420     from MoinMoin.util import diff_text
  2421     lines1 = Page(request, pagename1, rev=rev1).getlines()
  2422     lines2 = Page(request, pagename2, rev=rev2).getlines()
  2423 
  2424     lines = diff_text.diff(lines1, lines2, **kw)
  2425     return lines
  2426 
  2427 def anchor_name_from_text(text):
  2428     '''
  2429     Generate an anchor name from the given text.
  2430     This function generates valid HTML IDs matching: [A-Za-z][A-Za-z0-9:_.-]*
  2431     Note: this transformation has a special feature: when you feed it with a
  2432           valid ID/name, it will return it without modification (identity
  2433           transformation).
  2434     '''
  2435     quoted = urllib.quote_plus(text.encode('utf-7'), safe=':')
  2436     res = quoted.replace('%', '.').replace('+', '_')
  2437     if not res[:1].isalpha():
  2438         return 'A%s' % res
  2439     return res
  2440 
  2441 def split_anchor(pagename):
  2442     """
  2443     Split a pagename that (optionally) has an anchor into the real pagename
  2444     and the anchor part. If there is no anchor, it returns an empty string
  2445     for the anchor.
  2446 
  2447     Note: if pagename contains a # (as part of the pagename, not as anchor),
  2448           you can use a trick to make it work nevertheless: just append a
  2449           # at the end:
  2450           "C##" returns ("C#", "")
  2451           "Problem #1#" returns ("Problem #1", "")
  2452 
  2453     TODO: We shouldn't deal with composite pagename#anchor strings, but keep
  2454           it separate.
  2455           Current approach: [[pagename#anchor|label|attr=val,&qarg=qval]]
  2456           Future approach:  [[pagename|label|attr=val,&qarg=qval,#anchor]]
  2457           The future approach will avoid problems when there is a # in the
  2458           pagename part (and no anchor). Also, we need to append #anchor
  2459           at the END of the generated URL (AFTER the query string).
  2460     """
  2461     parts = rsplit(pagename, '#', 1)
  2462     if len(parts) == 2:
  2463         return parts
  2464     else:
  2465         return pagename, ""
  2466 
  2467 ########################################################################
  2468 ### Tickets - usually used in forms to make sure that form submissions
  2469 ### are in response to a form the same user got from moin for the same
  2470 ### action and same page.
  2471 ########################################################################
  2472 
  2473 def createTicket(request, tm=None, action=None, pagename=None):
  2474     """ Create a ticket using a configured secret
  2475 
  2476         @param tm: unix timestamp (optional, uses current time if not given)
  2477         @param action: action name (optional, uses current action if not given)
  2478                        Note: if you create a ticket for a form that calls another
  2479                              action than the current one, you MUST specify the
  2480                              action you call when posting the form.
  2481         @param pagename: page name (optional, uses current page name if not given)
  2482                        Note: if you create a ticket for a form that posts to another
  2483                              page than the current one, you MUST specify the
  2484                              page name you use when posting the form.
  2485     """
  2486 
  2487     from MoinMoin.support.python_compatibility import hmac_new
  2488     if tm is None:
  2489         # for age-check of ticket
  2490         tm = "%010x" % time.time()
  2491 
  2492     # make the ticket very specific:
  2493     if pagename is None:
  2494         try:
  2495             pagename = request.page.page_name
  2496         except:
  2497             pagename = ''
  2498 
  2499     if action is None:
  2500         action = request.action
  2501 
  2502     if request.session and not request.session.is_new:
  2503         sid = request.session.name
  2504     else:
  2505         sid = ''
  2506 
  2507     if request.user.valid:
  2508         uid = request.user.id
  2509     else:
  2510         uid = ''
  2511 
  2512     hmac_data = []
  2513     for value in [tm, pagename, action, sid, uid, ]:
  2514         if isinstance(value, unicode):
  2515             value = value.encode('utf-8')
  2516         hmac_data.append(value)
  2517 
  2518     hmac = hmac_new(request.cfg.secrets['wikiutil/tickets'],
  2519                     ''.join(hmac_data))
  2520     return "%s.%s" % (tm, hmac.hexdigest())
  2521 
  2522 
  2523 
  2524 def checkTicket(request, ticket):
  2525     """Check validity of a previously created ticket"""
  2526     try:
  2527         timestamp_str = ticket.split('.')[0]
  2528         timestamp = int(timestamp_str, 16)
  2529     except ValueError:
  2530         # invalid or empty ticket
  2531         logging.debug("checkTicket: invalid or empty ticket %r" % ticket)
  2532         return False
  2533     now = time.time()
  2534     if timestamp < now - 10 * 3600:
  2535         # we don't accept tickets older than 10h
  2536         logging.debug("checkTicket: too old ticket, timestamp %r" % timestamp)
  2537         return False
  2538     # Note: if the session timed out, that will also invalidate the ticket,
  2539     #       if the ticket was created within a session.
  2540     ourticket = createTicket(request, timestamp_str)
  2541     logging.debug("checkTicket: returning %r, got %r, expected %r" % (ticket == ourticket, ticket, ourticket))
  2542     return ticket == ourticket
  2543 
  2544 
  2545 def renderText(request, Parser, text):
  2546     """executes raw wiki markup with all page elements"""
  2547     import StringIO
  2548     out = StringIO.StringIO()
  2549     request.redirect(out)
  2550     wikiizer = Parser(text, request)
  2551     wikiizer.format(request.formatter, inhibit_p=True)
  2552     result = out.getvalue()
  2553     request.redirect()
  2554     del out
  2555     return result
  2556 
  2557 def get_processing_instructions(body):
  2558     """ Extract the processing instructions / acl / etc. at the beginning of a page's body.
  2559 
  2560         Hint: if you have a Page object p, you already have the result of this function in
  2561               p.meta and (even better) parsed/processed stuff in p.pi.
  2562 
  2563         Returns a list of (pi, restofline) tuples and a string with the rest of the body.
  2564     """
  2565     pi = []
  2566     while body.startswith('#'):
  2567         try:
  2568             line, body = body.split('\n', 1) # extract first line
  2569         except ValueError:
  2570             line = body
  2571             body = ''
  2572 
  2573         # end parsing on empty (invalid) PI
  2574         if line == "#":
  2575             body = line + '\n' + body
  2576             break
  2577 
  2578         if line[1] == '#':# two hash marks are a comment
  2579             comment = line[2:]
  2580             if not comment.startswith(' '):
  2581                 # we don't require a blank after the ##, so we put one there
  2582                 comment = ' ' + comment
  2583                 line = '##%s' % comment
  2584 
  2585         verb, args = (line[1:] + ' ').split(' ', 1) # split at the first blank
  2586         pi.append((verb.lower(), args.strip()))
  2587 
  2588     return pi, body
  2589