changeset 4569:3caaa8c74c41

wikiutil: replace moin's cgi/urllib wrappers by calls to werkzeug.utils code
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Wed, 11 Feb 2009 02:34:33 +0100
parents def073ae536c
children e86a7b66eb0e
files MoinMoin/_tests/test_wikiutil.py MoinMoin/action/AttachFile.py MoinMoin/action/Despam.py MoinMoin/logfile/eventlog.py MoinMoin/parser/text_creole.py MoinMoin/parser/text_moin_wiki.py MoinMoin/script/migration/_conv160.py MoinMoin/script/migration/_conv160_wiki.py MoinMoin/script/migration/_conv160a.py MoinMoin/script/migration/_conv160a_wiki.py MoinMoin/script/migration/text_moin158_wiki.py MoinMoin/stats/hitcounts.py MoinMoin/wikiutil.py
diffstat 13 files changed, 74 insertions(+), 138 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/_tests/test_wikiutil.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/_tests/test_wikiutil.py	Wed Feb 11 02:34:33 2009 +0100
@@ -11,6 +11,8 @@
 
 from MoinMoin import config, wikiutil
 
+from werkzeug.utils import MultiDict
+
 
 class TestQueryStringSupport:
     tests = [
@@ -21,17 +23,13 @@
     ]
     def testParseQueryString(self):
         for qstr, expected_str, expected_unicode in self.tests:
-            assert wikiutil.parseQueryString(qstr, want_unicode=False) == expected_str
-            assert wikiutil.parseQueryString(qstr, want_unicode=True) == expected_unicode
-            assert wikiutil.parseQueryString(unicode(qstr), want_unicode=False) == expected_str
-            assert wikiutil.parseQueryString(unicode(qstr), want_unicode=True) == expected_unicode
+            assert wikiutil.parseQueryString(qstr) == MultiDict(expected_unicode)
+            assert wikiutil.parseQueryString(unicode(qstr)) == MultiDict(expected_unicode)
 
     def testMakeQueryString(self):
         for qstr, in_str, in_unicode in self.tests:
-            assert wikiutil.parseQueryString(wikiutil.makeQueryString(in_unicode, want_unicode=False), want_unicode=False) == in_str
-            assert wikiutil.parseQueryString(wikiutil.makeQueryString(in_str, want_unicode=False), want_unicode=False) == in_str
-            assert wikiutil.parseQueryString(wikiutil.makeQueryString(in_unicode, want_unicode=True), want_unicode=True) == in_unicode
-            assert wikiutil.parseQueryString(wikiutil.makeQueryString(in_str, want_unicode=True), want_unicode=True) == in_unicode
+            assert wikiutil.parseQueryString(wikiutil.makeQueryString(in_unicode)) == MultiDict(in_unicode)
+            assert wikiutil.parseQueryString(wikiutil.makeQueryString(in_str)) == MultiDict(in_unicode)
 
 
 class TestTickets:
--- a/MoinMoin/action/AttachFile.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/action/AttachFile.py	Wed Feb 11 02:34:33 2009 +0100
@@ -219,7 +219,7 @@
     """
     from MoinMoin.logfile import editlog
     t = wikiutil.timestamp2version(time.time())
-    fname = wikiutil.url_quote(filename, want_unicode=True)
+    fname = wikiutil.url_quote(filename)
 
     # Write to global log
     log = editlog.EditLog(request)
--- a/MoinMoin/action/Despam.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/action/Despam.py	Wed Feb 11 02:34:33 2009 +0100
@@ -149,7 +149,7 @@
 def revert_pages(request, editor, timestamp):
     _ = request.getText
 
-    editor = wikiutil.url_unquote(editor, want_unicode=False)
+    editor = wikiutil.url_unquote(editor)
     timestamp = int(timestamp * 1000000)
     log = editlog.EditLog(request)
     pages = {}
--- a/MoinMoin/logfile/eventlog.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/logfile/eventlog.py	Wed Feb 11 02:34:33 2009 +0100
@@ -46,7 +46,7 @@
                     # Save those http headers in UPPERcase
                     values[key.upper()] = value
         # Encode values in a query string TODO: use more readable format
-        values = wikiutil.makeQueryString(values, want_unicode=True)
+        values = wikiutil.makeQueryString(values)
         self._add(u"%d\t%s\t%s\n" % (mtime_usecs, eventtype, values))
 
     def parser(self, line):
--- a/MoinMoin/parser/text_creole.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/parser/text_creole.py	Wed Feb 11 02:34:33 2009 +0100
@@ -297,7 +297,7 @@
                 # link to an attachment
                 scheme = m.group('attach_scheme')
                 attachment = m.group('attach_addr')
-                url = wikiutil.url_unquote(attachment, want_unicode=True)
+                url = wikiutil.url_unquote(attachment)
                 text = self.get_text(node)
                 return ''.join([
                         self.formatter.attachment_link(1, url),
@@ -321,7 +321,7 @@
         if m:
             if m.group('page_name'):
                 # inserted anchors
-                url = wikiutil.url_unquote(target, want_unicode=True)
+                url = wikiutil.url_unquote(target)
                 if target.startswith('#'):
                     return self.formatter.anchordef(url[1:])
                 # default to images
@@ -331,14 +331,14 @@
                 # external link
                 address = m.group('extern_addr')
                 proto = m.group('extern_proto')
-                url = wikiutil.url_unquote(address, want_unicode=True)
+                url = wikiutil.url_unquote(address)
                 return self.formatter.image(
                     src=url, alt=text, html_class='external_image')
             elif m.group('attach_scheme'):
                 # link to an attachment
                 scheme = m.group('attach_scheme')
                 attachment = m.group('attach_addr')
-                url = wikiutil.url_unquote(attachment, want_unicode=True)
+                url = wikiutil.url_unquote(attachment)
                 if scheme == 'image':
                     return self.formatter.attachment_image(
                         url, alt=text, html_class='image')
@@ -350,19 +350,19 @@
                 # interwiki link
                 pass
 #        return "".join(["{{", self.formatter.text(target), "}}"])
-        url = wikiutil.url_unquote(node.content, want_unicode=True)
+        url = wikiutil.url_unquote(node.content)
         return self.formatter.attachment_inlined(url, text)
 
 # Not used
 #    def drawing_emit(self, node):
-#        url = wikiutil.url_unquote(node.content, want_unicode=True)
+#        url = wikiutil.url_unquote(node.content)
 #        text = self.get_text(node)
 #        return self.formatter.attachment_drawing(url, text)
 
 # Not used
 #    def figure_emit(self, node):
 #        text = self.get_text(node)
-#        url = wikiutil.url_unquote(node.content, want_unicode=True)
+#        url = wikiutil.url_unquote(node.content)
 #        return ''.join([
 #            self.formatter.rawHTML('<div class="figure">'),
 #            self.get_image(url, text), self.emit_children(node),
--- a/MoinMoin/parser/text_moin_wiki.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/parser/text_moin_wiki.py	Wed Feb 11 02:34:33 2009 +0100
@@ -692,7 +692,7 @@
     def _transclude_repl(self, word, groups):
         """Handles transcluding content, usually embedding images."""
         target = groups.get('transclude_target', '')
-        target = wikiutil.url_unquote(target, want_unicode=True)
+        target = wikiutil.url_unquote(target)
         desc = groups.get('transclude_desc', '') or ''
         params = groups.get('transclude_params', u'') or u''
         acceptable_attrs_img = ['class', 'title', 'longdesc', 'width', 'height', 'align', ] # no style because of JS
@@ -723,7 +723,7 @@
 
             elif m.group('attach_scheme'):
                 scheme = m.group('attach_scheme')
-                url = wikiutil.url_unquote(m.group('attach_addr'), want_unicode=True)
+                url = wikiutil.url_unquote(m.group('attach_addr'))
                 if scheme == 'attachment':
                     mt = wikiutil.MimeType(filename=url)
                     if mt.major == 'text':
@@ -894,7 +894,7 @@
 
             elif mt.group('attach_scheme'):
                 scheme = mt.group('attach_scheme')
-                url = wikiutil.url_unquote(mt.group('attach_addr'), want_unicode=True)
+                url = wikiutil.url_unquote(mt.group('attach_addr'))
                 tag_attrs, query_args = self._get_params(params,
                                                          tag_attrs={'title': desc, },
                                                          acceptable_attrs=acceptable_attrs)
--- a/MoinMoin/script/migration/_conv160.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/script/migration/_conv160.py	Wed Feb 11 02:34:33 2009 +0100
@@ -108,7 +108,7 @@
                 pagename = kvdict.get('pagename')
                 if pagename and ('PAGE', pagename) in self.renames:
                     kvdict['pagename'] = self.renames[('PAGE', pagename)]
-                kvpairs = wikiutil.makeQueryString(kvdict, want_unicode=False)
+                kvpairs = wikiutil.makeQueryString(kvdict)
                 fields = str(timestamp), action, kvpairs
                 line = '\t'.join(fields) + '\n'
                 f.write(line)
--- a/MoinMoin/script/migration/_conv160_wiki.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/script/migration/_conv160_wiki.py	Wed Feb 11 02:34:33 2009 +0100
@@ -332,7 +332,7 @@
         pagename, fname = AttachFile.absoluteName(fname, self.pagename)
         from_this_page = pagename == self.pagename
         fname = self._replace(('FILE', pagename, fname))
-        fname = wikiutil.url_unquote(fname, want_unicode=True)
+        fname = wikiutil.url_unquote(fname)
         fname = self._replace(('FILE', pagename, fname))
         pagename = self._replace(('PAGE', pagename))
         if from_this_page:
--- a/MoinMoin/script/migration/_conv160a.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/script/migration/_conv160a.py	Wed Feb 11 02:34:33 2009 +0100
@@ -108,7 +108,7 @@
                 pagename = kvdict.get('pagename')
                 if pagename and ('PAGE', pagename) in self.renames:
                     kvdict['pagename'] = self.renames[('PAGE', pagename)]
-                kvpairs = wikiutil.makeQueryString(kvdict, want_unicode=False)
+                kvpairs = wikiutil.makeQueryString(kvdict)
                 fields = str(timestamp), action, kvpairs
                 line = '\t'.join(fields) + '\n'
                 f.write(line)
--- a/MoinMoin/script/migration/_conv160a_wiki.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/script/migration/_conv160a_wiki.py	Wed Feb 11 02:34:33 2009 +0100
@@ -322,7 +322,7 @@
         pagename, fname = AttachFile.absoluteName(fname, self.pagename)
         from_this_page = pagename == self.pagename
         fname = self._replace(('FILE', pagename, fname))
-        #fname = wikiutil.url_unquote(fname, want_unicode=True)
+        #fname = wikiutil.url_unquote(fname)
         #fname = self._replace(('FILE', pagename, fname))
         pagename = self._replace(('PAGE', pagename))
         if from_this_page:
--- a/MoinMoin/script/migration/text_moin158_wiki.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/script/migration/text_moin158_wiki.py	Wed Feb 11 02:34:33 2009 +0100
@@ -217,7 +217,7 @@
         inline = url[0] == 'i'
         drawing = url[0] == 'd'
         url = url.split(":", 1)[1]
-        url = wikiutil.url_unquote(url, want_unicode=True)
+        url = wikiutil.url_unquote(url)
         text = text or url
 
         from MoinMoin.action import AttachFile
--- a/MoinMoin/stats/hitcounts.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/stats/hitcounts.py	Wed Feb 11 02:34:33 2009 +0100
@@ -146,9 +146,7 @@
     # check params
     filterpage = None
     if params.startswith('page='):
-        params = params[len('page='):]
-        params = wikiutil.url_unquote(params, want_unicode=False)
-        filterpage = wikiutil.decodeUserInput(params)
+        filterpage = wikiutil.url_unquote(params[len('page='):])
 
     if request and request.values and 'page' in request.values:
         filterpage = request.values['page']
--- a/MoinMoin/wikiutil.py	Tue Feb 10 16:18:59 2009 +0100
+++ b/MoinMoin/wikiutil.py	Wed Feb 11 02:34:33 2009 +0100
@@ -25,6 +25,8 @@
 from MoinMoin.support.python_compatibility import rsplit
 from inspect import getargspec, isfunction, isclass, ismethod
 
+from MoinMoin import web # needed so that next line works:
+import werkzeug.utils
 
 # Exceptions
 class InvalidFileNameError(Exception):
@@ -88,107 +90,67 @@
     raise UnicodeError('The string %r cannot be decoded.' % s)
 
 
-# this is a thin wrapper around urllib (urllib only handles str, not unicode)
-# with py <= 2.4.1, it would give incorrect results with unicode
-# with py == 2.4.2, it crashes with unicode, if it contains non-ASCII chars
-def url_quote(s, safe='/', want_unicode=False):
-    """
-    Wrapper around urllib.quote doing the encoding/decoding as usually wanted:
-
-    @param s: the string to quote (can be str or unicode, if it is unicode,
-              config.charset is used to encode it before calling urllib)
-    @param safe: just passed through to urllib
-    @param want_unicode: for the less usual case that you want to get back
-                         unicode and not str, set this to True
-                         Default is False.
-    """
-    if isinstance(s, unicode):
-        s = s.encode(config.charset)
-    elif not isinstance(s, str):
-        s = str(s)
-    s = urllib.quote(s, safe)
-    if want_unicode:
-        s = s.decode(config.charset) # ascii would also work
-    return s
-
-def url_quote_plus(s, safe='/', want_unicode=False):
-    """
-    Wrapper around urllib.quote_plus doing the encoding/decoding as usually wanted:
+def url_quote(s, safe='/', want_unicode=None):
+    """ see werkzeug.utils.url_quote, we use a different safe param default value """
+    try:
+        assert want_unicode is None
+    except AssertionError:
+        log.exception("call with deprecated want_unicode param, please fix caller")
+    return werkzeug.utils.url_quote(s, charset=config.charset, safe=safe)
 
-    @param s: the string to quote (can be str or unicode, if it is unicode,
-              config.charset is used to encode it before calling urllib)
-    @param safe: just passed through to urllib
-    @param want_unicode: for the less usual case that you want to get back
-                         unicode and not str, set this to True
-                         Default is False.
-    """
-    if isinstance(s, unicode):
-        s = s.encode(config.charset)
-    elif not isinstance(s, str):
-        s = str(s)
-    s = urllib.quote_plus(s, safe)
-    if want_unicode:
-        s = s.decode(config.charset) # ascii would also work
-    return s
-
-def url_unquote(s, want_unicode=True):
-    """
-    Wrapper around urllib.unquote doing the encoding/decoding as usually wanted:
+def url_quote_plus(s, safe='/', want_unicode=None):
+    """ see werkzeug.utils.url_quote_plus, we use a different safe param default value """
+    try:
+        assert want_unicode is None
+    except AssertionError:
+        log.exception("call with deprecated want_unicode param, please fix caller")
+    return werkzeug.utils.url_quote_plus(s, charset=config.charset, safe=safe)
 
-    @param s: the string to unquote (can be str or unicode, if it is unicode,
-              config.charset is used to encode it before calling urllib)
-    @param want_unicode: for the less usual case that you want to get back
-                         str and not unicode, set this to False.
-                         Default is True.
-    """
-    if isinstance(s, unicode):
-        s = s.encode(config.charset) # ascii would also work
-    s = urllib.unquote(s)
-    if want_unicode:
-        try:
-            s = decodeUserInput(s, [config.charset, 'iso-8859-1', ]) # try hard
-        except UnicodeError:
-            s = s.decode('ascii', 'replace') # better than crashing
-    return s
+def url_unquote(s, want_unicode=None):
+    """ see werkzeug.utils.url_unquote """
+    try:
+        assert want_unicode is None
+    except AssertionError:
+        log.exception("call with deprecated want_unicode param, please fix caller")
+    return werkzeug.utils.url_unquote(s, charset=config.charset, errors='fallback:iso-8859-1')
 
-def parseQueryString(qstr, want_unicode=True):
-    """ Parse a querystring "key=value&..." into a dict.
-    """
-    is_unicode = isinstance(qstr, unicode)
-    if is_unicode:
-        qstr = qstr.encode(config.charset)
-    values = {}
-    for key, value in cgi.parse_qs(qstr).items():
-        if len(value) < 2:
-            v = ''.join(value)
-            if want_unicode:
-                try:
-                    v = unicode(v, config.charset)
-                except UnicodeDecodeError:
-                    v = unicode(v, 'iso-8859-1', 'replace')
-            values[key] = v
-    return values
 
-def makeQueryString(qstr=None, want_unicode=False, **kw):
+def parseQueryString(qstr, want_unicode=None):
+    """ see werkzeug.utils.url_decode """
+    try:
+        assert want_unicode is None
+    except AssertionError:
+        log.exception("call with deprecated want_unicode param, please fix caller")
+    return werkzeug.utils.url_decode(qstr, charset=config.charset, errors='fallback:iso-8859-1',
+                                     decode_keys=False, include_empty=False)
+
+def makeQueryString(qstr=None, want_unicode=None, **kw):
     """ Make a querystring from arguments.
 
     kw arguments overide values in qstr.
 
-    If a string is passed in, it's returned verbatim and
-    keyword parameters are ignored.
+    If a string is passed in, it's returned verbatim and keyword parameters are ignored.
+
+    See also: werkzeug.utils.url_encode
 
     @param qstr: dict to format as query string, using either ascii or unicode
     @param kw: same as dict when using keywords, using ascii or unicode
     @rtype: string
     @return: query string ready to use in a url
     """
+    try:
+        assert want_unicode is None
+    except AssertionError:
+        log.exception("call with deprecated want_unicode param, please fix caller")
     if qstr is None:
         qstr = {}
+    elif isinstance(qstr, (str, unicode)):
+        return qstr
     if isinstance(qstr, dict):
         qstr.update(kw)
-        items = ['%s=%s' % (url_quote_plus(key, want_unicode=want_unicode), url_quote_plus(value, want_unicode=want_unicode)) for key, value in qstr.items()]
-        qstr = '&'.join(items)
-    return qstr
+        return werkzeug.utils.url_encode(qstr, charset=config.charset, encode_keys=True)
+    else:
+        raise ValueError("Unsupported argument type, should be dict.")
 
 
 def quoteWikinameURL(pagename, charset=config.charset):
@@ -203,35 +165,13 @@
     @rtype: string
     @return: the quoted filename, all unsafe characters encoded
     """
-    pagename = pagename.encode(charset)
-    return urllib.quote(pagename)
+    # XXX please note that urllib.quote and werkzeug.utils.url_quote have
+    # XXX different defaults for safe=...
+    return werkzeug.utils.url_quote(pagename, charset=charset, safe='/')
 
 
-def escape(s, quote=0):
-    """ Escape possible html tags
-
-    Replace special characters '&', '<' and '>' by SGML entities.
-    (taken from cgi.escape so we don't have to include that, even if we
-    don't use cgi at all)
+escape = werkzeug.utils.escape
 
-    @param s: (unicode) string to escape
-    @param quote: bool, should transform '\"' to '&quot;'
-    @rtype: when called with a unicode object, return unicode object - otherwise return string object
-    @return: escaped version of s
-    """
-    if not isinstance(s, (str, unicode)):
-        s = str(s)
-
-    # Must first replace &
-    s = s.replace("&", "&amp;")
-
-    # Then other...
-    s = s.replace("<", "&lt;")
-    s = s.replace(">", "&gt;")
-    if quote:
-        s = s.replace('"', "&quot;")
-        s = s.replace("'", "&#x27;")
-    return s
 
 def clean_input(text, max_len=201):
     """ Clean input: