changeset 171:0574bae2bf7c

GivenAuth: require correct coding, update docs accordingly; remove some unused functions from wikiutil
author Thomas Waldmann <tw AT waldmann-edv DOT de>
date Wed, 06 Apr 2011 03:34:25 +0200
parents 1481cbc12553
children 0b2454d84586
files MoinMoin/auth/__init__.py MoinMoin/wikiutil.py docs/admin/configure.rst
diffstat 3 files changed, 18 insertions(+), 70 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/auth/__init__.py	Wed Apr 06 02:52:21 2011 +0200
+++ b/MoinMoin/auth/__init__.py	Wed Apr 06 03:34:25 2011 +0200
@@ -275,7 +275,7 @@
                  strip_windomain=False,  # DOMAIN\joe -> joe
                  titlecase=False,  # joe doe -> Joe Doe
                  remove_blanks=False,  # Joe Doe -> JoeDoe
-                 coding=None,  # for decoding REMOTE_USER correctly (default: auto)
+                 coding='utf-8',  # for decoding REMOTE_USER correctly
                 ):
         self.env_var = env_var
         self.user_name = user_name
@@ -290,12 +290,7 @@
     def decode_username(self, name):
         """ decode the name we got from the environment var to unicode """
         if isinstance(name, str):
-            if self.coding:
-                name = name.decode(self.coding)
-            else:
-                # XXX we have no idea about REMOTE_USER encoding, please help if
-                # you know how to do that cleanly
-                name = wikiutil.decodeUnknownInput(name)
+            name = name.decode(self.coding)
         return name
 
     def transform_username(self, name):
--- a/MoinMoin/wikiutil.py	Wed Apr 06 02:52:21 2011 +0200
+++ b/MoinMoin/wikiutil.py	Wed Apr 06 03:34:25 2011 +0200
@@ -44,53 +44,7 @@
 ### Getting data from user/Sending data to user
 #############################################################################
 
-def decodeUnknownInput(text):
-    """ Decode input in unknown encoding
-
-    First we try utf-8 because it has special format, and it will decode
-    only utf-8 files. Then we try config.charset, then iso-8859-1 using
-    'replace'. We will never raise an exception, but may return junk
-    data.
-
-    WARNING: Use this function only for data that you view, not for data
-    that you save in the wiki.
-
-    :param text: the text to decode, string
-    :rtype: unicode
-    :returns: decoded text (maybe wrong)
-    """
-    # Shortcut for unicode input
-    if isinstance(text, unicode):
-        return text
-
-    try:
-        return unicode(text, 'utf-8')
-    except UnicodeError:
-        if config.charset not in ['utf-8', 'iso-8859-1']:
-            try:
-                return unicode(text, config.charset)
-            except UnicodeError:
-                pass
-        return unicode(text, 'iso-8859-1', 'replace')
-
-
-def decodeUserInput(s, charsets=[config.charset]):
-    """
-    Decodes input from the user.
-
-    :param s: the string to unquote
-    :param charsets: list of charsets to assume the string is in
-    :rtype: unicode
-    :returns: the unquoted string as unicode
-    """
-    for charset in charsets:
-        try:
-            return s.decode(charset)
-        except UnicodeError:
-            pass
-    raise UnicodeError('The string %r cannot be decoded.' % s)
-
-
+# TODO: use similar code in a flatland validator
 def clean_input(text, max_len=201):
     """ Clean input:
         replace CR, LF, TAB by whitespace
@@ -112,21 +66,6 @@
         return text.translate(config.clean_input_translation_map)
 
 
-def make_breakable(text, maxlen):
-    """ make a text breakable by inserting spaces into nonbreakable parts
-    """
-    text = text.split(" ")
-    newtext = []
-    for part in text:
-        if len(part) > maxlen:
-            while part:
-                newtext.append(part[:maxlen])
-                part = part[maxlen:]
-        else:
-            newtext.append(part)
-    return " ".join(newtext)
-
-
 #############################################################################
 ### Item types (based on item names)
 #############################################################################
--- a/docs/admin/configure.rst	Wed Apr 06 02:52:21 2011 +0200
+++ b/docs/admin/configure.rst	Wed Apr 06 03:34:25 2011 +0200
@@ -306,7 +306,7 @@
 the result to moin (usually via environment variable REMOTE_USER)::
 
     from MoinMoin.auth import GivenAuth
-    auth = [GivenAuth(autocreate=True)]
+    auth = [GivenAuth(autocreate=True, coding='utf-8')]
 
 Using this has some pros and cons:
 
@@ -317,6 +317,20 @@
 * all the stuff you won't get (but you need) will need to be manually stored
   and updated in the user's profile (e.g. the user's email address, etc.)
 
+Please note that you must give the correct coding (character set) so that moin
+can decode the username to unicode, if necessary. For environment variables
+like REMOTE_USER, the coding might depend on your operating system.
+
+If you do not know the correct coding, try: 'utf-8', 'iso-8859-1', ...
+
+.. todo::
+
+   add the usual coding(s) for some platforms (like Windows)
+
+To try it out, change the configuration, restart moin and then use a non-ASCII
+username (e.g. with German umlauts or accented characters). If moin does not
+crash (and log a UnicodeError), you have likely found the correct coding.
+
 OpenID
 ------
 With OpenID moin can re-use the authentication done by some OpenID provider