changeset 3399:b6d727809985

introduce wikiutil.parse_quoted_separated_ext This new function can now handle space-separated arguments (pass None as separator) as well as bracketing. Includes tests.
author Johannes Berg <johannes AT sipsolutions DOT net>
date Sat, 22 Mar 2008 01:20:57 +0100
parents f212fe993c14
children b12aba864312
files MoinMoin/_tests/test_wikiutil.py MoinMoin/wikiutil.py
diffstat 2 files changed, 236 insertions(+), 54 deletions(-) [+]
line wrap: on
line diff
--- a/MoinMoin/_tests/test_wikiutil.py	Fri Mar 21 22:05:38 2008 +0100
+++ b/MoinMoin/_tests/test_wikiutil.py	Sat Mar 22 01:20:57 2008 +0100
@@ -327,6 +327,77 @@
         py.test.raises(ValueError, result.parse_argument,  u'7')
         py.test.raises(ValueError, result.parse_argument,  u'mm')
 
+    def testExtendedParser(self):
+        tests = [
+            (u'"a", "b", "c"', u',', None, [u'a', u'b', u'c']),
+            (u'a:b, b:c, c:d', u',', u':', [(u'a', u'b'), (u'b', u'c'), (u'c', u'd')]),
+            (u'a:b, b:c, c:d', u',', None, [u'a:b', u'b:c', u'c:d']),
+            (u'a=b, b=c, c=d', u',', None, [u'a=b', u'b=c', u'c=d']),
+            (u'a=b, b=c, c=d', u',', u'=', [(u'a', u'b'), (u'b', u'c'), (u'c', u'd')]),
+            (u'"a"; "b"; "c"', u';', None, [u'a', u'b', u'c']),
+            (u'a:b; b:c; c:d', u';', u':', [(u'a', u'b'), (u'b', u'c'), (u'c', u'd')]),
+            (u'a:b; b:c; c:d', u';', None, [u'a:b', u'b:c', u'c:d']),
+            (u'a=b; b=c; c=d', u';', None, [u'a=b', u'b=c', u'c=d']),
+            (u'a=b; b=c; c=d', u';', u'=', [(u'a', u'b'), (u'b', u'c'), (u'c', u'd')]),
+            (u'"a" "b" "c"', None, None, [u'a', u'b', u'c']),
+            (u'" a " "b" "c"', None, None, [u' a ', u'b', u'c']),
+            (u'"a  " "b" "c"', None, None, [u'a  ', u'b', u'c']),
+            (u'"  a" "b" "c"', None, None, [u'  a', u'b', u'c']),
+            (u'"  a" "b" "c"', None, u':', [u'  a', u'b', u'c']),
+            (u'"a:a" "b:b" "c:b"', None, u':', [u'a:a', u'b:b', u'c:b']),
+            (u'   a:a  ', None, u':', [None, None, None, (u'a', u'a'), None, None]),
+            (u'a a: a', None, u':', [u'a', (u'a', None), u'a']),
+            (u'a a:"b c d" a', None, u':', [u'a', (u'a', u'b c d'), u'a']),
+            (u'a a:"b "" d" a', None, u':', [u'a', (u'a', u'b " d'), u'a']),
+            (u'title:Help* dog cat', None, u':', [(u'title', u'Help*'), u'dog', u'cat']),
+            (u'title:Help* "dog cat"', None, u':', [(u'title', u'Help*'), u'dog cat']),
+        ]
+
+        def _check(args, sep, kwsep, expected):
+            res = wikiutil.parse_quoted_separated_ext(args, sep, kwsep)
+            assert res == expected
+
+        for test in tests:
+            yield tuple([_check] + list(test))
+
+    def testExtendedParserBracketing(self):
+        tests = [
+            (u'"a", "b", "c"', u',', None, [u'a', u'b', u'c']),
+            (u'("a", "b", "c")', u',', None, [[u'a', u'b', u'c']]),
+            (u'("a"("b", "c"))', u',', None, [[u'a', [u'b', u'c']]]),
+            (u'("a" ("b" "c"))', None, None, [[u'a', [u'b', u'c']]]),
+            (u'("a", <"b", ("c")>)', u',', None, [[u'a', [u'b', [u'c']]]]),
+        ]
+
+        def _check(args, sep, kwsep, expected):
+            res = wikiutil.parse_quoted_separated_ext(args, sep, kwsep, brackets=(u'<>', u'()'))
+            assert res == expected
+
+        for test in tests:
+            yield tuple([_check] + list(test))
+
+    def testExtendedParserBracketingErrors(self):
+        UCE = wikiutil.BracketUnexpectedCloseError
+        MCE = wikiutil.BracketMissingCloseError
+        tests = [
+            (u'("a", "b", "c"', u',', None, MCE),
+            (u'("a"("b", "c")', u',', None, MCE),
+            (u'("a"<"b", "c")>', u',', None, UCE),
+            (u')("a" ("b" "c"))', None, None, UCE),
+            (u'("a", ("b", "c">))', u',', None, UCE),
+            (u'("a", ("b", <"c">>))', u',', None, UCE),
+            (u'(<(<)>)>', u',', None, UCE),
+        ]
+
+        def _check(args, sep, kwsep, err):
+            py.test.raises(err,
+                           wikiutil.parse_quoted_separated_ext,
+                           args, sep, kwsep,
+                           brackets=(u'<>', u'()'))
+
+        for test in tests:
+            yield tuple([_check] + list(test))
+
 class TestArgGetters:
     def testGetBoolean(self):
         tests = [
--- a/MoinMoin/wikiutil.py	Fri Mar 21 22:05:38 2008 +0100
+++ b/MoinMoin/wikiutil.py	Sat Mar 22 01:20:57 2008 +0100
@@ -1252,15 +1252,31 @@
 ### Parameter parsing
 #############################################################################
 
-def parse_quoted_separated(args, separator=',', name_value=True, seplimit=0):
+class BracketError(Exception):
+    pass
+
+class BracketUnexpectedCloseError(BracketError):
+    def __init__(self, bracket):
+        self.bracket = bracket
+        BracketError.__init__(self, "Unexpected closing bracket %s" % bracket)
+
+class BracketMissingCloseError(BracketError):
+    def __init__(self, bracket):
+        self.bracket = bracket
+        BracketError.__init__(self, "Missing closing bracket %s" % bracket)
+
+def parse_quoted_separated_ext(args, separator=None, name_value_separator=None,
+                               brackets=None, seplimit=0):
     """
-    Parses the given arguments according to the other parameters.
-    If name_value is True, it parses keyword arguments (name=value)
-    and returns the keyword arguments in the second return value
-    (a dict) and positional arguments that occurred after any keyword
-    argument in the third return value (a list).
+    Parses the given string according to the other parameters.
+
+    If name_value_separator evaluates to True, the function parses
+    keyword arguments (name=value) and returns the keyword arguments
+    in the second return value (a dict) and positional arguments that
+    occurred after any keyword argument in the third return value (a
+    list).
     The first return value always contains the positional arguments,
-    if name_value is False only it is present.
+    if name_value_separator evalutes to False only it is present.
 
     Arguments can be quoted with a double-quote ('"') and the quote
     can be escaped by doubling it, the separator and equal sign (for
@@ -1274,27 +1290,50 @@
     If a name or value does not start with a quote, then the quote
     character looses its special meaning for that name or value.
 
+    If multiple separators follow each other, this is treated as
+    having None arguments inbetween, that is also true for when
+    space is used as separators (when separator is None), filter
+    them out afterwards.
+
+    The function can also do bracketing, i.e. parse expressions
+    that contain things like "(a (a b))" to ['a', ['a', 'b']],
+    in this case, as in this example, the returned list will
+    contain sub-lists and the brackets parameter must be a list
+    of opening and closing brackets, e.g.
+        brackets = ['()', '<>']
+    Nesting will be observed between the different types of
+    brackets given. If bracketing doesn't match, a BracketError
+    instance is raised with a 'bracket' property indicating the
+    type of missing or unexpected bracket, the instance will be
+    either of the class BracketMissingCloseError or of the class
+    BracketUnexpectedCloseError.
+
     @param args: arguments to parse
-    @param separator: the argument separator, defaults to a comma (',')
-    @param name_value: indicates whether to parse keyword arguments
+    @param separator: the argument separator, defaults to None, meaning any
+        space separates arguments
+    @param name_value_separator: separator for name=value, default '=',
+        name=value keywords not parsed if evaluates to False
+    @param brackets: a list of two-character strings giving
+        opening and closing brackets
     @param seplimit: limits the number of parsed arguments
-    @rtype: tuple, list
-    @returns: if name_value is False, returns a list of arguments,
-              otherwise a list of positional, a dict of keyword and
-              a list of trailing arguments
+    @rtype: list
+    @returns: list of unicode strings and, if name_value_separator is set,
+        tuples containing (key, value) pairs
     """
     idx = 0
+    assert name_value_separator is None or name_value_separator != separator
+    assert name_value_separator is None or len(name_value_separator) == 1
     if not isinstance(args, unicode):
         raise TypeError('args must be unicode')
     max = len(args)
-    ret_positional = [] # positional argument return value
-    ret_trailing = []   # trailing arguments return value
-    positional = ret_positional
-    keyword = {}        # keyword arguments
+    result = []         # result list
     curname = u''       # current name, initially value as well (name=value)
     cur = None          # current value
     cur_quoted = False  # indicates whether value was quoted,
                         # needed None vs. u'' handling
+    # MAYBE means that if a bracket follows, it's not valid
+    NO, IFNOTBRACKET, IFNOTEND, IFNOTBRACKETOREND, YES = range(5)
+    cur_present = IFNOTBRACKET # indicates cur/curname validity
     quoted = False      # we're inside quotes
     skipquote = 0       # next quote is a quoted quote
     noquote = False     # no quotes expected because word didn't start with one
@@ -1302,8 +1341,51 @@
     separator_count = 0 # number of separators encountered
     SPACE = [' ', '\t', ]
     nextitemsep = [separator]   # used for skipping trailing space
-    if name_value:
-        nextitemsep.append('=')
+    SPACE = [' ', '\t', ]
+    if separator is None:
+        nextitemsep = SPACE[:]
+        separators = SPACE
+    else:
+        nextitemsep = [separator]   # used for skipping trailing space
+        separators = [separator]
+    if name_value_separator:
+        nextitemsep.append(name_value_separator)
+
+    # bracketing support
+    opening = []
+    closing = []
+    bracketstack = []
+    matchingbracket = {}
+    if brackets:
+        for o, c in brackets:
+            assert not o in opening
+            opening.append(o)
+            assert not c in closing
+            closing.append(c)
+            matchingbracket[o] = c
+
+    def additem(result, curname, cur, cur_quoted, separator_count, nextitemsep):
+        if cur is None:
+            cur = curname
+            curname = None
+        if not cur and not cur_quoted:
+            cur = None
+        if curname is not None:
+            result.append((curname, cur))
+        else:
+            result.append(cur)
+        curname = u''
+        cur = None
+        noquote = False
+        cur_quoted = False
+        separator_count += 1
+        seplimit_reached = False
+        if seplimit and separator_count >= seplimit:
+            seplimit_reached = True
+            nextitemsep = [n for n in nextitemsep if n in separators]
+
+        return curname, cur, noquote, cur_quoted, separator_count, seplimit_reached, nextitemsep
+
     while idx < max:
         char = args[idx]
         next = None
@@ -1311,7 +1393,7 @@
             next = args[idx+1]
         if skipquote:
             skipquote -= 1
-        if not quoted and char in SPACE:
+        if not separator is None and not quoted and char in SPACE:
             spaces = ''
             # accumulate all space
             while char in SPACE and idx < max - 1:
@@ -1330,32 +1412,21 @@
                     cur = cur + spaces
             elif curname:
                 curname = curname + spaces
-        elif not quoted and name_value and char == '=':
+        elif not quoted and char == name_value_separator:
             if cur is None:
                 cur = u''
                 cur_quoted = False
+                cur_present = YES
             else:
                 cur += '='
             noquote = False
-        elif not quoted and not seplimit_reached and char == separator:
-            if cur is None:
-                cur = curname
-                curname = None
-            if not cur and not cur_quoted:
-                cur = None
-            if curname is not None:
-                keyword[curname] = cur
-                positional = ret_trailing
-            else:
-                positional.append(cur)
-            curname = u''
-            cur = None
-            noquote = False
-            cur_quoted = False
-            separator_count += 1
-            if seplimit and separator_count >= seplimit:
-                seplimit_reached = True
-                nextitemsep.remove(separator)
+        elif not quoted and not seplimit_reached and char in separators:
+            if cur_present != NO:
+                (curname, cur, noquote, cur_quoted,
+                 separator_count, seplimit_reached,
+                 nextitemsep) = additem(result, curname, cur, cur_quoted,
+                                        separator_count, nextitemsep)
+            cur_present = IFNOTBRACKET
         elif not quoted and not noquote and char == '"':
             quoted = True
             cur_quoted = True
@@ -1364,31 +1435,71 @@
                 skipquote = 2 # will be decremented right away
             else:
                 quoted = False
+        elif not quoted and char in opening:
+            if cur_present and not cur_present in (IFNOTBRACKET, IFNOTBRACKETOREND):
+                (curname, cur, noquote, cur_quoted,
+                 separator_count, seplimit_reached,
+                 nextitemsep) = additem(result, curname, cur, cur_quoted,
+                                        separator_count, nextitemsep)
+            bracketstack.append((matchingbracket[char], result))
+            result = []
+            cur_present = IFNOTBRACKETOREND
+        elif not quoted and char in closing:
+            if cur_present and not cur_present in (IFNOTBRACKET, IFNOTBRACKETOREND):
+                (curname, cur, noquote, cur_quoted,
+                 separator_count, seplimit_reached,
+                 nextitemsep) = additem(result, curname, cur, cur_quoted,
+                                        separator_count, nextitemsep)
+            if not bracketstack:
+                raise BracketUnexpectedCloseError(char)
+            expected, oldresult = bracketstack[-1]
+            if not expected == char:
+                raise BracketUnexpectedCloseError(char)
+            del bracketstack[-1]
+            oldresult.append(result)
+            result = oldresult
+            cur_present = IFNOTBRACKETOREND
         else:
             if cur is not None:
                 cur = cur + char
             else:
                 curname = curname + char
             noquote = True
+            cur_present = YES
 
         idx += 1
 
-    if cur is None:
-        cur = curname
-        curname = None
-    cur_present = cur is not None
-    if not cur and not cur_quoted:
-        cur = None
-    if curname is not None:
-        keyword[curname] = cur
-    elif cur_present:
-        positional.append(cur)
+    if bracketstack:
+        raise BracketMissingCloseError(bracketstack[-1][0])
+
+    if cur_present and not cur_present in (IFNOTEND, IFNOTBRACKETOREND):
+        additem(result, curname, cur, cur_quoted, separator_count, nextitemsep)
+
+    return result
+
+def parse_quoted_separated(args, separator=',', name_value=True, seplimit=0):
+    result = []
+    positional = result
+    if name_value:
+        name_value_separator = '='
+        trailing = []
+        keywords = {}
+    else:
+        name_value_separator = None
+
+    l = parse_quoted_separated_ext(args, separator=separator,
+                                   name_value_separator=name_value_separator,
+                                   seplimit=seplimit)
+    for item in l:
+        if isinstance(item, tuple):
+            keywords[item[0]] = item[1]
+            positional = trailing
+        else:
+            positional.append(item)
 
     if name_value:
-        return ret_positional, keyword, ret_trailing
-    else:
-        return ret_positional
-
+        return result, keywords, trailing
+    return result
 
 def get_bool(request, arg, name=None, default=None):
     """