changeset 2165:e3cba78bea69

datastructure diff function
author Ana Balica <ana.balica@gmail.com>
date Fri, 26 Jul 2013 17:09:01 +0300
parents 63268ff4d4c0
children 8870f6cc7884
files MoinMoin/util/_tests/test_diff_datastruct.py MoinMoin/util/diff_datastruct.py
diffstat 2 files changed, 179 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/util/_tests/test_diff_datastruct.py	Fri Jul 26 17:09:01 2013 +0300
@@ -0,0 +1,99 @@
+# Copyright: 2013 MoinMoin:AnaBalica
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+"""
+    MoinMoin - MoinMoin.util.diff_datastruct Tests
+"""
+
+import pytest
+
+from MoinMoin.util.diff_datastruct import diff, Undefined, INSERT, DELETE
+
+
+class TestDiffDatastruct(object):
+
+    def test_diff_no_change(self):
+        datastruct = [None, True, 42, u"value", [1, 2, 3], dict(one=1, two=2)]
+        for d in datastruct:
+            assert diff(d, d) == []
+
+    def test_diff_none(self):
+        tests = [(None, None, []),
+                 (Undefined, None, [(INSERT, [], None)]),
+                 (None, Undefined, [(DELETE, [], None)])]
+        for d1, d2, expected in tests:
+            assert diff(d1, d2) == expected
+
+    def test_diff_bool(self):
+        tests = [(True, True, []),
+                 (Undefined, True, [(INSERT, [], True)]),
+                 (True, Undefined, [(DELETE, [], True)]),
+                 (True, False, [(DELETE, [], True), (INSERT, [], False)])]
+        for d1, d2, expected in tests:
+            assert diff(d1, d2) == expected
+
+    def test_diff_int(self):
+        tests = [(1, 1, []),
+                 (Undefined, 2, [(INSERT, [], 2)]),
+                 (2, Undefined, [(DELETE, [], 2)]),
+                 (3, 4, [(DELETE, [], 3), (INSERT, [], 4)])]
+        for d1, d2, expected in tests:
+            assert diff(d1, d2) == expected
+
+    def test_diff_float(self):
+        tests = [(1.1, 1.1, []),
+                 (Undefined, 2.2, [(INSERT, [], 2.2)]),
+                 (2.2, Undefined, [(DELETE, [], 2.2)]),
+                 (3.3, 4.4, [(DELETE, [], 3.3), (INSERT, [], 4.4)])]
+        for d1, d2, expected in tests:
+            assert diff(d1, d2) == expected
+
+    def test_diff_unicode(self):
+        tests = [(u"same", u"same", []),
+                 (Undefined, u"new", [(INSERT, [], u"new")]),
+                 (u"old", Undefined, [(DELETE, [], u"old")]),
+                 (u"some value", u"some other value",
+                  [(DELETE, [], u"some value"), (INSERT, [], u"some other value")])]
+        for d1, d2, expected in tests:
+            assert diff(d1, d2) == expected
+
+    def test_diff_list(self):
+        tests = [([1], [1], []),
+                 (Undefined, [2], [(INSERT, [], [2])]),
+                 ([2], Undefined, [(DELETE, [], [2])]),
+                 ([1, 2], [2, 3], [(DELETE, [], [1]), (INSERT, [], [3])]),
+                 ([9, 8], [8, 7, 6, 5], [(DELETE, [], [9]), (INSERT, [], [7, 6, 5])])]
+        for d1, d2, expected in tests:
+            assert diff(d1, d2) == expected
+
+    def test_diff_dict(self):
+        tests = [(dict(same=1), dict(same=1), []),
+                 (Undefined, dict(new=1), [(INSERT, ["new"], 1)]),
+                 (dict(old=1), Undefined, [(DELETE, ["old"], 1)]),
+                 (dict(same=1, old=2), dict(same=1, new1=3, new2=4),
+                  [(INSERT, ["new1"], 3), (INSERT, ["new2"], 4),
+                   (DELETE, ["old"], 2)])]
+        for d1, d2, expected in tests:
+            assert diff(d1, d2) == expected
+
+    def test_diff_nested_dict(self):
+        tests = [(dict(key=dict(same=None)), dict(key=dict(same=None)), []),
+                 (dict(key=dict()), dict(key=dict(added=None)), [(INSERT, ["key", "added"], None)]),
+                 (dict(key=dict(removed=None)), dict(key=dict()), [(DELETE, ["key", "removed"], None)])]
+        for d1, d2, expected in tests:
+            assert diff(d1, d2) == expected
+
+    def test_diff_str_unicode_keys(self):
+        d1 = {"old": u"old", u"same1": u"same1", "same2": u"same2"}
+        d2 = {u"new": u"new", "same1": u"same1", u"same2": u"same2"}
+        assert diff(d1, d2) == [(INSERT, ["new"], u"new"),
+                                (DELETE, ["old"], u"old")]
+
+    def test_diff_errors(self):
+        tests = [(u"foo", True),
+                 ((1, 2, ), (3, 4, )),
+                 (dict(key=(1, 2, )), dict()),
+                 (None, [1, 2, ])]
+        for d1, d2 in tests:
+            with pytest.raises(TypeError):
+                diff(d1, d2)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/util/diff_datastruct.py	Fri Jul 26 17:09:01 2013 +0300
@@ -0,0 +1,80 @@
+# Copyright: 2013 MoinMoin:AnaBalica
+# License: GNU GPL v2 (or any later version), see LICENSE.txt for details.
+
+import difflib
+from types import NoneType
+from collections import Hashable
+
+INSERT = u"insert"
+DELETE = u"delete"
+REPLACE = u"replace"
+
+
+class UndefinedType(object):
+    """ Represents a non-existing value """
+
+
+Undefined = UndefinedType()
+
+
+def diff(d1, d2, basekeys=None):
+    """ Get the diff of 2 datastructures (usually 2 meta dicts)
+
+    :param d1: old datastructure
+    :param d2: new datastructure
+    :param basekeys: list of data keys' basenames (default: None, meaning [])
+    :return: a list of tuples of the format (<change type>, <basekeys>, <value>)
+             that can be used to format a diff
+    """
+    if basekeys is None:
+        basekeys = []
+    changes = []
+
+    if isinstance(d1, UndefinedType) and isinstance(d2, (dict, list, )):
+        d1 = type(d2)()
+    elif isinstance(d2, UndefinedType) and isinstance(d1, (dict, list, )):
+        d2 = type(d1)()
+    if isinstance(d1, dict) and isinstance(d2, dict):
+        added = set(d2) - set(d1)
+        removed = set(d1) - set(d2)
+        all_ = set(d1) | set(d2)
+        for key in sorted(all_):
+            keys = basekeys + [key]
+            if key in added:
+                changes.extend(diff(Undefined, d2[key], keys))
+            elif key in removed:
+                changes.extend(diff(d1[key], Undefined, keys))
+            else:
+                changes.extend(diff(d1[key], d2[key], keys))
+    elif isinstance(d1, list) and isinstance(d2, list):
+        hashable = all(isinstance(d1, unicode) or all(
+            isinstance(v, Hashable) for v in d) for d in [d1, d2])
+        if hashable:
+            matches = difflib.SequenceMatcher(None, d1, d2)
+            for tag, d1_start, d1_end, d2_start, d2_end in matches.get_opcodes():
+                if tag == REPLACE:
+                    changes.extend([(DELETE, basekeys, d1[d1_start:d1_end]),
+                                    (INSERT, basekeys, d2[d2_start:d2_end])])
+                elif tag == DELETE:
+                    changes.append((DELETE, basekeys, d1[d1_start:d1_end]))
+                elif tag == INSERT:
+                    changes.append((INSERT, basekeys, d2[d2_start:d2_end]))
+        else:
+            changes.extend(diff(unicode(d1), unicode(d2), basekeys))
+    elif any(isinstance(d, (NoneType, bool, int, long, float, unicode, )) for d in (d1, d2)):
+        if isinstance(d1, UndefinedType):
+            changes.append((INSERT, basekeys, d2))
+        elif isinstance(d2, UndefinedType):
+            changes.append((DELETE, basekeys, d1))
+        elif type(d1) == type(d2):
+            if d1 != d2:
+                changes.extend([(DELETE, basekeys, d1), (INSERT, basekeys, d2)])
+        else:
+            raise TypeError(
+                "Unsupported diff between {0} and {1} data types".format(
+                    type(d1), type(d2)))
+    else:
+        raise TypeError(
+            "Unsupported diff between {0} and {1} data types".format(
+                type(d1), type(d2)))
+    return changes