changeset 961:21eb4cb11e2c

Added binary diffing! Not much left for the getDiff function.
author Alexander Schremmer <alex AT alexanderweb DOT de>
date Sat, 01 Jul 2006 01:28:46 +0200
parents afb156d4caa5
children 930c9e82a60b
files MoinMoin/util/bdiff.py docs/CHANGES.aschremmer
diffstat 2 files changed, 86 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MoinMoin/util/bdiff.py	Sat Jul 01 01:28:46 2006 +0200
@@ -0,0 +1,79 @@
+# Binary patching and diffing
+#
+# Copyright 2005 Matt Mackall <mpm@selenic.com>
+# Copyright 2006 MoinMoin:AlexanderSchremmer
+#
+# Algorithm taken from mercurial's mdiff.py
+#
+# This software may be used and distributed according to the terms
+# of the GNU General Public License, incorporated herein by reference.
+
+import zlib, difflib, struct
+
+BDIFF_PATT = ">lll"
+
+def compress(text):
+    return zlib.compress(text) # here we could tune the compression level
+
+def decompress(bin):
+    return zlib.decompress(bin)
+
+def diff(a, b):
+    """ Generates a binary diff of the passed strings. """
+    if not a:
+        return b and (struct.pack(BDIFF_PATT, 0, 0, len(b)) + b)
+
+    bin = []
+    la = lb = 0
+    
+    p = [0]
+    for i in a: p.append(p[-1] + len(i))
+    
+    for am, bm, size in difflib.SequenceMatcher(None, a, b).get_matching_blocks():
+        s = "".join(b[lb:bm])
+        if am > la or s:
+            bin.append(struct.pack(BDIFF_PATT, p[la], p[am], len(s)) + s)
+        la = am + size
+        lb = bm + size
+    
+    return "".join(bin)
+
+def patchtext(bin):
+    """ Returns the new hunks that are contained in a binary diff."""
+    pos = 0
+    t = []
+    while pos < len(bin):
+        p1, p2, l = struct.unpack(BDIFF_PATT, bin[pos:pos + 12])
+        pos += 12
+        t.append(bin[pos:pos + l])
+        pos += l
+    return "".join(t)
+
+def patch(a, bin):
+    """ Patches the string a with the binary patch bin. """
+    c = last = pos = 0
+    r = []
+
+    while pos < len(bin):
+        p1, p2, l = struct.unpack(BDIFF_PATT, bin[pos:pos + 12])
+        pos += 12
+        r.append(a[last:p1])
+        r.append(bin[pos:pos + l])
+        pos += l
+        last = p2
+        c += 1
+    r.append(a[last:])
+
+    return "".join(r)
+
+def test():
+    a = "fo" * 30
+    b = "br" * 30
+    d = diff(a, b)
+    z = compress(d)
+    print `patchtext(d)`
+    #print `d`
+    print b == patch(a, d)
+    print len(d), len(z)
+
+test()
\ No newline at end of file
--- a/docs/CHANGES.aschremmer	Fri Jun 30 21:35:59 2006 +0200
+++ b/docs/CHANGES.aschremmer	Sat Jul 01 01:28:46 2006 +0200
@@ -14,6 +14,7 @@
     * XMLRPC multicall support
     * Conflict icon in RecentChanges
     * XMLRPC Authentication System
+    * Binary Diffing
 
   Bugfixes (only stuff that is buggy in moin/1.6 main branch):
     * ...
@@ -38,9 +39,13 @@
          Added xmlrpc multicall support into the server and
          backported the client code from python 2.4
 Week 23: Debian-Sprint in Extremadura, Spain. Initial thoughts about Mercurial as a base for syncronisation.
-Week 24: Evaluation of OpenID as a base for authentication, written local testing scripts 
+Week 24: Evaluation of OpenID as a base for authentication, written local testing scripts
 Week 25: Conference in Chile.
-Week 26: Implementation of the XMLRPC authentication system.
+Week 26: Implementation of the XMLRPC authentication system, added binary
+         diffing (mainly taken from Mercurial, but had to merge 5 changesets,
+         remove some mercurial dependencies and document it. Currently, Mercurial
+         uses a module written in C to solve the problem, so the Python code
+         was not cared for anymore.)
 
 Time plan
 =========