annotate MoinMoin/util/bdiff.py @ 961:21eb4cb11e2c

Added binary diffing! Not much left for the getDiff function.
author Alexander Schremmer <alex AT alexanderweb DOT de>
date Sat, 01 Jul 2006 01:28:46 +0200
parents
children 930c9e82a60b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
961
21eb4cb11e2c Added binary diffing! Not much left for the getDiff function.
Alexander Schremmer <alex AT alexanderweb DOT de>
parents:
diff changeset
# Binary patching and diffing
#
# Copyright 2005 Matt Mackall <mpm@selenic.com>
# Copyright 2006 MoinMoin:AlexanderSchremmer
#
# Algorithm taken from mercurial's mdiff.py
#
# This software may be used and distributed according to the terms
# of the GNU General Public License, incorporated herein by reference.

import zlib, difflib, struct

# Header that precedes every hunk of a binary diff: three big-endian signed
# 32-bit integers -- start offset in the old text, end offset in the old
# text, and length of the replacement data that follows the header.
BDIFF_PATT = ">lll"

# Size in bytes of one packed hunk header (derived, so it cannot drift from
# BDIFF_PATT the way a hard-coded number could).
_HEADER_SIZE = struct.calcsize(BDIFF_PATT)


def compress(text, level=None):
    """ Compresses the byte string text with zlib.

    level is an optional zlib compression level; when it is None the
    zlib default is used, which is what this function always did before
    the parameter existed.
    """
    if level is None:
        return zlib.compress(text)
    return zlib.compress(text, level)


def decompress(bin):
    """ Decompresses the zlib-compressed byte string bin. """
    return zlib.decompress(bin)


def _byte_offsets(seq):
    """ Maps every element boundary of seq to its byte offset.

    For a byte string the offsets are the indices themselves; for a
    sequence of byte strings (e.g. lines) they are the running lengths.
    """
    if isinstance(seq, bytes):
        return range(len(seq) + 1)
    offsets = [0]
    for item in seq:
        offsets.append(offsets[-1] + len(item))
    return offsets


def _join_bytes(chunk):
    """ Flattens a slice of a diff input into one byte string. """
    if isinstance(chunk, bytes):
        return chunk
    return b"".join(chunk)


def _iter_hunks(bin):
    """ Yields (start, end, data) for every hunk of the binary diff bin.

    Raises struct.error if a hunk header is cut short and ValueError if a
    hunk declares a negative length or more data than bin contains.
    """
    pos = 0
    end = len(bin)
    while pos < end:
        p1, p2, length = struct.unpack(BDIFF_PATT, bin[pos:pos + _HEADER_SIZE])
        pos += _HEADER_SIZE
        # A negative length would move pos backwards and could loop forever
        # on a corrupt patch, so reject it before slicing.
        if length < 0:
            raise ValueError("corrupt binary patch: negative hunk length")
        data = bin[pos:pos + length]
        if len(data) != length:
            raise ValueError("corrupt binary patch: truncated hunk data")
        pos += length
        yield p1, p2, data


def diff(a, b):
    """ Generates a binary diff of the passed strings.

    a and b are byte strings (str on Python 2, bytes on Python 3) or
    sequences of byte strings such as lists of lines. The result is a
    byte string of hunks, each a BDIFF_PATT header followed by the data
    that replaces a[start:end]; it is empty when nothing changed.
    """
    if not a:
        # Nothing to match against: the whole of b is one insertion at 0.
        data = _join_bytes(b) if b else b""
        if not data:
            return b""
        return struct.pack(BDIFF_PATT, 0, 0, len(data)) + data

    hunks = []
    offsets = _byte_offsets(a)
    la = lb = 0  # first element of a / b not yet covered by a match

    matcher = difflib.SequenceMatcher(None, a, b)
    # get_matching_blocks() ends with a zero-size block at (len(a), len(b)),
    # which flushes any trailing deletion or insertion.
    for am, bm, size in matcher.get_matching_blocks():
        data = _join_bytes(b[lb:bm])
        if am > la or data:
            header = struct.pack(BDIFF_PATT, offsets[la], offsets[am], len(data))
            hunks.append(header + data)
        la = am + size
        lb = bm + size

    return b"".join(hunks)


def patchtext(bin):
    """ Returns the new hunks that are contained in a binary diff.

    Raises struct.error or ValueError if bin is truncated or corrupt.
    """
    return b"".join([data for _start, _end, data in _iter_hunks(bin)])


def patch(a, bin):
    """ Patches the string a with the binary patch bin.

    Returns the patched byte string. Raises struct.error or ValueError if
    bin is truncated or corrupt.
    """
    last = 0  # offset in a up to which the old text is already consumed
    parts = []

    for start, end, data in _iter_hunks(bin):
        parts.append(a[last:start])  # unchanged text before the hunk
        parts.append(data)           # replacement for a[start:end]
        last = end
    parts.append(a[last:])           # unchanged tail

    return b"".join(parts)


def test():
    """ Small self-test: diffs two strings and prints the results. """
    a = b"fo" * 30
    b = b"br" * 30
    d = diff(a, b)
    z = compress(d)
    print(repr(patchtext(d)))
    print(b == patch(a, d))
    print("%d %d" % (len(d), len(z)))


# Only run the self-test when executed as a script, not on every import.
if __name__ == "__main__":
    test()