]> git.ipfire.org Git - thirdparty/git.git/blame - diff-delta.c
GIT-VERSION-FILE: check ./version first.
[thirdparty/git.git] / diff-delta.c
CommitLineData
a310d434
NP
1/*
2 * diff-delta.c: generate a delta between two buffers
3 *
4 * Many parts of this file have been lifted from LibXDiff version 0.10.
5 * http://www.xmailserver.org/xdiff-lib.html
6 *
7 * LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
8 * Copyright (C) 2003 Davide Libenzi
9 *
10 * Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
11 *
12 * This file is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * Use of this within git automatically means that the LGPL
18 * licensing gets turned into GPLv2 within this project.
19 */
20
63f17569 21#include "git-compat-util.h"
85023577 22#include "delta.h"
a310d434 23
08abe669
NP
24/* maximum hash entry list for the same hash bucket */
25#define HASH_LIMIT 64
a310d434 26
3dc5a9e4
NP
27#define RABIN_SHIFT 23
28#define RABIN_WINDOW 16
29
30static const unsigned int T[256] = {
31 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
32 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
33 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
34 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
35 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
36 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
37 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
38 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
39 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
40 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
41 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
42 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
43 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
44 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
45 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
46 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
47 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
48 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
49 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
50 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
51 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
52 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
53 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
54 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
55 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
56 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
57 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
58 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
59 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
60 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
61 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
62 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
63 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
64 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
65 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
66 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
67 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
68 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
69 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
70 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
71 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
72 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
73 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
74};
75
76static const unsigned int U[256] = {
77 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
78 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
79 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
80 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
81 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
82 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
83 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
84 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
85 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
86 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
87 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
88 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
89 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
90 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
91 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
92 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
93 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
94 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
95 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
96 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
97 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
98 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
99 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
100 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
101 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
102 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
103 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
104 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
105 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
106 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
107 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
108 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
109 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
110 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
111 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
112 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
113 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
114 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
115 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
116 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
117 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
118 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
119 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
120};
a310d434 121
08abe669 122struct index_entry {
a310d434 123 const unsigned char *ptr;
8e1454b5 124 unsigned int val;
08abe669 125 struct index_entry *next;
8e1454b5 126};
a310d434 127
08abe669
NP
128struct delta_index {
129 const void *src_buf;
130 unsigned long src_size;
3dc5a9e4 131 unsigned int hash_mask;
63f17569 132 struct index_entry *hash[FLEX_ARRAY];
08abe669
NP
133};
134
135struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
a310d434 136{
06a9f920 137 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
08abe669
NP
138 const unsigned char *data, *buffer = buf;
139 struct delta_index *index;
140 struct index_entry *entry, **hash;
8e1454b5 141 void *mem;
06a9f920 142 unsigned long memsize;
8e1454b5 143
08abe669
NP
144 if (!buf || !bufsize)
145 return NULL;
146
3dc5a9e4 147 /* Determine index hash size. Note that indexing skips the
82e5a82f 148 first byte to allow for optimizing the Rabin's polynomial
3dc5a9e4
NP
149 initialization in create_delta(). */
150 entries = (bufsize - 1) / RABIN_WINDOW;
8e1454b5 151 hsize = entries / 4;
b05faa2d 152 for (i = 4; (1u << i) < hsize && i < 31; i++);
8e1454b5 153 hsize = 1 << i;
3dc5a9e4 154 hmask = hsize - 1;
8e1454b5
NP
155
156 /* allocate lookup index */
06a9f920
NP
157 memsize = sizeof(*index) +
158 sizeof(*hash) * hsize +
159 sizeof(*entry) * entries;
160 mem = malloc(memsize);
8e1454b5
NP
161 if (!mem)
162 return NULL;
08abe669
NP
163 index = mem;
164 mem = index + 1;
8e1454b5 165 hash = mem;
08abe669
NP
166 mem = hash + hsize;
167 entry = mem;
168
169 index->src_buf = buf;
170 index->src_size = bufsize;
3dc5a9e4 171 index->hash_mask = hmask;
8e1454b5
NP
172 memset(hash, 0, hsize * sizeof(*hash));
173
c13c6bf7
NP
174 /* allocate an array to count hash entries */
175 hash_count = calloc(hsize, sizeof(*hash_count));
176 if (!hash_count) {
08abe669 177 free(index);
c13c6bf7
NP
178 return NULL;
179 }
180
181 /* then populate the index */
06a9f920
NP
182 prev_val = ~0;
183 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
184 data >= buffer;
185 data -= RABIN_WINDOW) {
3dc5a9e4
NP
186 unsigned int val = 0;
187 for (i = 1; i <= RABIN_WINDOW; i++)
188 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
06a9f920
NP
189 if (val == prev_val) {
190 /* keep the lowest of consecutive identical blocks */
191 entry[-1].ptr = data + RABIN_WINDOW;
192 } else {
193 prev_val = val;
194 i = val & hmask;
195 entry->ptr = data + RABIN_WINDOW;
196 entry->val = val;
197 entry->next = hash[i];
198 hash[i] = entry++;
199 hash_count[i]++;
06a9f920 200 }
3dc5a9e4 201 }
a310d434 202
c13c6bf7
NP
203 /*
204 * Determine a limit on the number of entries in the same hash
82e5a82f 205 * bucket. This guards us against pathological data sets causing
c13c6bf7
NP
206 * really bad hash distribution with most entries in the same hash
207 * bucket that would bring us to O(m*n) computing costs (m and n
208 * corresponding to reference and target buffer sizes).
209 *
08abe669 210 * Make sure none of the hash buckets has more entries than
c13c6bf7
NP
211 * we're willing to test. Otherwise we cull the entry list
212 * uniformly to still preserve a good repartition across
213 * the reference buffer.
214 */
215 for (i = 0; i < hsize; i++) {
08abe669 216 if (hash_count[i] < HASH_LIMIT)
c13c6bf7
NP
217 continue;
218 entry = hash[i];
219 do {
08abe669
NP
220 struct index_entry *keep = entry;
221 int skip = hash_count[i] / HASH_LIMIT / 2;
c13c6bf7
NP
222 do {
223 entry = entry->next;
224 } while(--skip && entry);
225 keep->next = entry;
226 } while(entry);
227 }
228 free(hash_count);
229
08abe669
NP
230 return index;
231}
232
233void free_delta_index(struct delta_index *index)
234{
235 free(index);
a310d434
NP
236}
237
3dc5a9e4
NP
238/*
239 * The maximum size for any opcode sequence, including the initial header
82e5a82f 240 * plus Rabin window plus biggest copy.
3dc5a9e4
NP
241 */
242#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
fe474b58 243
08abe669
NP
244void *
245create_delta(const struct delta_index *index,
246 const void *trg_buf, unsigned long trg_size,
247 unsigned long *delta_size, unsigned long max_size)
a310d434 248{
2d08e5dd 249 unsigned int i, outpos, outsize, val;
5a1fb2ca 250 int inscnt;
8e1454b5
NP
251 const unsigned char *ref_data, *ref_top, *data, *top;
252 unsigned char *out;
a310d434 253
08abe669 254 if (!trg_buf || !trg_size)
8e1454b5
NP
255 return NULL;
256
a310d434
NP
257 outpos = 0;
258 outsize = 8192;
fe474b58
NP
259 if (max_size && outsize >= max_size)
260 outsize = max_size + MAX_OP_SIZE + 1;
a310d434 261 out = malloc(outsize);
08abe669 262 if (!out)
a310d434 263 return NULL;
a310d434
NP
264
265 /* store reference buffer size */
08abe669
NP
266 i = index->src_size;
267 while (i >= 0x80) {
268 out[outpos++] = i | 0x80;
269 i >>= 7;
69a2d426 270 }
08abe669 271 out[outpos++] = i;
a310d434
NP
272
273 /* store target buffer size */
08abe669
NP
274 i = trg_size;
275 while (i >= 0x80) {
276 out[outpos++] = i | 0x80;
277 i >>= 7;
69a2d426 278 }
08abe669 279 out[outpos++] = i;
a310d434 280
08abe669
NP
281 ref_data = index->src_buf;
282 ref_top = ref_data + index->src_size;
283 data = trg_buf;
1d7f171c 284 top = (const unsigned char *) trg_buf + trg_size;
3dc5a9e4
NP
285
286 outpos++;
287 val = 0;
288 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
289 out[outpos++] = *data;
290 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
291 }
292 inscnt = i;
a310d434 293
8e1454b5
NP
294 while (data < top) {
295 unsigned int moff = 0, msize = 0;
08abe669 296 struct index_entry *entry;
3dc5a9e4
NP
297 val ^= U[data[-RABIN_WINDOW]];
298 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
2d08e5dd 299 i = val & index->hash_mask;
08abe669
NP
300 for (entry = index->hash[i]; entry; entry = entry->next) {
301 const unsigned char *ref = entry->ptr;
302 const unsigned char *src = data;
303 unsigned int ref_size = ref_top - ref;
304 if (entry->val != val)
305 continue;
306 if (ref_size > top - src)
307 ref_size = top - src;
29f049a0
JH
308 if (ref_size > 0x10000)
309 ref_size = 0x10000;
08abe669
NP
310 if (ref_size <= msize)
311 break;
312 while (ref_size-- && *src++ == *ref)
313 ref++;
314 if (msize < ref - entry->ptr) {
315 /* this is our best match so far */
316 msize = ref - entry->ptr;
317 moff = entry->ptr - ref_data;
a310d434
NP
318 }
319 }
320
3dc5a9e4 321 if (msize < 4) {
a310d434
NP
322 if (!inscnt)
323 outpos++;
324 out[outpos++] = *data++;
325 inscnt++;
326 if (inscnt == 0x7f) {
327 out[outpos - inscnt - 1] = inscnt;
328 inscnt = 0;
329 }
330 } else {
8e1454b5
NP
331 unsigned char *op;
332
3dc5a9e4
NP
333 if (msize >= RABIN_WINDOW) {
334 const unsigned char *sk;
335 sk = data + msize - RABIN_WINDOW;
336 val = 0;
337 for (i = 0; i < RABIN_WINDOW; i++)
338 val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT];
339 } else {
340 const unsigned char *sk = data + 1;
341 for (i = 1; i < msize; i++) {
342 val ^= U[sk[-RABIN_WINDOW]];
343 val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT];
344 }
345 }
346
a310d434 347 if (inscnt) {
5a1fb2ca
NP
348 while (moff && ref_data[moff-1] == data[-1]) {
349 if (msize == 0x10000)
350 break;
351 /* we can match one byte back */
352 msize++;
353 moff--;
354 data--;
355 outpos--;
356 if (--inscnt)
357 continue;
358 outpos--; /* remove count slot */
359 inscnt--; /* make it -1 */
360 break;
361 }
a310d434
NP
362 out[outpos - inscnt - 1] = inscnt;
363 inscnt = 0;
364 }
365
366 data += msize;
8e1454b5 367 op = out + outpos++;
a310d434
NP
368 i = 0x80;
369
370 if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
371 moff >>= 8;
372 if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
373 moff >>= 8;
374 if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
375 moff >>= 8;
376 if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
377
378 if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
379 msize >>= 8;
380 if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
381
8e1454b5 382 *op = i;
a310d434
NP
383 }
384
fe474b58 385 if (outpos >= outsize - MAX_OP_SIZE) {
a310d434
NP
386 void *tmp = out;
387 outsize = outsize * 3 / 2;
fe474b58
NP
388 if (max_size && outsize >= max_size)
389 outsize = max_size + MAX_OP_SIZE + 1;
390 if (max_size && outpos > max_size)
3dc5a9e4 391 break;
83572c1a 392 out = xrealloc(out, outsize);
a310d434
NP
393 if (!out) {
394 free(tmp);
a310d434
NP
395 return NULL;
396 }
397 }
398 }
399
400 if (inscnt)
401 out[outpos - inscnt - 1] = inscnt;
402
3dc5a9e4
NP
403 if (max_size && outpos > max_size) {
404 free(out);
405 return NULL;
406 }
407
a310d434
NP
408 *delta_size = outpos;
409 return out;
410}