]> git.ipfire.org Git - thirdparty/git.git/blame - diff-delta.c
Change types used in bitfields to be `int's.
[thirdparty/git.git] / diff-delta.c
CommitLineData
a310d434
NP
1/*
2 * diff-delta.c: generate a delta between two buffers
3 *
4 * Many parts of this file have been lifted from LibXDiff version 0.10.
5 * http://www.xmailserver.org/xdiff-lib.html
6 *
7 * LibXDiff was written by Davide Libenzi <davidel@xmailserver.org>
8 * Copyright (C) 2003 Davide Libenzi
9 *
10 * Many mods for GIT usage by Nicolas Pitre <nico@cam.org>, (C) 2005.
11 *
12 * This file is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
16 *
17 * Use of this within git automatically means that the LGPL
18 * licensing gets turned into GPLv2 within this project.
19 */
20
21#include <stdlib.h>
8e1454b5 22#include <string.h>
a310d434
NP
23#include "delta.h"
24
63f17569 25#include "git-compat-util.h"
a310d434 26
08abe669
NP
27/* maximum hash entry list for the same hash bucket */
28#define HASH_LIMIT 64
a310d434 29
3dc5a9e4
NP
30#define RABIN_SHIFT 23
31#define RABIN_WINDOW 16
32
33static const unsigned int T[256] = {
34 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344,
35 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259,
36 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85,
37 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2,
38 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a,
39 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db,
40 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753,
41 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964,
42 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8,
43 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5,
44 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81,
45 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6,
46 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e,
47 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77,
48 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff,
49 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8,
50 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc,
51 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1,
52 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d,
53 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a,
54 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2,
55 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02,
56 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a,
57 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd,
58 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61,
59 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c,
60 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08,
61 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f,
62 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7,
63 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe,
64 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76,
65 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141,
66 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65,
67 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78,
68 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4,
69 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93,
70 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b,
71 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa,
72 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872,
73 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645,
74 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99,
75 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84,
76 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811
77};
78
79static const unsigned int U[256] = {
80 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a,
81 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48,
82 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511,
83 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d,
84 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8,
85 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe,
86 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb,
87 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937,
88 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e,
89 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c,
90 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d,
91 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1,
92 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4,
93 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa,
94 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef,
95 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263,
96 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302,
97 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000,
98 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59,
99 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5,
100 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90,
101 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7,
102 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2,
103 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e,
104 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467,
105 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765,
106 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604,
107 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88,
108 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd,
109 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3,
110 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996,
111 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a,
112 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b,
113 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609,
114 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50,
115 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc,
116 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99,
117 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf,
118 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa,
119 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176,
120 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f,
121 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d,
122 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a
123};
a310d434 124
08abe669 125struct index_entry {
a310d434 126 const unsigned char *ptr;
8e1454b5 127 unsigned int val;
08abe669 128 struct index_entry *next;
8e1454b5 129};
a310d434 130
08abe669
NP
131struct delta_index {
132 const void *src_buf;
133 unsigned long src_size;
3dc5a9e4 134 unsigned int hash_mask;
63f17569 135 struct index_entry *hash[FLEX_ARRAY];
08abe669
NP
136};
137
138struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
a310d434 139{
06a9f920 140 unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
08abe669
NP
141 const unsigned char *data, *buffer = buf;
142 struct delta_index *index;
143 struct index_entry *entry, **hash;
8e1454b5 144 void *mem;
06a9f920 145 unsigned long memsize;
8e1454b5 146
08abe669
NP
147 if (!buf || !bufsize)
148 return NULL;
149
3dc5a9e4
NP
150 /* Determine index hash size. Note that indexing skips the
151 first byte to allow for optimizing the rabin polynomial
152 initialization in create_delta(). */
153 entries = (bufsize - 1) / RABIN_WINDOW;
8e1454b5 154 hsize = entries / 4;
c13c6bf7 155 for (i = 4; (1 << i) < hsize && i < 31; i++);
8e1454b5 156 hsize = 1 << i;
3dc5a9e4 157 hmask = hsize - 1;
8e1454b5
NP
158
159 /* allocate lookup index */
06a9f920
NP
160 memsize = sizeof(*index) +
161 sizeof(*hash) * hsize +
162 sizeof(*entry) * entries;
163 mem = malloc(memsize);
8e1454b5
NP
164 if (!mem)
165 return NULL;
08abe669
NP
166 index = mem;
167 mem = index + 1;
8e1454b5 168 hash = mem;
08abe669
NP
169 mem = hash + hsize;
170 entry = mem;
171
172 index->src_buf = buf;
173 index->src_size = bufsize;
3dc5a9e4 174 index->hash_mask = hmask;
8e1454b5
NP
175 memset(hash, 0, hsize * sizeof(*hash));
176
c13c6bf7
NP
177 /* allocate an array to count hash entries */
178 hash_count = calloc(hsize, sizeof(*hash_count));
179 if (!hash_count) {
08abe669 180 free(index);
c13c6bf7
NP
181 return NULL;
182 }
183
184 /* then populate the index */
06a9f920
NP
185 prev_val = ~0;
186 for (data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW;
187 data >= buffer;
188 data -= RABIN_WINDOW) {
3dc5a9e4
NP
189 unsigned int val = 0;
190 for (i = 1; i <= RABIN_WINDOW; i++)
191 val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
06a9f920
NP
192 if (val == prev_val) {
193 /* keep the lowest of consecutive identical blocks */
194 entry[-1].ptr = data + RABIN_WINDOW;
195 } else {
196 prev_val = val;
197 i = val & hmask;
198 entry->ptr = data + RABIN_WINDOW;
199 entry->val = val;
200 entry->next = hash[i];
201 hash[i] = entry++;
202 hash_count[i]++;
06a9f920 203 }
3dc5a9e4 204 }
a310d434 205
c13c6bf7
NP
206 /*
207 * Determine a limit on the number of entries in the same hash
208 * bucket. This guard us against patological data sets causing
209 * really bad hash distribution with most entries in the same hash
210 * bucket that would bring us to O(m*n) computing costs (m and n
211 * corresponding to reference and target buffer sizes).
212 *
08abe669 213 * Make sure none of the hash buckets has more entries than
c13c6bf7
NP
214 * we're willing to test. Otherwise we cull the entry list
215 * uniformly to still preserve a good repartition across
216 * the reference buffer.
217 */
218 for (i = 0; i < hsize; i++) {
08abe669 219 if (hash_count[i] < HASH_LIMIT)
c13c6bf7
NP
220 continue;
221 entry = hash[i];
222 do {
08abe669
NP
223 struct index_entry *keep = entry;
224 int skip = hash_count[i] / HASH_LIMIT / 2;
c13c6bf7
NP
225 do {
226 entry = entry->next;
227 } while(--skip && entry);
228 keep->next = entry;
229 } while(entry);
230 }
231 free(hash_count);
232
08abe669
NP
233 return index;
234}
235
236void free_delta_index(struct delta_index *index)
237{
238 free(index);
a310d434
NP
239}
240
3dc5a9e4
NP
241/*
242 * The maximum size for any opcode sequence, including the initial header
243 * plus rabin window plus biggest copy.
244 */
245#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7)
fe474b58 246
08abe669
NP
247void *
248create_delta(const struct delta_index *index,
249 const void *trg_buf, unsigned long trg_size,
250 unsigned long *delta_size, unsigned long max_size)
a310d434 251{
2d08e5dd 252 unsigned int i, outpos, outsize, val;
5a1fb2ca 253 int inscnt;
8e1454b5
NP
254 const unsigned char *ref_data, *ref_top, *data, *top;
255 unsigned char *out;
a310d434 256
08abe669 257 if (!trg_buf || !trg_size)
8e1454b5
NP
258 return NULL;
259
a310d434
NP
260 outpos = 0;
261 outsize = 8192;
fe474b58
NP
262 if (max_size && outsize >= max_size)
263 outsize = max_size + MAX_OP_SIZE + 1;
a310d434 264 out = malloc(outsize);
08abe669 265 if (!out)
a310d434 266 return NULL;
a310d434
NP
267
268 /* store reference buffer size */
08abe669
NP
269 i = index->src_size;
270 while (i >= 0x80) {
271 out[outpos++] = i | 0x80;
272 i >>= 7;
69a2d426 273 }
08abe669 274 out[outpos++] = i;
a310d434
NP
275
276 /* store target buffer size */
08abe669
NP
277 i = trg_size;
278 while (i >= 0x80) {
279 out[outpos++] = i | 0x80;
280 i >>= 7;
69a2d426 281 }
08abe669 282 out[outpos++] = i;
a310d434 283
08abe669
NP
284 ref_data = index->src_buf;
285 ref_top = ref_data + index->src_size;
286 data = trg_buf;
287 top = trg_buf + trg_size;
3dc5a9e4
NP
288
289 outpos++;
290 val = 0;
291 for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) {
292 out[outpos++] = *data;
293 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
294 }
295 inscnt = i;
a310d434 296
8e1454b5
NP
297 while (data < top) {
298 unsigned int moff = 0, msize = 0;
08abe669 299 struct index_entry *entry;
3dc5a9e4
NP
300 val ^= U[data[-RABIN_WINDOW]];
301 val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
2d08e5dd 302 i = val & index->hash_mask;
08abe669
NP
303 for (entry = index->hash[i]; entry; entry = entry->next) {
304 const unsigned char *ref = entry->ptr;
305 const unsigned char *src = data;
306 unsigned int ref_size = ref_top - ref;
307 if (entry->val != val)
308 continue;
309 if (ref_size > top - src)
310 ref_size = top - src;
311 if (ref_size > 0x10000)
312 ref_size = 0x10000;
313 if (ref_size <= msize)
314 break;
315 while (ref_size-- && *src++ == *ref)
316 ref++;
317 if (msize < ref - entry->ptr) {
318 /* this is our best match so far */
319 msize = ref - entry->ptr;
320 moff = entry->ptr - ref_data;
a310d434
NP
321 }
322 }
323
3dc5a9e4 324 if (msize < 4) {
a310d434
NP
325 if (!inscnt)
326 outpos++;
327 out[outpos++] = *data++;
328 inscnt++;
329 if (inscnt == 0x7f) {
330 out[outpos - inscnt - 1] = inscnt;
331 inscnt = 0;
332 }
333 } else {
8e1454b5
NP
334 unsigned char *op;
335
3dc5a9e4
NP
336 if (msize >= RABIN_WINDOW) {
337 const unsigned char *sk;
338 sk = data + msize - RABIN_WINDOW;
339 val = 0;
340 for (i = 0; i < RABIN_WINDOW; i++)
341 val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT];
342 } else {
343 const unsigned char *sk = data + 1;
344 for (i = 1; i < msize; i++) {
345 val ^= U[sk[-RABIN_WINDOW]];
346 val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT];
347 }
348 }
349
a310d434 350 if (inscnt) {
5a1fb2ca
NP
351 while (moff && ref_data[moff-1] == data[-1]) {
352 if (msize == 0x10000)
353 break;
354 /* we can match one byte back */
355 msize++;
356 moff--;
357 data--;
358 outpos--;
359 if (--inscnt)
360 continue;
361 outpos--; /* remove count slot */
362 inscnt--; /* make it -1 */
363 break;
364 }
a310d434
NP
365 out[outpos - inscnt - 1] = inscnt;
366 inscnt = 0;
367 }
368
369 data += msize;
8e1454b5 370 op = out + outpos++;
a310d434
NP
371 i = 0x80;
372
373 if (moff & 0xff) { out[outpos++] = moff; i |= 0x01; }
374 moff >>= 8;
375 if (moff & 0xff) { out[outpos++] = moff; i |= 0x02; }
376 moff >>= 8;
377 if (moff & 0xff) { out[outpos++] = moff; i |= 0x04; }
378 moff >>= 8;
379 if (moff & 0xff) { out[outpos++] = moff; i |= 0x08; }
380
381 if (msize & 0xff) { out[outpos++] = msize; i |= 0x10; }
382 msize >>= 8;
383 if (msize & 0xff) { out[outpos++] = msize; i |= 0x20; }
384
8e1454b5 385 *op = i;
a310d434
NP
386 }
387
fe474b58 388 if (outpos >= outsize - MAX_OP_SIZE) {
a310d434
NP
389 void *tmp = out;
390 outsize = outsize * 3 / 2;
fe474b58
NP
391 if (max_size && outsize >= max_size)
392 outsize = max_size + MAX_OP_SIZE + 1;
393 if (max_size && outpos > max_size)
3dc5a9e4
NP
394 break;
395 out = realloc(out, outsize);
a310d434
NP
396 if (!out) {
397 free(tmp);
a310d434
NP
398 return NULL;
399 }
400 }
401 }
402
403 if (inscnt)
404 out[outpos - inscnt - 1] = inscnt;
405
3dc5a9e4
NP
406 if (max_size && outpos > max_size) {
407 free(out);
408 return NULL;
409 }
410
a310d434
NP
411 *delta_size = outpos;
412 return out;
413}