]>
Commit | Line | Data |
---|---|---|
e002a16b NP |
1 | /* |
2 | * Deltafication of a GIT database. | |
3 | * | |
4 | * (C) 2005 Nicolas Pitre <nico@cam.org> | |
5 | * | |
6 | * This code is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
10 | ||
11 | #include "cache.h" | |
12 | #include "delta.h" | |
13 | ||
14 | static int replace_object(char *buf, unsigned long size, unsigned char *sha1) | |
15 | { | |
16 | char tmpfile[PATH_MAX]; | |
17 | int fd; | |
18 | ||
19 | snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX", get_object_directory()); | |
20 | fd = mkstemp(tmpfile); | |
21 | if (fd < 0) | |
22 | return error("%s: %s\n", tmpfile, strerror(errno)); | |
23 | if (write(fd, buf, size) != size) { | |
24 | perror("unable to write file"); | |
25 | close(fd); | |
26 | unlink(tmpfile); | |
27 | return -1; | |
28 | } | |
29 | fchmod(fd, 0444); | |
30 | close(fd); | |
31 | if (rename(tmpfile, sha1_file_name(sha1))) { | |
32 | perror("unable to replace original object"); | |
33 | unlink(tmpfile); | |
34 | return -1; | |
35 | } | |
36 | return 0; | |
37 | } | |
38 | ||
d565b341 MK |
39 | static void *create_object(unsigned char *buf, unsigned long len, |
40 | char *hdr, int hdrlen, unsigned long *retsize) | |
e002a16b | 41 | { |
d565b341 | 42 | unsigned char *compressed; |
e002a16b NP |
43 | unsigned long size; |
44 | z_stream stream; | |
45 | ||
46 | /* Set it up */ | |
47 | memset(&stream, 0, sizeof(stream)); | |
48 | deflateInit(&stream, Z_BEST_COMPRESSION); | |
49 | size = deflateBound(&stream, len+hdrlen); | |
50 | compressed = xmalloc(size); | |
51 | ||
52 | /* Compress it */ | |
53 | stream.next_out = compressed; | |
54 | stream.avail_out = size; | |
55 | ||
56 | /* First header.. */ | |
d565b341 | 57 | stream.next_in = (unsigned char *)hdr; |
e002a16b NP |
58 | stream.avail_in = hdrlen; |
59 | while (deflate(&stream, 0) == Z_OK) | |
60 | /* nothing */; | |
61 | ||
62 | /* Then the data itself.. */ | |
63 | stream.next_in = buf; | |
64 | stream.avail_in = len; | |
65 | while (deflate(&stream, Z_FINISH) == Z_OK) | |
66 | /* nothing */; | |
67 | deflateEnd(&stream); | |
68 | *retsize = stream.total_out; | |
69 | return compressed; | |
70 | } | |
71 | ||
d565b341 | 72 | static int restore_original_object(unsigned char *buf, unsigned long len, |
e002a16b NP |
73 | char *type, unsigned char *sha1) |
74 | { | |
75 | char hdr[50]; | |
76 | int hdrlen, ret; | |
77 | void *compressed; | |
78 | unsigned long size; | |
79 | ||
80 | hdrlen = sprintf(hdr, "%s %lu", type, len)+1; | |
81 | compressed = create_object(buf, len, hdr, hdrlen, &size); | |
82 | ret = replace_object(compressed, size, sha1); | |
83 | free(compressed); | |
84 | return ret; | |
85 | } | |
86 | ||
d565b341 | 87 | static void *create_delta_object(unsigned char *buf, unsigned long len, |
e002a16b NP |
88 | unsigned char *sha1_ref, unsigned long *size) |
89 | { | |
90 | char hdr[50]; | |
91 | int hdrlen; | |
92 | ||
93 | /* Generate the header + sha1 of reference for delta */ | |
94 | hdrlen = sprintf(hdr, "delta %lu", len+20)+1; | |
95 | memcpy(hdr + hdrlen, sha1_ref, 20); | |
96 | hdrlen += 20; | |
97 | ||
98 | return create_object(buf, len, hdr, hdrlen, size); | |
99 | } | |
100 | ||
53d4b460 NP |
101 | static void *get_buffer(unsigned char *sha1, char *type, |
102 | unsigned long *size, unsigned long *compsize) | |
e002a16b NP |
103 | { |
104 | unsigned long mapsize; | |
105 | void *map = map_sha1_file(sha1, &mapsize); | |
106 | if (map) { | |
107 | void *buffer = unpack_sha1_file(map, mapsize, type, size); | |
108 | munmap(map, mapsize); | |
53d4b460 NP |
109 | if (compsize) |
110 | *compsize = mapsize; | |
e002a16b NP |
111 | if (buffer) |
112 | return buffer; | |
113 | } | |
114 | error("unable to get object %s", sha1_to_hex(sha1)); | |
115 | return NULL; | |
116 | } | |
117 | ||
53d4b460 NP |
118 | static void *expand_delta(void *delta, unsigned long *size, char *type, |
119 | unsigned int *depth, unsigned char **links) | |
e002a16b NP |
120 | { |
121 | void *buf = NULL; | |
53d4b460 NP |
122 | unsigned int level = (*depth)++; |
123 | if (*size < 20) { | |
e002a16b NP |
124 | error("delta object is bad"); |
125 | free(delta); | |
126 | } else { | |
127 | unsigned long ref_size; | |
53d4b460 | 128 | void *ref = get_buffer(delta, type, &ref_size, NULL); |
e002a16b | 129 | if (ref && !strcmp(type, "delta")) |
53d4b460 NP |
130 | ref = expand_delta(ref, &ref_size, type, depth, links); |
131 | else if (ref) | |
132 | { | |
133 | *links = xmalloc(*depth * 20); | |
134 | } | |
135 | if (ref) { | |
136 | buf = patch_delta(ref, ref_size, delta+20, *size-20, size); | |
137 | free(ref); | |
138 | if (buf) | |
139 | memcpy(*links + level*20, delta, 20); | |
140 | else | |
141 | free(*links); | |
142 | } | |
e002a16b NP |
143 | free(delta); |
144 | } | |
145 | return buf; | |
146 | } | |
147 | ||
148 | static char *mkdelta_usage = | |
53d4b460 NP |
149 | "mkdelta [--max-depth=N] [--max-behind=N] <reference_sha1> <target_sha1> [<next_sha1> ...]"; |
150 | ||
151 | struct delta { | |
152 | unsigned char sha1[20]; /* object sha1 */ | |
153 | unsigned long size; /* object size */ | |
154 | void *buf; /* object content */ | |
155 | unsigned char *links; /* delta reference links */ | |
156 | unsigned int depth; /* delta depth */ | |
157 | }; | |
158 | ||
e002a16b NP |
159 | int main(int argc, char **argv) |
160 | { | |
53d4b460 NP |
161 | struct delta *ref, trg; |
162 | char ref_type[20], trg_type[20], *skip_reason; | |
163 | void *best_buf; | |
164 | unsigned long best_size, orig_size, orig_compsize; | |
165 | unsigned int r, orig_ref, best_ref, nb_refs, next_ref, max_refs = 0; | |
166 | unsigned int i, duplicate, skip_lvl, verbose = 0, quiet = 0; | |
167 | unsigned int max_depth = -1; | |
e002a16b NP |
168 | |
169 | for (i = 1; i < argc; i++) { | |
170 | if (!strcmp(argv[i], "-v")) { | |
171 | verbose = 1; | |
53d4b460 NP |
172 | quiet = 0; |
173 | } else if (!strcmp(argv[i], "-q")) { | |
174 | quiet = 1; | |
175 | verbose = 0; | |
e002a16b | 176 | } else if (!strcmp(argv[i], "-d") && i+1 < argc) { |
53d4b460 | 177 | max_depth = atoi(argv[++i]); |
e002a16b | 178 | } else if (!strncmp(argv[i], "--max-depth=", 12)) { |
53d4b460 NP |
179 | max_depth = atoi(argv[i]+12); |
180 | } else if (!strcmp(argv[i], "-b") && i+1 < argc) { | |
181 | max_refs = atoi(argv[++i]); | |
182 | } else if (!strncmp(argv[i], "--max-behind=", 13)) { | |
183 | max_refs = atoi(argv[i]+13); | |
e002a16b NP |
184 | } else |
185 | break; | |
186 | } | |
187 | ||
53d4b460 | 188 | if (i + (max_depth != 0) >= argc) |
e002a16b NP |
189 | usage(mkdelta_usage); |
190 | ||
53d4b460 NP |
191 | if (!max_refs || max_refs > argc - i) |
192 | max_refs = argc - i; | |
193 | ref = xmalloc(max_refs * sizeof(*ref)); | |
194 | for (r = 0; r < max_refs; r++) | |
195 | ref[r].buf = ref[r].links = NULL; | |
196 | next_ref = nb_refs = 0; | |
e002a16b | 197 | |
53d4b460 NP |
198 | do { |
199 | if (get_sha1(argv[i], trg.sha1)) | |
e002a16b | 200 | die("bad sha1 %s", argv[i]); |
53d4b460 NP |
201 | trg.buf = get_buffer(trg.sha1, trg_type, &trg.size, &orig_compsize); |
202 | if (trg.buf && !trg.size) { | |
e002a16b NP |
203 | if (verbose) |
204 | printf("skip %s (object is empty)\n", argv[i]); | |
205 | continue; | |
206 | } | |
53d4b460 NP |
207 | orig_size = trg.size; |
208 | orig_ref = -1; | |
209 | trg.depth = 0; | |
210 | trg.links = NULL; | |
211 | if (trg.buf && !strcmp(trg_type, "delta")) { | |
212 | for (r = 0; r < nb_refs; r++) | |
213 | if (!memcmp(trg.buf, ref[r].sha1, 20)) | |
214 | break; | |
215 | if (r < nb_refs) { | |
216 | /* no need to reload the reference object */ | |
217 | trg.depth = ref[r].depth + 1; | |
218 | trg.links = xmalloc(trg.depth*20); | |
219 | memcpy(trg.links, trg.buf, 20); | |
220 | memcpy(trg.links+20, ref[r].links, ref[r].depth*20); | |
221 | trg.buf = patch_delta(ref[r].buf, ref[r].size, | |
222 | trg.buf+20, trg.size-20, | |
223 | &trg.size); | |
224 | strcpy(trg_type, ref_type); | |
225 | orig_ref = r; | |
226 | } else { | |
227 | trg.buf = expand_delta(trg.buf, &trg.size, trg_type, | |
228 | &trg.depth, &trg.links); | |
e002a16b | 229 | } |
e002a16b | 230 | } |
53d4b460 NP |
231 | if (!trg.buf) |
232 | die("unable to read target object %s", argv[i]); | |
e002a16b | 233 | |
53d4b460 NP |
234 | if (!nb_refs) { |
235 | strcpy(ref_type, trg_type); | |
236 | } else if (max_depth && strcmp(ref_type, trg_type)) { | |
e002a16b | 237 | die("type mismatch for object %s", argv[i]); |
e002a16b NP |
238 | } |
239 | ||
53d4b460 NP |
240 | duplicate = 0; |
241 | best_buf = NULL; | |
242 | best_size = -1; | |
243 | best_ref = -1; | |
244 | skip_lvl = 0; | |
245 | skip_reason = NULL; | |
246 | for (r = 0; max_depth && r < nb_refs; r++) { | |
247 | void *delta_buf, *comp_buf; | |
248 | unsigned long delta_size, comp_size; | |
249 | unsigned int l; | |
250 | ||
251 | duplicate = !memcmp(trg.sha1, ref[r].sha1, 20); | |
252 | if (duplicate) { | |
253 | skip_reason = "already seen"; | |
254 | break; | |
255 | } | |
256 | if (ref[r].depth >= max_depth) { | |
257 | if (skip_lvl < 1) { | |
258 | skip_reason = "exceeding max link depth"; | |
259 | skip_lvl = 1; | |
260 | } | |
261 | continue; | |
262 | } | |
263 | for (l = 0; l < ref[r].depth; l++) | |
264 | if (!memcmp(trg.sha1, ref[r].links + l*20, 20)) | |
265 | break; | |
266 | if (l != ref[r].depth) { | |
267 | if (skip_lvl < 2) { | |
268 | skip_reason = "would create a loop"; | |
269 | skip_lvl = 2; | |
270 | } | |
271 | continue; | |
272 | } | |
273 | if (trg.depth < max_depth && r == orig_ref) { | |
274 | if (skip_lvl < 3) { | |
275 | skip_reason = "delta already in place"; | |
276 | skip_lvl = 3; | |
277 | } | |
278 | continue; | |
279 | } | |
280 | delta_buf = diff_delta(ref[r].buf, ref[r].size, | |
75c42d8c LT |
281 | trg.buf, trg.size, |
282 | &delta_size, ~0UL); | |
53d4b460 NP |
283 | if (!delta_buf) |
284 | die("out of memory"); | |
285 | if (trg.depth < max_depth && | |
286 | delta_size+20 >= orig_size) { | |
287 | /* no need to even try to compress if original | |
288 | object is smaller than this delta */ | |
289 | free(delta_buf); | |
290 | if (skip_lvl < 4) { | |
291 | skip_reason = "no size reduction"; | |
292 | skip_lvl = 4; | |
293 | } | |
294 | continue; | |
295 | } | |
296 | comp_buf = create_delta_object(delta_buf, delta_size, | |
297 | ref[r].sha1, &comp_size); | |
298 | if (!comp_buf) | |
299 | die("out of memory"); | |
300 | free(delta_buf); | |
301 | if (trg.depth < max_depth && | |
302 | comp_size >= orig_compsize) { | |
303 | free(comp_buf); | |
304 | if (skip_lvl < 5) { | |
305 | skip_reason = "no size reduction"; | |
306 | skip_lvl = 5; | |
307 | } | |
308 | continue; | |
309 | } | |
310 | if ((comp_size < best_size) || | |
311 | (comp_size == best_size && | |
312 | ref[r].depth < ref[best_ref].depth)) { | |
313 | free(best_buf); | |
314 | best_buf = comp_buf; | |
315 | best_size = comp_size; | |
316 | best_ref = r; | |
317 | } | |
e002a16b NP |
318 | } |
319 | ||
53d4b460 NP |
320 | if (best_buf) { |
321 | if (replace_object(best_buf, best_size, trg.sha1)) | |
e002a16b | 322 | die("unable to write delta for %s", argv[i]); |
53d4b460 NP |
323 | free(best_buf); |
324 | free(trg.links); | |
325 | trg.depth = ref[best_ref].depth + 1; | |
326 | trg.links = xmalloc(trg.depth*20); | |
327 | memcpy(trg.links, ref[best_ref].sha1, 20); | |
328 | memcpy(trg.links+20, ref[best_ref].links, ref[best_ref].depth*20); | |
329 | if (!quiet) | |
330 | printf("delta %s (size=%ld.%02ld%% depth=%d dist=%d)\n", | |
331 | argv[i], best_size*100 / orig_compsize, | |
332 | (best_size*10000 / orig_compsize)%100, | |
333 | trg.depth, | |
334 | (next_ref - best_ref + max_refs) | |
335 | % (max_refs + 1) + 1); | |
336 | } else if (trg.depth > max_depth) { | |
337 | if (restore_original_object(trg.buf, trg.size, trg_type, trg.sha1)) | |
338 | die("unable to restore %s", argv[i]); | |
339 | if (!quiet) | |
340 | printf("undelta %s (depth was %d)\n", | |
341 | argv[i], trg.depth); | |
342 | trg.depth = 0; | |
343 | free(trg.links); | |
344 | trg.links = NULL; | |
345 | } else if (skip_reason && verbose) { | |
346 | printf("skip %s (%s)\n", argv[i], skip_reason); | |
e002a16b NP |
347 | } |
348 | ||
53d4b460 NP |
349 | if (!duplicate) { |
350 | free(ref[next_ref].buf); | |
351 | free(ref[next_ref].links); | |
352 | ref[next_ref] = trg; | |
353 | if (++next_ref > nb_refs) | |
354 | nb_refs = next_ref; | |
355 | if (next_ref == max_refs) | |
356 | next_ref = 0; | |
357 | } else { | |
358 | free(trg.buf); | |
359 | free(trg.links); | |
360 | } | |
361 | } while (++i < argc); | |
e002a16b NP |
362 | |
363 | return 0; | |
364 | } |