]>
Commit | Line | Data |
---|---|---|
e002a16b NP |
1 | /* |
2 | * Deltafication of a GIT database. | |
3 | * | |
4 | * (C) 2005 Nicolas Pitre <nico@cam.org> | |
5 | * | |
6 | * This code is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 as | |
8 | * published by the Free Software Foundation. | |
9 | */ | |
10 | ||
11 | #include "cache.h" | |
12 | #include "delta.h" | |
13 | ||
14 | static int replace_object(char *buf, unsigned long size, unsigned char *sha1) | |
15 | { | |
16 | char tmpfile[PATH_MAX]; | |
17 | int fd; | |
18 | ||
19 | snprintf(tmpfile, sizeof(tmpfile), "%s/obj_XXXXXX", get_object_directory()); | |
20 | fd = mkstemp(tmpfile); | |
21 | if (fd < 0) | |
22 | return error("%s: %s\n", tmpfile, strerror(errno)); | |
23 | if (write(fd, buf, size) != size) { | |
24 | perror("unable to write file"); | |
25 | close(fd); | |
26 | unlink(tmpfile); | |
27 | return -1; | |
28 | } | |
29 | fchmod(fd, 0444); | |
30 | close(fd); | |
31 | if (rename(tmpfile, sha1_file_name(sha1))) { | |
32 | perror("unable to replace original object"); | |
33 | unlink(tmpfile); | |
34 | return -1; | |
35 | } | |
36 | return 0; | |
37 | } | |
38 | ||
39 | static void *create_object(char *buf, unsigned long len, char *hdr, int hdrlen, | |
40 | unsigned long *retsize) | |
41 | { | |
42 | char *compressed; | |
43 | unsigned long size; | |
44 | z_stream stream; | |
45 | ||
46 | /* Set it up */ | |
47 | memset(&stream, 0, sizeof(stream)); | |
48 | deflateInit(&stream, Z_BEST_COMPRESSION); | |
49 | size = deflateBound(&stream, len+hdrlen); | |
50 | compressed = xmalloc(size); | |
51 | ||
52 | /* Compress it */ | |
53 | stream.next_out = compressed; | |
54 | stream.avail_out = size; | |
55 | ||
56 | /* First header.. */ | |
57 | stream.next_in = hdr; | |
58 | stream.avail_in = hdrlen; | |
59 | while (deflate(&stream, 0) == Z_OK) | |
60 | /* nothing */; | |
61 | ||
62 | /* Then the data itself.. */ | |
63 | stream.next_in = buf; | |
64 | stream.avail_in = len; | |
65 | while (deflate(&stream, Z_FINISH) == Z_OK) | |
66 | /* nothing */; | |
67 | deflateEnd(&stream); | |
68 | *retsize = stream.total_out; | |
69 | return compressed; | |
70 | } | |
71 | ||
72 | static int restore_original_object(char *buf, unsigned long len, | |
73 | char *type, unsigned char *sha1) | |
74 | { | |
75 | char hdr[50]; | |
76 | int hdrlen, ret; | |
77 | void *compressed; | |
78 | unsigned long size; | |
79 | ||
80 | hdrlen = sprintf(hdr, "%s %lu", type, len)+1; | |
81 | compressed = create_object(buf, len, hdr, hdrlen, &size); | |
82 | ret = replace_object(compressed, size, sha1); | |
83 | free(compressed); | |
84 | return ret; | |
85 | } | |
86 | ||
87 | static void *create_delta_object(char *buf, unsigned long len, | |
88 | unsigned char *sha1_ref, unsigned long *size) | |
89 | { | |
90 | char hdr[50]; | |
91 | int hdrlen; | |
92 | ||
93 | /* Generate the header + sha1 of reference for delta */ | |
94 | hdrlen = sprintf(hdr, "delta %lu", len+20)+1; | |
95 | memcpy(hdr + hdrlen, sha1_ref, 20); | |
96 | hdrlen += 20; | |
97 | ||
98 | return create_object(buf, len, hdr, hdrlen, size); | |
99 | } | |
100 | ||
101 | static unsigned long get_object_size(unsigned char *sha1) | |
102 | { | |
103 | struct stat st; | |
104 | if (stat(sha1_file_name(sha1), &st)) | |
105 | die("%s: %s", sha1_to_hex(sha1), strerror(errno)); | |
106 | return st.st_size; | |
107 | } | |
108 | ||
109 | static void *get_buffer(unsigned char *sha1, char *type, unsigned long *size) | |
110 | { | |
111 | unsigned long mapsize; | |
112 | void *map = map_sha1_file(sha1, &mapsize); | |
113 | if (map) { | |
114 | void *buffer = unpack_sha1_file(map, mapsize, type, size); | |
115 | munmap(map, mapsize); | |
116 | if (buffer) | |
117 | return buffer; | |
118 | } | |
119 | error("unable to get object %s", sha1_to_hex(sha1)); | |
120 | return NULL; | |
121 | } | |
122 | ||
123 | static void *expand_delta(void *delta, unsigned long delta_size, char *type, | |
124 | unsigned long *size, unsigned int *depth, char *head) | |
125 | { | |
126 | void *buf = NULL; | |
127 | *depth++; | |
128 | if (delta_size < 20) { | |
129 | error("delta object is bad"); | |
130 | free(delta); | |
131 | } else { | |
132 | unsigned long ref_size; | |
133 | void *ref = get_buffer(delta, type, &ref_size); | |
134 | if (ref && !strcmp(type, "delta")) | |
135 | ref = expand_delta(ref, ref_size, type, &ref_size, | |
136 | depth, head); | |
137 | else | |
138 | memcpy(head, delta, 20); | |
139 | if (ref) | |
140 | buf = patch_delta(ref, ref_size, delta+20, | |
141 | delta_size-20, size); | |
142 | free(ref); | |
143 | free(delta); | |
144 | } | |
145 | return buf; | |
146 | } | |
147 | ||
148 | static char *mkdelta_usage = | |
149 | "mkdelta [ --max-depth=N ] <reference_sha1> <target_sha1> [ <next_sha1> ... ]"; | |
150 | ||
151 | int main(int argc, char **argv) | |
152 | { | |
153 | unsigned char sha1_ref[20], sha1_trg[20], head_ref[20], head_trg[20]; | |
154 | char type_ref[20], type_trg[20]; | |
155 | void *buf_ref, *buf_trg, *buf_delta; | |
156 | unsigned long size_ref, size_trg, size_orig, size_delta; | |
157 | unsigned int depth_ref, depth_trg, depth_max = -1; | |
158 | int i, verbose = 0; | |
159 | ||
160 | for (i = 1; i < argc; i++) { | |
161 | if (!strcmp(argv[i], "-v")) { | |
162 | verbose = 1; | |
163 | } else if (!strcmp(argv[i], "-d") && i+1 < argc) { | |
164 | depth_max = atoi(argv[++i]); | |
165 | } else if (!strncmp(argv[i], "--max-depth=", 12)) { | |
166 | depth_max = atoi(argv[i]+12); | |
167 | } else | |
168 | break; | |
169 | } | |
170 | ||
171 | if (i + (depth_max != 0) >= argc) | |
172 | usage(mkdelta_usage); | |
173 | ||
174 | if (get_sha1(argv[i], sha1_ref)) | |
175 | die("bad sha1 %s", argv[i]); | |
176 | depth_ref = 0; | |
177 | buf_ref = get_buffer(sha1_ref, type_ref, &size_ref); | |
178 | if (buf_ref && !strcmp(type_ref, "delta")) | |
179 | buf_ref = expand_delta(buf_ref, size_ref, type_ref, | |
180 | &size_ref, &depth_ref, head_ref); | |
181 | else | |
182 | memcpy(head_ref, sha1_ref, 20); | |
183 | if (!buf_ref) | |
184 | die("unable to obtain initial object %s", argv[i]); | |
185 | ||
186 | if (depth_ref > depth_max) { | |
187 | if (restore_original_object(buf_ref, size_ref, type_ref, sha1_ref)) | |
188 | die("unable to restore %s", argv[i]); | |
189 | if (verbose) | |
190 | printf("undelta %s (depth was %d)\n", argv[i], depth_ref); | |
191 | depth_ref = 0; | |
192 | } | |
193 | ||
194 | /* | |
195 | * TODO: deltafication should be tried against any early object | |
196 | * in the object list and not only the previous object. | |
197 | */ | |
198 | ||
199 | while (++i < argc) { | |
200 | if (get_sha1(argv[i], sha1_trg)) | |
201 | die("bad sha1 %s", argv[i]); | |
202 | depth_trg = 0; | |
203 | buf_trg = get_buffer(sha1_trg, type_trg, &size_trg); | |
204 | if (buf_trg && !size_trg) { | |
205 | if (verbose) | |
206 | printf("skip %s (object is empty)\n", argv[i]); | |
207 | continue; | |
208 | } | |
209 | size_orig = size_trg; | |
210 | if (buf_trg && !strcmp(type_trg, "delta")) { | |
211 | if (!memcmp(buf_trg, sha1_ref, 20)) { | |
212 | /* delta already in place */ | |
213 | depth_ref++; | |
214 | memcpy(sha1_ref, sha1_trg, 20); | |
215 | buf_ref = patch_delta(buf_ref, size_ref, | |
216 | buf_trg+20, size_trg-20, | |
217 | &size_ref); | |
218 | if (!buf_ref) | |
219 | die("unable to apply delta %s", argv[i]); | |
220 | if (depth_ref > depth_max) { | |
221 | if (restore_original_object(buf_ref, size_ref, | |
222 | type_ref, sha1_ref)) | |
223 | die("unable to restore %s", argv[i]); | |
224 | if (verbose) | |
225 | printf("undelta %s (depth was %d)\n", argv[i], depth_ref); | |
226 | depth_ref = 0; | |
227 | continue; | |
228 | } | |
229 | if (verbose) | |
230 | printf("skip %s (delta already in place)\n", argv[i]); | |
231 | continue; | |
232 | } | |
233 | buf_trg = expand_delta(buf_trg, size_trg, type_trg, | |
234 | &size_trg, &depth_trg, head_trg); | |
235 | } else | |
236 | memcpy(head_trg, sha1_trg, 20); | |
237 | if (!buf_trg) | |
238 | die("unable to read target object %s", argv[i]); | |
239 | ||
240 | if (depth_trg > depth_max) { | |
241 | if (restore_original_object(buf_trg, size_trg, type_trg, sha1_trg)) | |
242 | die("unable to restore %s", argv[i]); | |
243 | if (verbose) | |
244 | printf("undelta %s (depth was %d)\n", argv[i], depth_trg); | |
245 | depth_trg = 0; | |
246 | size_orig = size_trg; | |
247 | } | |
248 | ||
249 | if (depth_max == 0) | |
250 | goto skip; | |
251 | ||
252 | if (strcmp(type_ref, type_trg)) | |
253 | die("type mismatch for object %s", argv[i]); | |
254 | ||
255 | if (!size_ref) { | |
256 | if (verbose) | |
257 | printf("skip %s (initial object is empty)\n", argv[i]); | |
258 | goto skip; | |
259 | } | |
260 | ||
261 | if (depth_ref + 1 > depth_max) { | |
262 | if (verbose) | |
263 | printf("skip %s (exceeding max link depth)\n", argv[i]); | |
264 | goto skip; | |
265 | } | |
266 | ||
267 | if (!memcmp(head_ref, sha1_trg, 20)) { | |
268 | if (verbose) | |
269 | printf("skip %s (would create a loop)\n", argv[i]); | |
270 | goto skip; | |
271 | } | |
272 | ||
273 | buf_delta = diff_delta(buf_ref, size_ref, buf_trg, size_trg, &size_delta); | |
274 | if (!buf_delta) | |
275 | die("out of memory"); | |
276 | ||
277 | /* no need to even try to compress if original | |
278 | uncompressed is already smaller */ | |
279 | if (size_delta+20 < size_orig) { | |
280 | void *buf_obj; | |
281 | unsigned long size_obj; | |
282 | buf_obj = create_delta_object(buf_delta, size_delta, | |
283 | sha1_ref, &size_obj); | |
284 | free(buf_delta); | |
285 | size_orig = get_object_size(sha1_trg); | |
286 | if (size_obj >= size_orig) { | |
287 | free(buf_obj); | |
288 | if (verbose) | |
289 | printf("skip %s (original is smaller)\n", argv[i]); | |
290 | goto skip; | |
291 | } | |
292 | if (replace_object(buf_obj, size_obj, sha1_trg)) | |
293 | die("unable to write delta for %s", argv[i]); | |
294 | free(buf_obj); | |
295 | depth_ref++; | |
296 | if (verbose) | |
297 | printf("delta %s (size=%ld.%02ld%%, depth=%d)\n", | |
298 | argv[i], size_obj*100 / size_orig, | |
299 | (size_obj*10000 / size_orig)%100, | |
300 | depth_ref); | |
301 | } else { | |
302 | free(buf_delta); | |
303 | if (verbose) | |
304 | printf("skip %s (original is smaller)\n", argv[i]); | |
305 | skip: | |
306 | depth_ref = depth_trg; | |
307 | memcpy(head_ref, head_trg, 20); | |
308 | } | |
309 | ||
310 | free(buf_ref); | |
311 | buf_ref = buf_trg; | |
312 | size_ref = size_trg; | |
313 | memcpy(sha1_ref, sha1_trg, 20); | |
314 | } | |
315 | ||
316 | return 0; | |
317 | } |