]>
Commit | Line | Data |
---|---|---|
27225f2e | 1 | #include <ctype.h> |
c323ac7d LT |
2 | #include "cache.h" |
3 | #include "object.h" | |
4 | #include "delta.h" | |
a733cb60 | 5 | #include "pack.h" |
c38138cd | 6 | #include "csum-file.h" |
c323ac7d | 7 | |
eb019375 | 8 | static const char pack_usage[] = "git-pack-objects [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list"; |
c323ac7d | 9 | |
c323ac7d LT |
10 | struct object_entry { |
11 | unsigned char sha1[20]; | |
12 | unsigned long size; | |
13 | unsigned long offset; | |
14 | unsigned int depth; | |
27225f2e | 15 | unsigned int hash; |
a733cb60 | 16 | enum object_type type; |
c323ac7d LT |
17 | unsigned long delta_size; |
18 | struct object_entry *delta; | |
19 | }; | |
20 | ||
5f3de58f | 21 | static unsigned char object_list_sha1[20]; |
1c4a2912 | 22 | static int non_empty = 0; |
eb019375 | 23 | static int incremental = 0; |
c323ac7d LT |
24 | static struct object_entry **sorted_by_sha, **sorted_by_type; |
25 | static struct object_entry *objects = NULL; | |
26 | static int nr_objects = 0, nr_alloc = 0; | |
c323ac7d | 27 | static const char *base_name; |
e1808845 | 28 | static unsigned char pack_file_sha1[20]; |
c323ac7d | 29 | |
c323ac7d LT |
30 | static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) |
31 | { | |
32 | unsigned long othersize, delta_size; | |
33 | char type[10]; | |
34 | void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize); | |
35 | void *delta_buf; | |
36 | ||
37 | if (!otherbuf) | |
38 | die("unable to read %s", sha1_to_hex(entry->delta->sha1)); | |
8ee378a0 | 39 | delta_buf = diff_delta(otherbuf, othersize, |
dcde55bc | 40 | buf, size, &delta_size, 0); |
75c42d8c | 41 | if (!delta_buf || delta_size != entry->delta_size) |
c323ac7d LT |
42 | die("delta size changed"); |
43 | free(buf); | |
44 | free(otherbuf); | |
45 | return delta_buf; | |
46 | } | |
47 | ||
a733cb60 LT |
48 | /* |
49 | * The per-object header is a pretty dense thing, which is | |
50 | * - first byte: low four bits are "size", then three bits of "type", | |
51 | * and the high bit is "size continues". | |
52 | * - each byte afterwards: low seven bits are size continuation, | |
53 | * with the high bit being "size continues" | |
54 | */ | |
55 | static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr) | |
56 | { | |
01247d87 | 57 | int n = 1; |
a733cb60 LT |
58 | unsigned char c; |
59 | ||
60 | if (type < OBJ_COMMIT || type > OBJ_DELTA) | |
61 | die("bad type %d", type); | |
62 | ||
01247d87 LT |
63 | c = (type << 4) | (size & 15); |
64 | size >>= 4; | |
65 | while (size) { | |
a733cb60 | 66 | *hdr++ = c | 0x80; |
01247d87 LT |
67 | c = size & 0x7f; |
68 | size >>= 7; | |
69 | n++; | |
a733cb60 LT |
70 | } |
71 | *hdr = c; | |
72 | return n; | |
73 | } | |
74 | ||
c38138cd | 75 | static unsigned long write_object(struct sha1file *f, struct object_entry *entry) |
c323ac7d LT |
76 | { |
77 | unsigned long size; | |
78 | char type[10]; | |
79 | void *buf = read_sha1_file(entry->sha1, type, &size); | |
a733cb60 | 80 | unsigned char header[10]; |
c323ac7d | 81 | unsigned hdrlen, datalen; |
a733cb60 | 82 | enum object_type obj_type; |
c323ac7d LT |
83 | |
84 | if (!buf) | |
85 | die("unable to read %s", sha1_to_hex(entry->sha1)); | |
86 | if (size != entry->size) | |
87 | die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); | |
88 | ||
89 | /* | |
e1ddc976 JH |
90 | * The object header is a byte of 'type' followed by zero or |
91 | * more bytes of length. For deltas, the 20 bytes of delta sha1 | |
92 | * follows that. | |
c323ac7d | 93 | */ |
a733cb60 | 94 | obj_type = entry->type; |
c323ac7d | 95 | if (entry->delta) { |
c323ac7d LT |
96 | buf = delta_against(buf, size, entry); |
97 | size = entry->delta_size; | |
a733cb60 | 98 | obj_type = OBJ_DELTA; |
c323ac7d | 99 | } |
a733cb60 | 100 | hdrlen = encode_header(obj_type, size, header); |
c38138cd | 101 | sha1write(f, header, hdrlen); |
a733cb60 LT |
102 | if (entry->delta) { |
103 | sha1write(f, entry->delta, 20); | |
104 | hdrlen += 20; | |
105 | } | |
c38138cd | 106 | datalen = sha1write_compressed(f, buf, size); |
c323ac7d LT |
107 | free(buf); |
108 | return hdrlen + datalen; | |
109 | } | |
110 | ||
9d5ab962 JH |
111 | static unsigned long write_one(struct sha1file *f, |
112 | struct object_entry *e, | |
113 | unsigned long offset) | |
114 | { | |
115 | if (e->offset) | |
116 | /* offset starts from header size and cannot be zero | |
117 | * if it is written already. | |
118 | */ | |
119 | return offset; | |
120 | e->offset = offset; | |
121 | offset += write_object(f, e); | |
122 | /* if we are delitified, write out its base object. */ | |
123 | if (e->delta) | |
124 | offset = write_one(f, e->delta, offset); | |
125 | return offset; | |
126 | } | |
127 | ||
c323ac7d LT |
128 | static void write_pack_file(void) |
129 | { | |
130 | int i; | |
d22b9290 | 131 | struct sha1file *f; |
a733cb60 | 132 | unsigned long offset; |
c323ac7d | 133 | unsigned long mb; |
a733cb60 | 134 | struct pack_header hdr; |
c323ac7d | 135 | |
d22b9290 LT |
136 | if (!base_name) |
137 | f = sha1fd(1, "<stdout>"); | |
138 | else | |
5f3de58f | 139 | f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack"); |
a733cb60 | 140 | hdr.hdr_signature = htonl(PACK_SIGNATURE); |
01247d87 | 141 | hdr.hdr_version = htonl(PACK_VERSION); |
a733cb60 LT |
142 | hdr.hdr_entries = htonl(nr_objects); |
143 | sha1write(f, &hdr, sizeof(hdr)); | |
144 | offset = sizeof(hdr); | |
9d5ab962 JH |
145 | for (i = 0; i < nr_objects; i++) |
146 | offset = write_one(f, objects + i, offset); | |
147 | ||
e1808845 | 148 | sha1close(f, pack_file_sha1, 1); |
c323ac7d LT |
149 | mb = offset >> 20; |
150 | offset &= 0xfffff; | |
151 | } | |
152 | ||
153 | static void write_index_file(void) | |
154 | { | |
155 | int i; | |
5f3de58f | 156 | struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx"); |
c323ac7d LT |
157 | struct object_entry **list = sorted_by_sha; |
158 | struct object_entry **last = list + nr_objects; | |
159 | unsigned int array[256]; | |
160 | ||
161 | /* | |
162 | * Write the first-level table (the list is sorted, | |
163 | * but we use a 256-entry lookup to be able to avoid | |
e1808845 | 164 | * having to do eight extra binary search iterations). |
c323ac7d LT |
165 | */ |
166 | for (i = 0; i < 256; i++) { | |
167 | struct object_entry **next = list; | |
168 | while (next < last) { | |
169 | struct object_entry *entry = *next; | |
170 | if (entry->sha1[0] != i) | |
171 | break; | |
172 | next++; | |
173 | } | |
174 | array[i] = htonl(next - sorted_by_sha); | |
175 | list = next; | |
176 | } | |
c38138cd | 177 | sha1write(f, array, 256 * sizeof(int)); |
c323ac7d LT |
178 | |
179 | /* | |
180 | * Write the actual SHA1 entries.. | |
181 | */ | |
182 | list = sorted_by_sha; | |
49397104 | 183 | for (i = 0; i < nr_objects; i++) { |
c323ac7d LT |
184 | struct object_entry *entry = *list++; |
185 | unsigned int offset = htonl(entry->offset); | |
c38138cd LT |
186 | sha1write(f, &offset, 4); |
187 | sha1write(f, entry->sha1, 20); | |
c323ac7d | 188 | } |
e1808845 LT |
189 | sha1write(f, pack_file_sha1, 20); |
190 | sha1close(f, NULL, 1); | |
c323ac7d LT |
191 | } |
192 | ||
5f3de58f | 193 | static int add_object_entry(unsigned char *sha1, unsigned int hash) |
c323ac7d LT |
194 | { |
195 | unsigned int idx = nr_objects; | |
196 | struct object_entry *entry; | |
197 | ||
eb019375 | 198 | if (incremental && has_sha1_pack(sha1)) |
5f3de58f | 199 | return 0; |
eb019375 | 200 | |
c323ac7d LT |
201 | if (idx >= nr_alloc) { |
202 | unsigned int needed = (idx + 1024) * 3 / 2; | |
203 | objects = xrealloc(objects, needed * sizeof(*entry)); | |
204 | nr_alloc = needed; | |
205 | } | |
206 | entry = objects + idx; | |
207 | memset(entry, 0, sizeof(*entry)); | |
208 | memcpy(entry->sha1, sha1, 20); | |
27225f2e | 209 | entry->hash = hash; |
c323ac7d | 210 | nr_objects = idx+1; |
5f3de58f | 211 | return 1; |
c323ac7d LT |
212 | } |
213 | ||
214 | static void check_object(struct object_entry *entry) | |
215 | { | |
36e4d74a JH |
216 | char type[20]; |
217 | ||
218 | if (!sha1_object_info(entry->sha1, type, &entry->size)) { | |
219 | if (!strcmp(type, "commit")) { | |
a733cb60 | 220 | entry->type = OBJ_COMMIT; |
36e4d74a | 221 | } else if (!strcmp(type, "tree")) { |
a733cb60 | 222 | entry->type = OBJ_TREE; |
36e4d74a | 223 | } else if (!strcmp(type, "blob")) { |
a733cb60 | 224 | entry->type = OBJ_BLOB; |
a69d0943 | 225 | } else if (!strcmp(type, "tag")) { |
a733cb60 | 226 | entry->type = OBJ_TAG; |
36e4d74a JH |
227 | } else |
228 | die("unable to pack object %s of type %s", | |
229 | sha1_to_hex(entry->sha1), type); | |
230 | } | |
231 | else | |
232 | die("unable to get type of object %s", | |
233 | sha1_to_hex(entry->sha1)); | |
c323ac7d LT |
234 | } |
235 | ||
236 | static void get_object_details(void) | |
237 | { | |
238 | int i; | |
239 | struct object_entry *entry = objects; | |
240 | ||
241 | for (i = 0; i < nr_objects; i++) | |
242 | check_object(entry++); | |
243 | } | |
244 | ||
245 | typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *); | |
246 | ||
247 | static entry_sort_t current_sort; | |
248 | ||
249 | static int sort_comparator(const void *_a, const void *_b) | |
250 | { | |
251 | struct object_entry *a = *(struct object_entry **)_a; | |
252 | struct object_entry *b = *(struct object_entry **)_b; | |
253 | return current_sort(a,b); | |
254 | } | |
255 | ||
256 | static struct object_entry **create_sorted_list(entry_sort_t sort) | |
257 | { | |
258 | struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *)); | |
259 | int i; | |
260 | ||
261 | for (i = 0; i < nr_objects; i++) | |
262 | list[i] = objects + i; | |
263 | current_sort = sort; | |
264 | qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator); | |
265 | return list; | |
266 | } | |
267 | ||
268 | static int sha1_sort(const struct object_entry *a, const struct object_entry *b) | |
269 | { | |
270 | return memcmp(a->sha1, b->sha1, 20); | |
271 | } | |
272 | ||
273 | static int type_size_sort(const struct object_entry *a, const struct object_entry *b) | |
274 | { | |
275 | if (a->type < b->type) | |
276 | return -1; | |
277 | if (a->type > b->type) | |
278 | return 1; | |
27225f2e LT |
279 | if (a->hash < b->hash) |
280 | return -1; | |
281 | if (a->hash > b->hash) | |
282 | return 1; | |
c323ac7d LT |
283 | if (a->size < b->size) |
284 | return -1; | |
285 | if (a->size > b->size) | |
286 | return 1; | |
287 | return a < b ? -1 : (a > b); | |
288 | } | |
289 | ||
290 | struct unpacked { | |
291 | struct object_entry *entry; | |
292 | void *data; | |
293 | }; | |
294 | ||
295 | /* | |
521a4f4c LT |
296 | * We search for deltas _backwards_ in a list sorted by type and |
297 | * by size, so that we see progressively smaller and smaller files. | |
298 | * That's because we prefer deltas to be from the bigger file | |
299 | * to the smaller - deletes are potentially cheaper, but perhaps | |
300 | * more importantly, the bigger file is likely the more recent | |
301 | * one. | |
c323ac7d | 302 | */ |
d116a45a | 303 | static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) |
c323ac7d LT |
304 | { |
305 | struct object_entry *cur_entry = cur->entry; | |
306 | struct object_entry *old_entry = old->entry; | |
521a4f4c | 307 | unsigned long size, oldsize, delta_size, sizediff; |
75c42d8c | 308 | long max_size; |
c323ac7d LT |
309 | void *delta_buf; |
310 | ||
311 | /* Don't bother doing diffs between different types */ | |
312 | if (cur_entry->type != old_entry->type) | |
313 | return -1; | |
314 | ||
c323ac7d | 315 | size = cur_entry->size; |
75c42d8c LT |
316 | if (size < 50) |
317 | return -1; | |
c323ac7d | 318 | oldsize = old_entry->size; |
521a4f4c LT |
319 | sizediff = oldsize > size ? oldsize - size : size - oldsize; |
320 | if (sizediff > size / 8) | |
c323ac7d | 321 | return -1; |
d116a45a LT |
322 | if (old_entry->depth >= max_depth) |
323 | return 0; | |
c323ac7d LT |
324 | |
325 | /* | |
326 | * NOTE! | |
327 | * | |
328 | * We always delta from the bigger to the smaller, since that's | |
329 | * more space-efficient (deletes don't have to say _what_ they | |
330 | * delete). | |
331 | */ | |
75c42d8c LT |
332 | max_size = size / 2 - 20; |
333 | if (cur_entry->delta) | |
334 | max_size = cur_entry->delta_size-1; | |
27225f2e LT |
335 | if (sizediff >= max_size) |
336 | return -1; | |
8ee378a0 JH |
337 | delta_buf = diff_delta(old->data, oldsize, |
338 | cur->data, size, &delta_size, max_size); | |
c323ac7d | 339 | if (!delta_buf) |
75c42d8c LT |
340 | return 0; |
341 | cur_entry->delta = old_entry; | |
342 | cur_entry->delta_size = delta_size; | |
d116a45a | 343 | cur_entry->depth = old_entry->depth + 1; |
c323ac7d | 344 | free(delta_buf); |
eb41ab11 | 345 | return 0; |
c323ac7d LT |
346 | } |
347 | ||
d116a45a | 348 | static void find_deltas(struct object_entry **list, int window, int depth) |
c323ac7d | 349 | { |
521a4f4c | 350 | int i, idx; |
c323ac7d LT |
351 | unsigned int array_size = window * sizeof(struct unpacked); |
352 | struct unpacked *array = xmalloc(array_size); | |
353 | ||
354 | memset(array, 0, array_size); | |
521a4f4c LT |
355 | i = nr_objects; |
356 | idx = 0; | |
357 | while (--i >= 0) { | |
c323ac7d LT |
358 | struct object_entry *entry = list[i]; |
359 | struct unpacked *n = array + idx; | |
360 | unsigned long size; | |
361 | char type[10]; | |
362 | int j; | |
363 | ||
364 | free(n->data); | |
365 | n->entry = entry; | |
366 | n->data = read_sha1_file(entry->sha1, type, &size); | |
367 | if (size != entry->size) | |
368 | die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); | |
78817c15 LT |
369 | j = window; |
370 | while (--j > 0) { | |
371 | unsigned int other_idx = idx + j; | |
c323ac7d | 372 | struct unpacked *m; |
78817c15 LT |
373 | if (other_idx >= window) |
374 | other_idx -= window; | |
c323ac7d LT |
375 | m = array + other_idx; |
376 | if (!m->entry) | |
377 | break; | |
d116a45a | 378 | if (try_delta(n, m, depth) < 0) |
c323ac7d LT |
379 | break; |
380 | } | |
521a4f4c LT |
381 | idx++; |
382 | if (idx >= window) | |
383 | idx = 0; | |
c323ac7d LT |
384 | } |
385 | } | |
386 | ||
387 | int main(int argc, char **argv) | |
388 | { | |
5f3de58f | 389 | SHA_CTX ctx; |
27225f2e | 390 | char line[PATH_MAX + 20]; |
d22b9290 | 391 | int window = 10, depth = 10, pack_to_stdout = 0; |
c323ac7d LT |
392 | int i; |
393 | ||
394 | for (i = 1; i < argc; i++) { | |
395 | const char *arg = argv[i]; | |
396 | ||
397 | if (*arg == '-') { | |
1c4a2912 LT |
398 | if (!strcmp("--non-empty", arg)) { |
399 | non_empty = 1; | |
400 | continue; | |
401 | } | |
eb019375 LT |
402 | if (!strcmp("--incremental", arg)) { |
403 | incremental = 1; | |
404 | continue; | |
405 | } | |
c323ac7d LT |
406 | if (!strncmp("--window=", arg, 9)) { |
407 | char *end; | |
f846bbff | 408 | window = strtoul(arg+9, &end, 0); |
c323ac7d LT |
409 | if (!arg[9] || *end) |
410 | usage(pack_usage); | |
411 | continue; | |
412 | } | |
d116a45a LT |
413 | if (!strncmp("--depth=", arg, 8)) { |
414 | char *end; | |
415 | depth = strtoul(arg+8, &end, 0); | |
416 | if (!arg[8] || *end) | |
417 | usage(pack_usage); | |
418 | continue; | |
419 | } | |
d22b9290 LT |
420 | if (!strcmp("--stdout", arg)) { |
421 | pack_to_stdout = 1; | |
422 | continue; | |
423 | } | |
c323ac7d LT |
424 | usage(pack_usage); |
425 | } | |
426 | if (base_name) | |
427 | usage(pack_usage); | |
428 | base_name = arg; | |
429 | } | |
430 | ||
d22b9290 | 431 | if (pack_to_stdout != !base_name) |
c323ac7d LT |
432 | usage(pack_usage); |
433 | ||
5f3de58f | 434 | SHA1_Init(&ctx); |
c323ac7d | 435 | while (fgets(line, sizeof(line), stdin) != NULL) { |
27225f2e LT |
436 | unsigned int hash; |
437 | char *p; | |
c323ac7d | 438 | unsigned char sha1[20]; |
27225f2e | 439 | |
c323ac7d LT |
440 | if (get_sha1_hex(line, sha1)) |
441 | die("expected sha1, got garbage"); | |
27225f2e LT |
442 | hash = 0; |
443 | p = line+40; | |
444 | while (*p) { | |
445 | unsigned char c = *p++; | |
446 | if (isspace(c)) | |
447 | continue; | |
448 | hash = hash * 11 + c; | |
449 | } | |
5f3de58f LT |
450 | if (add_object_entry(sha1, hash)) |
451 | SHA1_Update(&ctx, sha1, 20); | |
c323ac7d | 452 | } |
5f3de58f | 453 | SHA1_Final(object_list_sha1, &ctx); |
1c4a2912 LT |
454 | if (non_empty && !nr_objects) |
455 | return 0; | |
c323ac7d LT |
456 | get_object_details(); |
457 | ||
d22b9290 | 458 | fprintf(stderr, "Packing %d objects\n", nr_objects); |
c323ac7d LT |
459 | |
460 | sorted_by_sha = create_sorted_list(sha1_sort); | |
461 | sorted_by_type = create_sorted_list(type_size_sort); | |
d116a45a LT |
462 | if (window && depth) |
463 | find_deltas(sorted_by_type, window+1, depth); | |
c323ac7d LT |
464 | |
465 | write_pack_file(); | |
5f3de58f | 466 | if (!pack_to_stdout) { |
d22b9290 | 467 | write_index_file(); |
5f3de58f LT |
468 | puts(sha1_to_hex(object_list_sha1)); |
469 | } | |
c323ac7d LT |
470 | return 0; |
471 | } |