]>
Commit | Line | Data |
---|---|---|
1 | #include "cache.h" | |
2 | #include "object.h" | |
3 | #include "delta.h" | |
4 | #include "pack.h" | |
5 | #include "csum-file.h" | |
6 | ||
7 | static const char pack_usage[] = "git-pack-objects [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list"; | |
8 | ||
9 | struct object_entry { | |
10 | unsigned char sha1[20]; | |
11 | unsigned long size; | |
12 | unsigned long offset; | |
13 | unsigned int depth; | |
14 | unsigned int hash; | |
15 | enum object_type type; | |
16 | unsigned long delta_size; | |
17 | struct object_entry *delta; | |
18 | }; | |
19 | ||
20 | static unsigned char object_list_sha1[20]; | |
21 | static int non_empty = 0; | |
22 | static int local = 0; | |
23 | static int incremental = 0; | |
24 | static struct object_entry **sorted_by_sha, **sorted_by_type; | |
25 | static struct object_entry *objects = NULL; | |
26 | static int nr_objects = 0, nr_alloc = 0; | |
27 | static const char *base_name; | |
28 | static unsigned char pack_file_sha1[20]; | |
29 | ||
30 | static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) | |
31 | { | |
32 | unsigned long othersize, delta_size; | |
33 | char type[10]; | |
34 | void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize); | |
35 | void *delta_buf; | |
36 | ||
37 | if (!otherbuf) | |
38 | die("unable to read %s", sha1_to_hex(entry->delta->sha1)); | |
39 | delta_buf = diff_delta(otherbuf, othersize, | |
40 | buf, size, &delta_size, 0); | |
41 | if (!delta_buf || delta_size != entry->delta_size) | |
42 | die("delta size changed"); | |
43 | free(buf); | |
44 | free(otherbuf); | |
45 | return delta_buf; | |
46 | } | |
47 | ||
48 | /* | |
49 | * The per-object header is a pretty dense thing, which is | |
50 | * - first byte: low four bits are "size", then three bits of "type", | |
51 | * and the high bit is "size continues". | |
52 | * - each byte afterwards: low seven bits are size continuation, | |
53 | * with the high bit being "size continues" | |
54 | */ | |
55 | static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr) | |
56 | { | |
57 | int n = 1; | |
58 | unsigned char c; | |
59 | ||
60 | if (type < OBJ_COMMIT || type > OBJ_DELTA) | |
61 | die("bad type %d", type); | |
62 | ||
63 | c = (type << 4) | (size & 15); | |
64 | size >>= 4; | |
65 | while (size) { | |
66 | *hdr++ = c | 0x80; | |
67 | c = size & 0x7f; | |
68 | size >>= 7; | |
69 | n++; | |
70 | } | |
71 | *hdr = c; | |
72 | return n; | |
73 | } | |
74 | ||
75 | static unsigned long write_object(struct sha1file *f, struct object_entry *entry) | |
76 | { | |
77 | unsigned long size; | |
78 | char type[10]; | |
79 | void *buf = read_sha1_file(entry->sha1, type, &size); | |
80 | unsigned char header[10]; | |
81 | unsigned hdrlen, datalen; | |
82 | enum object_type obj_type; | |
83 | ||
84 | if (!buf) | |
85 | die("unable to read %s", sha1_to_hex(entry->sha1)); | |
86 | if (size != entry->size) | |
87 | die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); | |
88 | ||
89 | /* | |
90 | * The object header is a byte of 'type' followed by zero or | |
91 | * more bytes of length. For deltas, the 20 bytes of delta sha1 | |
92 | * follows that. | |
93 | */ | |
94 | obj_type = entry->type; | |
95 | if (entry->delta) { | |
96 | buf = delta_against(buf, size, entry); | |
97 | size = entry->delta_size; | |
98 | obj_type = OBJ_DELTA; | |
99 | } | |
100 | hdrlen = encode_header(obj_type, size, header); | |
101 | sha1write(f, header, hdrlen); | |
102 | if (entry->delta) { | |
103 | sha1write(f, entry->delta, 20); | |
104 | hdrlen += 20; | |
105 | } | |
106 | datalen = sha1write_compressed(f, buf, size); | |
107 | free(buf); | |
108 | return hdrlen + datalen; | |
109 | } | |
110 | ||
111 | static unsigned long write_one(struct sha1file *f, | |
112 | struct object_entry *e, | |
113 | unsigned long offset) | |
114 | { | |
115 | if (e->offset) | |
116 | /* offset starts from header size and cannot be zero | |
117 | * if it is written already. | |
118 | */ | |
119 | return offset; | |
120 | e->offset = offset; | |
121 | offset += write_object(f, e); | |
122 | /* if we are delitified, write out its base object. */ | |
123 | if (e->delta) | |
124 | offset = write_one(f, e->delta, offset); | |
125 | return offset; | |
126 | } | |
127 | ||
128 | static void write_pack_file(void) | |
129 | { | |
130 | int i; | |
131 | struct sha1file *f; | |
132 | unsigned long offset; | |
133 | unsigned long mb; | |
134 | struct pack_header hdr; | |
135 | ||
136 | if (!base_name) | |
137 | f = sha1fd(1, "<stdout>"); | |
138 | else | |
139 | f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack"); | |
140 | hdr.hdr_signature = htonl(PACK_SIGNATURE); | |
141 | hdr.hdr_version = htonl(PACK_VERSION); | |
142 | hdr.hdr_entries = htonl(nr_objects); | |
143 | sha1write(f, &hdr, sizeof(hdr)); | |
144 | offset = sizeof(hdr); | |
145 | for (i = 0; i < nr_objects; i++) | |
146 | offset = write_one(f, objects + i, offset); | |
147 | ||
148 | sha1close(f, pack_file_sha1, 1); | |
149 | mb = offset >> 20; | |
150 | offset &= 0xfffff; | |
151 | } | |
152 | ||
153 | static void write_index_file(void) | |
154 | { | |
155 | int i; | |
156 | struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx"); | |
157 | struct object_entry **list = sorted_by_sha; | |
158 | struct object_entry **last = list + nr_objects; | |
159 | unsigned int array[256]; | |
160 | ||
161 | /* | |
162 | * Write the first-level table (the list is sorted, | |
163 | * but we use a 256-entry lookup to be able to avoid | |
164 | * having to do eight extra binary search iterations). | |
165 | */ | |
166 | for (i = 0; i < 256; i++) { | |
167 | struct object_entry **next = list; | |
168 | while (next < last) { | |
169 | struct object_entry *entry = *next; | |
170 | if (entry->sha1[0] != i) | |
171 | break; | |
172 | next++; | |
173 | } | |
174 | array[i] = htonl(next - sorted_by_sha); | |
175 | list = next; | |
176 | } | |
177 | sha1write(f, array, 256 * sizeof(int)); | |
178 | ||
179 | /* | |
180 | * Write the actual SHA1 entries.. | |
181 | */ | |
182 | list = sorted_by_sha; | |
183 | for (i = 0; i < nr_objects; i++) { | |
184 | struct object_entry *entry = *list++; | |
185 | unsigned int offset = htonl(entry->offset); | |
186 | sha1write(f, &offset, 4); | |
187 | sha1write(f, entry->sha1, 20); | |
188 | } | |
189 | sha1write(f, pack_file_sha1, 20); | |
190 | sha1close(f, NULL, 1); | |
191 | } | |
192 | ||
193 | static int add_object_entry(unsigned char *sha1, unsigned int hash) | |
194 | { | |
195 | unsigned int idx = nr_objects; | |
196 | struct object_entry *entry; | |
197 | ||
198 | if (incremental || local) { | |
199 | struct packed_git *p; | |
200 | ||
201 | for (p = packed_git; p; p = p->next) { | |
202 | struct pack_entry e; | |
203 | ||
204 | if (find_pack_entry_one(sha1, &e, p)) { | |
205 | if (incremental) | |
206 | return 0; | |
207 | if (local && !p->pack_local) | |
208 | return 0; | |
209 | } | |
210 | } | |
211 | } | |
212 | ||
213 | if (idx >= nr_alloc) { | |
214 | unsigned int needed = (idx + 1024) * 3 / 2; | |
215 | objects = xrealloc(objects, needed * sizeof(*entry)); | |
216 | nr_alloc = needed; | |
217 | } | |
218 | entry = objects + idx; | |
219 | memset(entry, 0, sizeof(*entry)); | |
220 | memcpy(entry->sha1, sha1, 20); | |
221 | entry->hash = hash; | |
222 | nr_objects = idx+1; | |
223 | return 1; | |
224 | } | |
225 | ||
226 | static void check_object(struct object_entry *entry) | |
227 | { | |
228 | char type[20]; | |
229 | ||
230 | if (!sha1_object_info(entry->sha1, type, &entry->size)) { | |
231 | if (!strcmp(type, "commit")) { | |
232 | entry->type = OBJ_COMMIT; | |
233 | } else if (!strcmp(type, "tree")) { | |
234 | entry->type = OBJ_TREE; | |
235 | } else if (!strcmp(type, "blob")) { | |
236 | entry->type = OBJ_BLOB; | |
237 | } else if (!strcmp(type, "tag")) { | |
238 | entry->type = OBJ_TAG; | |
239 | } else | |
240 | die("unable to pack object %s of type %s", | |
241 | sha1_to_hex(entry->sha1), type); | |
242 | } | |
243 | else | |
244 | die("unable to get type of object %s", | |
245 | sha1_to_hex(entry->sha1)); | |
246 | } | |
247 | ||
248 | static void get_object_details(void) | |
249 | { | |
250 | int i; | |
251 | struct object_entry *entry = objects; | |
252 | ||
253 | for (i = 0; i < nr_objects; i++) | |
254 | check_object(entry++); | |
255 | } | |
256 | ||
257 | typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *); | |
258 | ||
259 | static entry_sort_t current_sort; | |
260 | ||
261 | static int sort_comparator(const void *_a, const void *_b) | |
262 | { | |
263 | struct object_entry *a = *(struct object_entry **)_a; | |
264 | struct object_entry *b = *(struct object_entry **)_b; | |
265 | return current_sort(a,b); | |
266 | } | |
267 | ||
268 | static struct object_entry **create_sorted_list(entry_sort_t sort) | |
269 | { | |
270 | struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *)); | |
271 | int i; | |
272 | ||
273 | for (i = 0; i < nr_objects; i++) | |
274 | list[i] = objects + i; | |
275 | current_sort = sort; | |
276 | qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator); | |
277 | return list; | |
278 | } | |
279 | ||
280 | static int sha1_sort(const struct object_entry *a, const struct object_entry *b) | |
281 | { | |
282 | return memcmp(a->sha1, b->sha1, 20); | |
283 | } | |
284 | ||
285 | static int type_size_sort(const struct object_entry *a, const struct object_entry *b) | |
286 | { | |
287 | if (a->type < b->type) | |
288 | return -1; | |
289 | if (a->type > b->type) | |
290 | return 1; | |
291 | if (a->hash < b->hash) | |
292 | return -1; | |
293 | if (a->hash > b->hash) | |
294 | return 1; | |
295 | if (a->size < b->size) | |
296 | return -1; | |
297 | if (a->size > b->size) | |
298 | return 1; | |
299 | return a < b ? -1 : (a > b); | |
300 | } | |
301 | ||
302 | struct unpacked { | |
303 | struct object_entry *entry; | |
304 | void *data; | |
305 | }; | |
306 | ||
307 | /* | |
308 | * We search for deltas _backwards_ in a list sorted by type and | |
309 | * by size, so that we see progressively smaller and smaller files. | |
310 | * That's because we prefer deltas to be from the bigger file | |
311 | * to the smaller - deletes are potentially cheaper, but perhaps | |
312 | * more importantly, the bigger file is likely the more recent | |
313 | * one. | |
314 | */ | |
315 | static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) | |
316 | { | |
317 | struct object_entry *cur_entry = cur->entry; | |
318 | struct object_entry *old_entry = old->entry; | |
319 | unsigned long size, oldsize, delta_size, sizediff; | |
320 | long max_size; | |
321 | void *delta_buf; | |
322 | ||
323 | /* Don't bother doing diffs between different types */ | |
324 | if (cur_entry->type != old_entry->type) | |
325 | return -1; | |
326 | ||
327 | size = cur_entry->size; | |
328 | if (size < 50) | |
329 | return -1; | |
330 | oldsize = old_entry->size; | |
331 | sizediff = oldsize > size ? oldsize - size : size - oldsize; | |
332 | if (sizediff > size / 8) | |
333 | return -1; | |
334 | if (old_entry->depth >= max_depth) | |
335 | return 0; | |
336 | ||
337 | /* | |
338 | * NOTE! | |
339 | * | |
340 | * We always delta from the bigger to the smaller, since that's | |
341 | * more space-efficient (deletes don't have to say _what_ they | |
342 | * delete). | |
343 | */ | |
344 | max_size = size / 2 - 20; | |
345 | if (cur_entry->delta) | |
346 | max_size = cur_entry->delta_size-1; | |
347 | if (sizediff >= max_size) | |
348 | return -1; | |
349 | delta_buf = diff_delta(old->data, oldsize, | |
350 | cur->data, size, &delta_size, max_size); | |
351 | if (!delta_buf) | |
352 | return 0; | |
353 | cur_entry->delta = old_entry; | |
354 | cur_entry->delta_size = delta_size; | |
355 | cur_entry->depth = old_entry->depth + 1; | |
356 | free(delta_buf); | |
357 | return 0; | |
358 | } | |
359 | ||
360 | static void find_deltas(struct object_entry **list, int window, int depth) | |
361 | { | |
362 | int i, idx; | |
363 | unsigned int array_size = window * sizeof(struct unpacked); | |
364 | struct unpacked *array = xmalloc(array_size); | |
365 | ||
366 | memset(array, 0, array_size); | |
367 | i = nr_objects; | |
368 | idx = 0; | |
369 | while (--i >= 0) { | |
370 | struct object_entry *entry = list[i]; | |
371 | struct unpacked *n = array + idx; | |
372 | unsigned long size; | |
373 | char type[10]; | |
374 | int j; | |
375 | ||
376 | free(n->data); | |
377 | n->entry = entry; | |
378 | n->data = read_sha1_file(entry->sha1, type, &size); | |
379 | if (size != entry->size) | |
380 | die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); | |
381 | j = window; | |
382 | while (--j > 0) { | |
383 | unsigned int other_idx = idx + j; | |
384 | struct unpacked *m; | |
385 | if (other_idx >= window) | |
386 | other_idx -= window; | |
387 | m = array + other_idx; | |
388 | if (!m->entry) | |
389 | break; | |
390 | if (try_delta(n, m, depth) < 0) | |
391 | break; | |
392 | } | |
393 | idx++; | |
394 | if (idx >= window) | |
395 | idx = 0; | |
396 | } | |
397 | ||
398 | for (i = 0; i < window; ++i) | |
399 | free(array[i].data); | |
400 | free(array); | |
401 | } | |
402 | ||
403 | static void prepare_pack(int window, int depth) | |
404 | { | |
405 | get_object_details(); | |
406 | ||
407 | fprintf(stderr, "Packing %d objects\n", nr_objects); | |
408 | ||
409 | sorted_by_type = create_sorted_list(type_size_sort); | |
410 | if (window && depth) | |
411 | find_deltas(sorted_by_type, window+1, depth); | |
412 | write_pack_file(); | |
413 | } | |
414 | ||
415 | static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout) | |
416 | { | |
417 | static const char cache[] = "pack-cache/pack-%s.%s"; | |
418 | char *cached_pack, *cached_idx; | |
419 | int ifd, ofd, ifd_ix = -1; | |
420 | ||
421 | cached_pack = git_path(cache, sha1_to_hex(sha1), "pack"); | |
422 | ifd = open(cached_pack, O_RDONLY); | |
423 | if (ifd < 0) | |
424 | return 0; | |
425 | ||
426 | if (!pack_to_stdout) { | |
427 | cached_idx = git_path(cache, sha1_to_hex(sha1), "idx"); | |
428 | ifd_ix = open(cached_idx, O_RDONLY); | |
429 | if (ifd_ix < 0) { | |
430 | close(ifd); | |
431 | return 0; | |
432 | } | |
433 | } | |
434 | ||
435 | fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects, | |
436 | sha1_to_hex(sha1)); | |
437 | ||
438 | if (pack_to_stdout) { | |
439 | if (copy_fd(ifd, 1)) | |
440 | exit(1); | |
441 | close(ifd); | |
442 | } | |
443 | else { | |
444 | char name[PATH_MAX]; | |
445 | snprintf(name, sizeof(name), | |
446 | "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack"); | |
447 | ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); | |
448 | if (ofd < 0) | |
449 | die("unable to open %s (%s)", name, strerror(errno)); | |
450 | if (copy_fd(ifd, ofd)) | |
451 | exit(1); | |
452 | close(ifd); | |
453 | ||
454 | snprintf(name, sizeof(name), | |
455 | "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx"); | |
456 | ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); | |
457 | if (ofd < 0) | |
458 | die("unable to open %s (%s)", name, strerror(errno)); | |
459 | if (copy_fd(ifd_ix, ofd)) | |
460 | exit(1); | |
461 | close(ifd_ix); | |
462 | puts(sha1_to_hex(sha1)); | |
463 | } | |
464 | ||
465 | return 1; | |
466 | } | |
467 | ||
468 | int main(int argc, char **argv) | |
469 | { | |
470 | SHA_CTX ctx; | |
471 | char line[PATH_MAX + 20]; | |
472 | int window = 10, depth = 10, pack_to_stdout = 0; | |
473 | struct object_entry **list; | |
474 | int i; | |
475 | ||
476 | for (i = 1; i < argc; i++) { | |
477 | const char *arg = argv[i]; | |
478 | ||
479 | if (*arg == '-') { | |
480 | if (!strcmp("--non-empty", arg)) { | |
481 | non_empty = 1; | |
482 | continue; | |
483 | } | |
484 | if (!strcmp("--local", arg)) { | |
485 | local = 1; | |
486 | continue; | |
487 | } | |
488 | if (!strcmp("--incremental", arg)) { | |
489 | incremental = 1; | |
490 | continue; | |
491 | } | |
492 | if (!strncmp("--window=", arg, 9)) { | |
493 | char *end; | |
494 | window = strtoul(arg+9, &end, 0); | |
495 | if (!arg[9] || *end) | |
496 | usage(pack_usage); | |
497 | continue; | |
498 | } | |
499 | if (!strncmp("--depth=", arg, 8)) { | |
500 | char *end; | |
501 | depth = strtoul(arg+8, &end, 0); | |
502 | if (!arg[8] || *end) | |
503 | usage(pack_usage); | |
504 | continue; | |
505 | } | |
506 | if (!strcmp("--stdout", arg)) { | |
507 | pack_to_stdout = 1; | |
508 | continue; | |
509 | } | |
510 | usage(pack_usage); | |
511 | } | |
512 | if (base_name) | |
513 | usage(pack_usage); | |
514 | base_name = arg; | |
515 | } | |
516 | ||
517 | if (pack_to_stdout != !base_name) | |
518 | usage(pack_usage); | |
519 | ||
520 | prepare_packed_git(); | |
521 | while (fgets(line, sizeof(line), stdin) != NULL) { | |
522 | unsigned int hash; | |
523 | char *p; | |
524 | unsigned char sha1[20]; | |
525 | ||
526 | if (get_sha1_hex(line, sha1)) | |
527 | die("expected sha1, got garbage"); | |
528 | hash = 0; | |
529 | p = line+40; | |
530 | while (*p) { | |
531 | unsigned char c = *p++; | |
532 | if (isspace(c)) | |
533 | continue; | |
534 | hash = hash * 11 + c; | |
535 | } | |
536 | add_object_entry(sha1, hash); | |
537 | } | |
538 | if (non_empty && !nr_objects) | |
539 | return 0; | |
540 | ||
541 | sorted_by_sha = create_sorted_list(sha1_sort); | |
542 | SHA1_Init(&ctx); | |
543 | list = sorted_by_sha; | |
544 | for (i = 0; i < nr_objects; i++) { | |
545 | struct object_entry *entry = *list++; | |
546 | SHA1_Update(&ctx, entry->sha1, 20); | |
547 | } | |
548 | SHA1_Final(object_list_sha1, &ctx); | |
549 | ||
550 | if (reuse_cached_pack(object_list_sha1, pack_to_stdout)) | |
551 | ; | |
552 | else { | |
553 | prepare_pack(window, depth); | |
554 | if (!pack_to_stdout) { | |
555 | write_index_file(); | |
556 | puts(sha1_to_hex(object_list_sha1)); | |
557 | } | |
558 | } | |
559 | return 0; | |
560 | } |