]>
Commit | Line | Data |
---|---|---|
c323ac7d LT |
1 | #include "cache.h" |
2 | #include "object.h" | |
3 | #include "delta.h" | |
a733cb60 | 4 | #include "pack.h" |
c38138cd | 5 | #include "csum-file.h" |
21fcd1bd | 6 | #include <sys/time.h> |
b2504a0d | 7 | #include <signal.h> |
c323ac7d | 8 | |
ab7cd7bb | 9 | static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list"; |
c323ac7d | 10 | |
c323ac7d LT |
11 | struct object_entry { |
12 | unsigned char sha1[20]; | |
3f9ac8d2 | 13 | unsigned long size; /* uncompressed size */ |
15b4d577 JH |
14 | unsigned long offset; /* offset into the final pack file; |
15 | * nonzero if already written. | |
16 | */ | |
3f9ac8d2 | 17 | unsigned int depth; /* delta depth */ |
15b4d577 | 18 | unsigned int delta_limit; /* base adjustment for in-pack delta */ |
3f9ac8d2 | 19 | unsigned int hash; /* name hint hash */ |
a733cb60 | 20 | enum object_type type; |
ab7cd7bb | 21 | enum object_type in_pack_type; /* could be delta */ |
3f9ac8d2 JH |
22 | unsigned long delta_size; /* delta data size (uncompressed) */ |
23 | struct object_entry *delta; /* delta base object */ | |
24 | struct packed_git *in_pack; /* already in pack */ | |
3f9ac8d2 | 25 | unsigned int in_pack_offset; |
15b4d577 JH |
26 | struct object_entry *delta_child; /* delitified objects who bases me */ |
27 | struct object_entry *delta_sibling; /* other deltified objects who | |
28 | * uses the same base as me | |
29 | */ | |
c323ac7d LT |
30 | }; |
31 | ||
3f9ac8d2 JH |
32 | /* |
33 | * Objects we are going to pack are colected in objects array (dynamically | |
34 | * expanded). nr_objects & nr_alloc controls this array. They are stored | |
35 | * in the order we see -- typically rev-list --objects order that gives us | |
36 | * nice "minimum seek" order. | |
37 | * | |
38 | * sorted-by-sha ans sorted-by-type are arrays of pointers that point at | |
39 | * elements in the objects array. The former is used to build the pack | |
40 | * index (lists object names in the ascending order to help offset lookup), | |
41 | * and the latter is used to group similar things together by try_delta() | |
42 | * heuristics. | |
43 | */ | |
44 | ||
5f3de58f | 45 | static unsigned char object_list_sha1[20]; |
1c4a2912 | 46 | static int non_empty = 0; |
ab7cd7bb | 47 | static int no_reuse_delta = 0; |
64560374 | 48 | static int local = 0; |
eb019375 | 49 | static int incremental = 0; |
c323ac7d LT |
50 | static struct object_entry **sorted_by_sha, **sorted_by_type; |
51 | static struct object_entry *objects = NULL; | |
52 | static int nr_objects = 0, nr_alloc = 0; | |
c323ac7d | 53 | static const char *base_name; |
e1808845 | 54 | static unsigned char pack_file_sha1[20]; |
024701f1 | 55 | static int progress = 1; |
fb7a6531 | 56 | static volatile sig_atomic_t progress_update = 0; |
c323ac7d | 57 | |
3f9ac8d2 JH |
58 | /* |
59 | * The object names in objects array are hashed with this hashtable, | |
60 | * to help looking up the entry by object name. Binary search from | |
61 | * sorted_by_sha is also possible but this was easier to code and faster. | |
62 | * This hashtable is built after all the objects are seen. | |
63 | */ | |
64 | static int *object_ix = NULL; | |
65 | static int object_ix_hashsz = 0; | |
66 | ||
67 | /* | |
68 | * Pack index for existing packs give us easy access to the offsets into | |
69 | * corresponding pack file where each object's data starts, but the entries | |
70 | * do not store the size of the compressed representation (uncompressed | |
71 | * size is easily available by examining the pack entry header). We build | |
72 | * a hashtable of existing packs (pack_revindex), and keep reverse index | |
73 | * here -- pack index file is sorted by object name mapping to offset; this | |
74 | * pack_revindex[].revindex array is an ordered list of offsets, so if you | |
75 | * know the offset of an object, next offset is where its packed | |
76 | * representation ends. | |
77 | */ | |
78 | struct pack_revindex { | |
79 | struct packed_git *p; | |
80 | unsigned long *revindex; | |
81 | } *pack_revindex = NULL; | |
82 | static int pack_revindex_hashsz = 0; | |
83 | ||
84 | /* | |
85 | * stats | |
86 | */ | |
87 | static int written = 0; | |
ab7cd7bb | 88 | static int written_delta = 0; |
3f9ac8d2 | 89 | static int reused = 0; |
ab7cd7bb | 90 | static int reused_delta = 0; |
3f9ac8d2 JH |
91 | |
92 | static int pack_revindex_ix(struct packed_git *p) | |
93 | { | |
94 | unsigned int ui = (unsigned int) p; | |
95 | int i; | |
96 | ||
97 | ui = ui ^ (ui >> 16); /* defeat structure alignment */ | |
98 | i = (int)(ui % pack_revindex_hashsz); | |
99 | while (pack_revindex[i].p) { | |
100 | if (pack_revindex[i].p == p) | |
101 | return i; | |
102 | if (++i == pack_revindex_hashsz) | |
103 | i = 0; | |
104 | } | |
105 | return -1 - i; | |
106 | } | |
107 | ||
108 | static void prepare_pack_ix(void) | |
109 | { | |
110 | int num; | |
111 | struct packed_git *p; | |
112 | for (num = 0, p = packed_git; p; p = p->next) | |
113 | num++; | |
114 | if (!num) | |
115 | return; | |
116 | pack_revindex_hashsz = num * 11; | |
117 | pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz); | |
118 | for (p = packed_git; p; p = p->next) { | |
119 | num = pack_revindex_ix(p); | |
120 | num = - 1 - num; | |
121 | pack_revindex[num].p = p; | |
122 | } | |
123 | /* revindex elements are lazily initialized */ | |
124 | } | |
125 | ||
126 | static int cmp_offset(const void *a_, const void *b_) | |
127 | { | |
128 | unsigned long a = *(unsigned long *) a_; | |
129 | unsigned long b = *(unsigned long *) b_; | |
130 | if (a < b) | |
131 | return -1; | |
132 | else if (a == b) | |
133 | return 0; | |
134 | else | |
135 | return 1; | |
136 | } | |
137 | ||
138 | /* | |
139 | * Ordered list of offsets of objects in the pack. | |
140 | */ | |
141 | static void prepare_pack_revindex(struct pack_revindex *rix) | |
142 | { | |
143 | struct packed_git *p = rix->p; | |
144 | int num_ent = num_packed_objects(p); | |
145 | int i; | |
146 | void *index = p->index_base + 256; | |
147 | ||
148 | rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1)); | |
149 | for (i = 0; i < num_ent; i++) { | |
150 | long hl = *((long *)(index + 24 * i)); | |
151 | rix->revindex[i] = ntohl(hl); | |
152 | } | |
153 | /* This knows the pack format -- the 20-byte trailer | |
154 | * follows immediately after the last object data. | |
155 | */ | |
156 | rix->revindex[num_ent] = p->pack_size - 20; | |
157 | qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset); | |
158 | } | |
159 | ||
160 | static unsigned long find_packed_object_size(struct packed_git *p, | |
161 | unsigned long ofs) | |
162 | { | |
163 | int num; | |
164 | int lo, hi; | |
165 | struct pack_revindex *rix; | |
166 | unsigned long *revindex; | |
167 | num = pack_revindex_ix(p); | |
168 | if (num < 0) | |
169 | die("internal error: pack revindex uninitialized"); | |
170 | rix = &pack_revindex[num]; | |
171 | if (!rix->revindex) | |
172 | prepare_pack_revindex(rix); | |
173 | revindex = rix->revindex; | |
174 | lo = 0; | |
175 | hi = num_packed_objects(p) + 1; | |
176 | do { | |
177 | int mi = (lo + hi) / 2; | |
178 | if (revindex[mi] == ofs) { | |
179 | return revindex[mi+1] - ofs; | |
180 | } | |
181 | else if (ofs < revindex[mi]) | |
182 | hi = mi; | |
183 | else | |
184 | lo = mi + 1; | |
185 | } while (lo < hi); | |
186 | die("internal error: pack revindex corrupt"); | |
187 | } | |
188 | ||
c323ac7d LT |
189 | static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) |
190 | { | |
191 | unsigned long othersize, delta_size; | |
192 | char type[10]; | |
193 | void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize); | |
194 | void *delta_buf; | |
195 | ||
196 | if (!otherbuf) | |
197 | die("unable to read %s", sha1_to_hex(entry->delta->sha1)); | |
8ee378a0 | 198 | delta_buf = diff_delta(otherbuf, othersize, |
dcde55bc | 199 | buf, size, &delta_size, 0); |
75c42d8c | 200 | if (!delta_buf || delta_size != entry->delta_size) |
c323ac7d LT |
201 | die("delta size changed"); |
202 | free(buf); | |
203 | free(otherbuf); | |
204 | return delta_buf; | |
205 | } | |
206 | ||
a733cb60 LT |
207 | /* |
208 | * The per-object header is a pretty dense thing, which is | |
209 | * - first byte: low four bits are "size", then three bits of "type", | |
210 | * and the high bit is "size continues". | |
211 | * - each byte afterwards: low seven bits are size continuation, | |
212 | * with the high bit being "size continues" | |
213 | */ | |
214 | static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr) | |
215 | { | |
01247d87 | 216 | int n = 1; |
a733cb60 LT |
217 | unsigned char c; |
218 | ||
219 | if (type < OBJ_COMMIT || type > OBJ_DELTA) | |
220 | die("bad type %d", type); | |
221 | ||
01247d87 LT |
222 | c = (type << 4) | (size & 15); |
223 | size >>= 4; | |
224 | while (size) { | |
a733cb60 | 225 | *hdr++ = c | 0x80; |
01247d87 LT |
226 | c = size & 0x7f; |
227 | size >>= 7; | |
228 | n++; | |
a733cb60 LT |
229 | } |
230 | *hdr = c; | |
231 | return n; | |
232 | } | |
233 | ||
c38138cd | 234 | static unsigned long write_object(struct sha1file *f, struct object_entry *entry) |
c323ac7d LT |
235 | { |
236 | unsigned long size; | |
237 | char type[10]; | |
3f9ac8d2 | 238 | void *buf; |
a733cb60 | 239 | unsigned char header[10]; |
c323ac7d | 240 | unsigned hdrlen, datalen; |
a733cb60 | 241 | enum object_type obj_type; |
ab7cd7bb | 242 | int to_reuse = 0; |
c323ac7d | 243 | |
a733cb60 | 244 | obj_type = entry->type; |
ab7cd7bb JH |
245 | if (! entry->in_pack) |
246 | to_reuse = 0; /* can't reuse what we don't have */ | |
247 | else if (obj_type == OBJ_DELTA) | |
248 | to_reuse = 1; /* check_object() decided it for us */ | |
249 | else if (obj_type != entry->in_pack_type) | |
250 | to_reuse = 0; /* pack has delta which is unusable */ | |
251 | else if (entry->delta) | |
252 | to_reuse = 0; /* we want to pack afresh */ | |
253 | else | |
254 | to_reuse = 1; /* we have it in-pack undeltified, | |
255 | * and we do not need to deltify it. | |
256 | */ | |
257 | ||
258 | if (! to_reuse) { | |
3f9ac8d2 JH |
259 | buf = read_sha1_file(entry->sha1, type, &size); |
260 | if (!buf) | |
261 | die("unable to read %s", sha1_to_hex(entry->sha1)); | |
262 | if (size != entry->size) | |
263 | die("object %s size inconsistency (%lu vs %lu)", | |
264 | sha1_to_hex(entry->sha1), size, entry->size); | |
265 | if (entry->delta) { | |
266 | buf = delta_against(buf, size, entry); | |
267 | size = entry->delta_size; | |
268 | obj_type = OBJ_DELTA; | |
269 | } | |
270 | /* | |
271 | * The object header is a byte of 'type' followed by zero or | |
272 | * more bytes of length. For deltas, the 20 bytes of delta | |
273 | * sha1 follows that. | |
274 | */ | |
275 | hdrlen = encode_header(obj_type, size, header); | |
276 | sha1write(f, header, hdrlen); | |
277 | ||
278 | if (entry->delta) { | |
279 | sha1write(f, entry->delta, 20); | |
280 | hdrlen += 20; | |
281 | } | |
282 | datalen = sha1write_compressed(f, buf, size); | |
283 | free(buf); | |
c323ac7d | 284 | } |
3f9ac8d2 JH |
285 | else { |
286 | struct packed_git *p = entry->in_pack; | |
287 | use_packed_git(p); | |
288 | ||
289 | datalen = find_packed_object_size(p, entry->in_pack_offset); | |
290 | buf = p->pack_base + entry->in_pack_offset; | |
291 | sha1write(f, buf, datalen); | |
292 | unuse_packed_git(p); | |
293 | hdrlen = 0; /* not really */ | |
ab7cd7bb JH |
294 | if (obj_type == OBJ_DELTA) |
295 | reused_delta++; | |
3f9ac8d2 | 296 | reused++; |
a733cb60 | 297 | } |
ab7cd7bb JH |
298 | if (obj_type == OBJ_DELTA) |
299 | written_delta++; | |
3f9ac8d2 | 300 | written++; |
c323ac7d LT |
301 | return hdrlen + datalen; |
302 | } | |
303 | ||
9d5ab962 JH |
304 | static unsigned long write_one(struct sha1file *f, |
305 | struct object_entry *e, | |
306 | unsigned long offset) | |
307 | { | |
308 | if (e->offset) | |
309 | /* offset starts from header size and cannot be zero | |
310 | * if it is written already. | |
311 | */ | |
312 | return offset; | |
313 | e->offset = offset; | |
314 | offset += write_object(f, e); | |
82f9d58a | 315 | /* if we are deltified, write out its base object. */ |
9d5ab962 JH |
316 | if (e->delta) |
317 | offset = write_one(f, e->delta, offset); | |
318 | return offset; | |
319 | } | |
320 | ||
c323ac7d LT |
321 | static void write_pack_file(void) |
322 | { | |
323 | int i; | |
d22b9290 | 324 | struct sha1file *f; |
a733cb60 | 325 | unsigned long offset; |
a733cb60 | 326 | struct pack_header hdr; |
183bdb2c JH |
327 | unsigned last_percent = 999; |
328 | int do_progress = 0; | |
c323ac7d | 329 | |
d22b9290 LT |
330 | if (!base_name) |
331 | f = sha1fd(1, "<stdout>"); | |
183bdb2c JH |
332 | else { |
333 | f = sha1create("%s-%s.%s", base_name, | |
334 | sha1_to_hex(object_list_sha1), "pack"); | |
335 | do_progress = progress; | |
336 | } | |
337 | if (do_progress) | |
338 | fprintf(stderr, "Writing %d objects.\n", nr_objects); | |
339 | ||
a733cb60 | 340 | hdr.hdr_signature = htonl(PACK_SIGNATURE); |
01247d87 | 341 | hdr.hdr_version = htonl(PACK_VERSION); |
a733cb60 LT |
342 | hdr.hdr_entries = htonl(nr_objects); |
343 | sha1write(f, &hdr, sizeof(hdr)); | |
344 | offset = sizeof(hdr); | |
5e8dc750 | 345 | for (i = 0; i < nr_objects; i++) { |
9d5ab962 | 346 | offset = write_one(f, objects + i, offset); |
183bdb2c JH |
347 | if (do_progress) { |
348 | unsigned percent = written * 100 / nr_objects; | |
349 | if (progress_update || percent != last_percent) { | |
350 | fprintf(stderr, "%4u%% (%u/%u) done\r", | |
351 | percent, written, nr_objects); | |
352 | progress_update = 0; | |
353 | last_percent = percent; | |
354 | } | |
5e8dc750 NP |
355 | } |
356 | } | |
183bdb2c JH |
357 | if (do_progress) |
358 | fputc('\n', stderr); | |
9d5ab962 | 359 | |
e1808845 | 360 | sha1close(f, pack_file_sha1, 1); |
c323ac7d LT |
361 | } |
362 | ||
363 | static void write_index_file(void) | |
364 | { | |
365 | int i; | |
5f3de58f | 366 | struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx"); |
c323ac7d LT |
367 | struct object_entry **list = sorted_by_sha; |
368 | struct object_entry **last = list + nr_objects; | |
369 | unsigned int array[256]; | |
370 | ||
371 | /* | |
372 | * Write the first-level table (the list is sorted, | |
373 | * but we use a 256-entry lookup to be able to avoid | |
e1808845 | 374 | * having to do eight extra binary search iterations). |
c323ac7d LT |
375 | */ |
376 | for (i = 0; i < 256; i++) { | |
377 | struct object_entry **next = list; | |
378 | while (next < last) { | |
379 | struct object_entry *entry = *next; | |
380 | if (entry->sha1[0] != i) | |
381 | break; | |
382 | next++; | |
383 | } | |
384 | array[i] = htonl(next - sorted_by_sha); | |
385 | list = next; | |
386 | } | |
c38138cd | 387 | sha1write(f, array, 256 * sizeof(int)); |
c323ac7d LT |
388 | |
389 | /* | |
390 | * Write the actual SHA1 entries.. | |
391 | */ | |
392 | list = sorted_by_sha; | |
49397104 | 393 | for (i = 0; i < nr_objects; i++) { |
c323ac7d LT |
394 | struct object_entry *entry = *list++; |
395 | unsigned int offset = htonl(entry->offset); | |
c38138cd LT |
396 | sha1write(f, &offset, 4); |
397 | sha1write(f, entry->sha1, 20); | |
c323ac7d | 398 | } |
e1808845 LT |
399 | sha1write(f, pack_file_sha1, 20); |
400 | sha1close(f, NULL, 1); | |
c323ac7d LT |
401 | } |
402 | ||
5f3de58f | 403 | static int add_object_entry(unsigned char *sha1, unsigned int hash) |
c323ac7d LT |
404 | { |
405 | unsigned int idx = nr_objects; | |
406 | struct object_entry *entry; | |
3f9ac8d2 | 407 | struct packed_git *p; |
ab7cd7bb JH |
408 | unsigned int found_offset = 0; |
409 | struct packed_git *found_pack = NULL; | |
3f9ac8d2 | 410 | |
3f9ac8d2 JH |
411 | for (p = packed_git; p; p = p->next) { |
412 | struct pack_entry e; | |
413 | if (find_pack_entry_one(sha1, &e, p)) { | |
414 | if (incremental) | |
415 | return 0; | |
416 | if (local && !p->pack_local) | |
417 | return 0; | |
418 | if (!found_pack) { | |
419 | found_offset = e.offset; | |
420 | found_pack = e.p; | |
64560374 LT |
421 | } |
422 | } | |
423 | } | |
eb019375 | 424 | |
c323ac7d LT |
425 | if (idx >= nr_alloc) { |
426 | unsigned int needed = (idx + 1024) * 3 / 2; | |
427 | objects = xrealloc(objects, needed * sizeof(*entry)); | |
428 | nr_alloc = needed; | |
429 | } | |
430 | entry = objects + idx; | |
431 | memset(entry, 0, sizeof(*entry)); | |
432 | memcpy(entry->sha1, sha1, 20); | |
27225f2e | 433 | entry->hash = hash; |
3f9ac8d2 JH |
434 | if (found_pack) { |
435 | entry->in_pack = found_pack; | |
436 | entry->in_pack_offset = found_offset; | |
437 | } | |
c323ac7d | 438 | nr_objects = idx+1; |
5f3de58f | 439 | return 1; |
c323ac7d LT |
440 | } |
441 | ||
3f9ac8d2 JH |
442 | static int locate_object_entry_hash(unsigned char *sha1) |
443 | { | |
444 | int i; | |
445 | unsigned int ui; | |
446 | memcpy(&ui, sha1, sizeof(unsigned int)); | |
447 | i = ui % object_ix_hashsz; | |
448 | while (0 < object_ix[i]) { | |
449 | if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20)) | |
450 | return i; | |
451 | if (++i == object_ix_hashsz) | |
452 | i = 0; | |
453 | } | |
454 | return -1 - i; | |
455 | } | |
456 | ||
457 | static struct object_entry *locate_object_entry(unsigned char *sha1) | |
458 | { | |
459 | int i = locate_object_entry_hash(sha1); | |
460 | if (0 <= i) | |
461 | return &objects[object_ix[i]-1]; | |
462 | return NULL; | |
463 | } | |
464 | ||
c323ac7d LT |
465 | static void check_object(struct object_entry *entry) |
466 | { | |
36e4d74a JH |
467 | char type[20]; |
468 | ||
3f9ac8d2 | 469 | if (entry->in_pack) { |
ab7cd7bb JH |
470 | unsigned char base[20]; |
471 | unsigned long size; | |
472 | struct object_entry *base_entry; | |
473 | ||
474 | /* We want in_pack_type even if we do not reuse delta. | |
475 | * There is no point not reusing non-delta representations. | |
476 | */ | |
477 | check_reuse_pack_delta(entry->in_pack, | |
478 | entry->in_pack_offset, | |
479 | base, &size, | |
480 | &entry->in_pack_type); | |
481 | ||
3f9ac8d2 JH |
482 | /* Check if it is delta, and the base is also an object |
483 | * we are going to pack. If so we will reuse the existing | |
484 | * delta. | |
485 | */ | |
ab7cd7bb JH |
486 | if (!no_reuse_delta && |
487 | entry->in_pack_type == OBJ_DELTA && | |
3f9ac8d2 | 488 | (base_entry = locate_object_entry(base))) { |
ab7cd7bb JH |
489 | |
490 | /* Depth value does not matter - find_deltas() | |
491 | * will never consider reused delta as the | |
492 | * base object to deltify other objects | |
493 | * against, in order to avoid circular deltas. | |
3f9ac8d2 | 494 | */ |
ab7cd7bb JH |
495 | |
496 | /* uncompressed size of the delta data */ | |
3f9ac8d2 JH |
497 | entry->size = entry->delta_size = size; |
498 | entry->delta = base_entry; | |
499 | entry->type = OBJ_DELTA; | |
ab7cd7bb | 500 | |
15b4d577 JH |
501 | entry->delta_sibling = base_entry->delta_child; |
502 | base_entry->delta_child = entry; | |
ab7cd7bb | 503 | |
3f9ac8d2 JH |
504 | return; |
505 | } | |
506 | /* Otherwise we would do the usual */ | |
36e4d74a | 507 | } |
3f9ac8d2 JH |
508 | |
509 | if (sha1_object_info(entry->sha1, type, &entry->size)) | |
36e4d74a JH |
510 | die("unable to get type of object %s", |
511 | sha1_to_hex(entry->sha1)); | |
3f9ac8d2 JH |
512 | |
513 | if (!strcmp(type, "commit")) { | |
514 | entry->type = OBJ_COMMIT; | |
515 | } else if (!strcmp(type, "tree")) { | |
516 | entry->type = OBJ_TREE; | |
517 | } else if (!strcmp(type, "blob")) { | |
518 | entry->type = OBJ_BLOB; | |
519 | } else if (!strcmp(type, "tag")) { | |
520 | entry->type = OBJ_TAG; | |
521 | } else | |
522 | die("unable to pack object %s of type %s", | |
523 | sha1_to_hex(entry->sha1), type); | |
524 | } | |
525 | ||
526 | static void hash_objects(void) | |
527 | { | |
528 | int i; | |
529 | struct object_entry *oe; | |
530 | ||
531 | object_ix_hashsz = nr_objects * 2; | |
532 | object_ix = xcalloc(sizeof(int), object_ix_hashsz); | |
533 | for (i = 0, oe = objects; i < nr_objects; i++, oe++) { | |
534 | int ix = locate_object_entry_hash(oe->sha1); | |
535 | if (0 <= ix) { | |
536 | error("the same object '%s' added twice", | |
537 | sha1_to_hex(oe->sha1)); | |
538 | continue; | |
539 | } | |
540 | ix = -1 - ix; | |
541 | object_ix[ix] = i + 1; | |
542 | } | |
c323ac7d LT |
543 | } |
544 | ||
15b4d577 JH |
545 | static unsigned int check_delta_limit(struct object_entry *me, unsigned int n) |
546 | { | |
547 | struct object_entry *child = me->delta_child; | |
548 | unsigned int m = n; | |
549 | while (child) { | |
550 | unsigned int c = check_delta_limit(child, n + 1); | |
551 | if (m < c) | |
552 | m = c; | |
553 | child = child->delta_sibling; | |
554 | } | |
555 | return m; | |
556 | } | |
557 | ||
c323ac7d LT |
558 | static void get_object_details(void) |
559 | { | |
560 | int i; | |
15b4d577 | 561 | struct object_entry *entry; |
c323ac7d | 562 | |
3f9ac8d2 JH |
563 | hash_objects(); |
564 | prepare_pack_ix(); | |
15b4d577 JH |
565 | for (i = 0, entry = objects; i < nr_objects; i++, entry++) |
566 | check_object(entry); | |
567 | for (i = 0, entry = objects; i < nr_objects; i++, entry++) | |
568 | if (!entry->delta && entry->delta_child) | |
569 | entry->delta_limit = | |
570 | check_delta_limit(entry, 1); | |
c323ac7d LT |
571 | } |
572 | ||
573 | typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *); | |
574 | ||
575 | static entry_sort_t current_sort; | |
576 | ||
577 | static int sort_comparator(const void *_a, const void *_b) | |
578 | { | |
579 | struct object_entry *a = *(struct object_entry **)_a; | |
580 | struct object_entry *b = *(struct object_entry **)_b; | |
581 | return current_sort(a,b); | |
582 | } | |
583 | ||
584 | static struct object_entry **create_sorted_list(entry_sort_t sort) | |
585 | { | |
586 | struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *)); | |
587 | int i; | |
588 | ||
589 | for (i = 0; i < nr_objects; i++) | |
590 | list[i] = objects + i; | |
591 | current_sort = sort; | |
592 | qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator); | |
593 | return list; | |
594 | } | |
595 | ||
596 | static int sha1_sort(const struct object_entry *a, const struct object_entry *b) | |
597 | { | |
598 | return memcmp(a->sha1, b->sha1, 20); | |
599 | } | |
600 | ||
601 | static int type_size_sort(const struct object_entry *a, const struct object_entry *b) | |
602 | { | |
603 | if (a->type < b->type) | |
604 | return -1; | |
605 | if (a->type > b->type) | |
606 | return 1; | |
27225f2e LT |
607 | if (a->hash < b->hash) |
608 | return -1; | |
609 | if (a->hash > b->hash) | |
610 | return 1; | |
c323ac7d LT |
611 | if (a->size < b->size) |
612 | return -1; | |
613 | if (a->size > b->size) | |
614 | return 1; | |
615 | return a < b ? -1 : (a > b); | |
616 | } | |
617 | ||
618 | struct unpacked { | |
619 | struct object_entry *entry; | |
620 | void *data; | |
621 | }; | |
622 | ||
623 | /* | |
521a4f4c LT |
624 | * We search for deltas _backwards_ in a list sorted by type and |
625 | * by size, so that we see progressively smaller and smaller files. | |
626 | * That's because we prefer deltas to be from the bigger file | |
627 | * to the smaller - deletes are potentially cheaper, but perhaps | |
628 | * more importantly, the bigger file is likely the more recent | |
629 | * one. | |
c323ac7d | 630 | */ |
d116a45a | 631 | static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) |
c323ac7d LT |
632 | { |
633 | struct object_entry *cur_entry = cur->entry; | |
634 | struct object_entry *old_entry = old->entry; | |
521a4f4c | 635 | unsigned long size, oldsize, delta_size, sizediff; |
75c42d8c | 636 | long max_size; |
c323ac7d LT |
637 | void *delta_buf; |
638 | ||
639 | /* Don't bother doing diffs between different types */ | |
640 | if (cur_entry->type != old_entry->type) | |
641 | return -1; | |
642 | ||
ab7cd7bb JH |
643 | /* If the current object is at edge, take the depth the objects |
644 | * that depend on the current object into account -- otherwise | |
645 | * they would become too deep. | |
646 | */ | |
15b4d577 JH |
647 | if (cur_entry->delta_child) { |
648 | if (max_depth <= cur_entry->delta_limit) | |
649 | return 0; | |
650 | max_depth -= cur_entry->delta_limit; | |
651 | } | |
ab7cd7bb | 652 | |
c323ac7d | 653 | size = cur_entry->size; |
75c42d8c LT |
654 | if (size < 50) |
655 | return -1; | |
c323ac7d | 656 | oldsize = old_entry->size; |
521a4f4c LT |
657 | sizediff = oldsize > size ? oldsize - size : size - oldsize; |
658 | if (sizediff > size / 8) | |
c323ac7d | 659 | return -1; |
d116a45a LT |
660 | if (old_entry->depth >= max_depth) |
661 | return 0; | |
c323ac7d LT |
662 | |
663 | /* | |
664 | * NOTE! | |
665 | * | |
666 | * We always delta from the bigger to the smaller, since that's | |
667 | * more space-efficient (deletes don't have to say _what_ they | |
668 | * delete). | |
669 | */ | |
75c42d8c LT |
670 | max_size = size / 2 - 20; |
671 | if (cur_entry->delta) | |
672 | max_size = cur_entry->delta_size-1; | |
27225f2e LT |
673 | if (sizediff >= max_size) |
674 | return -1; | |
8ee378a0 JH |
675 | delta_buf = diff_delta(old->data, oldsize, |
676 | cur->data, size, &delta_size, max_size); | |
c323ac7d | 677 | if (!delta_buf) |
75c42d8c LT |
678 | return 0; |
679 | cur_entry->delta = old_entry; | |
680 | cur_entry->delta_size = delta_size; | |
d116a45a | 681 | cur_entry->depth = old_entry->depth + 1; |
c323ac7d | 682 | free(delta_buf); |
eb41ab11 | 683 | return 0; |
c323ac7d LT |
684 | } |
685 | ||
b2504a0d NP |
686 | static void progress_interval(int signum) |
687 | { | |
b2504a0d NP |
688 | progress_update = 1; |
689 | } | |
690 | ||
d116a45a | 691 | static void find_deltas(struct object_entry **list, int window, int depth) |
c323ac7d | 692 | { |
521a4f4c | 693 | int i, idx; |
c323ac7d LT |
694 | unsigned int array_size = window * sizeof(struct unpacked); |
695 | struct unpacked *array = xmalloc(array_size); | |
183bdb2c JH |
696 | unsigned processed = 0; |
697 | unsigned last_percent = 999; | |
c323ac7d LT |
698 | |
699 | memset(array, 0, array_size); | |
521a4f4c LT |
700 | i = nr_objects; |
701 | idx = 0; | |
183bdb2c JH |
702 | if (progress) |
703 | fprintf(stderr, "Deltifying %d objects.\n", nr_objects); | |
21fcd1bd | 704 | |
521a4f4c | 705 | while (--i >= 0) { |
c323ac7d LT |
706 | struct object_entry *entry = list[i]; |
707 | struct unpacked *n = array + idx; | |
708 | unsigned long size; | |
709 | char type[10]; | |
710 | int j; | |
711 | ||
183bdb2c JH |
712 | processed++; |
713 | if (progress) { | |
714 | unsigned percent = processed * 100 / nr_objects; | |
715 | if (percent != last_percent || progress_update) { | |
716 | fprintf(stderr, "%4u%% (%u/%u) done\r", | |
717 | percent, processed, nr_objects); | |
718 | progress_update = 0; | |
719 | last_percent = percent; | |
720 | } | |
21fcd1bd | 721 | } |
3f9ac8d2 JH |
722 | |
723 | if (entry->delta) | |
724 | /* This happens if we decided to reuse existing | |
ab7cd7bb | 725 | * delta from a pack. "!no_reuse_delta &&" is implied. |
3f9ac8d2 JH |
726 | */ |
727 | continue; | |
728 | ||
c323ac7d LT |
729 | free(n->data); |
730 | n->entry = entry; | |
731 | n->data = read_sha1_file(entry->sha1, type, &size); | |
732 | if (size != entry->size) | |
733 | die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); | |
ab7cd7bb | 734 | |
78817c15 LT |
735 | j = window; |
736 | while (--j > 0) { | |
737 | unsigned int other_idx = idx + j; | |
c323ac7d | 738 | struct unpacked *m; |
78817c15 LT |
739 | if (other_idx >= window) |
740 | other_idx -= window; | |
c323ac7d LT |
741 | m = array + other_idx; |
742 | if (!m->entry) | |
743 | break; | |
d116a45a | 744 | if (try_delta(n, m, depth) < 0) |
c323ac7d LT |
745 | break; |
746 | } | |
521a4f4c LT |
747 | idx++; |
748 | if (idx >= window) | |
749 | idx = 0; | |
c323ac7d | 750 | } |
adee7bdf | 751 | |
b2504a0d NP |
752 | if (progress) |
753 | fputc('\n', stderr); | |
754 | ||
adee7bdf SV |
755 | for (i = 0; i < window; ++i) |
756 | free(array[i].data); | |
757 | free(array); | |
c323ac7d LT |
758 | } |
759 | ||
f3123c4a JH |
760 | static void prepare_pack(int window, int depth) |
761 | { | |
3f9ac8d2 | 762 | get_object_details(); |
f3123c4a JH |
763 | sorted_by_type = create_sorted_list(type_size_sort); |
764 | if (window && depth) | |
765 | find_deltas(sorted_by_type, window+1, depth); | |
f3123c4a JH |
766 | } |
767 | ||
768 | static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout) | |
769 | { | |
770 | static const char cache[] = "pack-cache/pack-%s.%s"; | |
771 | char *cached_pack, *cached_idx; | |
772 | int ifd, ofd, ifd_ix = -1; | |
773 | ||
774 | cached_pack = git_path(cache, sha1_to_hex(sha1), "pack"); | |
775 | ifd = open(cached_pack, O_RDONLY); | |
776 | if (ifd < 0) | |
777 | return 0; | |
778 | ||
779 | if (!pack_to_stdout) { | |
780 | cached_idx = git_path(cache, sha1_to_hex(sha1), "idx"); | |
781 | ifd_ix = open(cached_idx, O_RDONLY); | |
782 | if (ifd_ix < 0) { | |
783 | close(ifd); | |
784 | return 0; | |
785 | } | |
786 | } | |
787 | ||
ab7cd7bb JH |
788 | if (progress) |
789 | fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects, | |
790 | sha1_to_hex(sha1)); | |
f3123c4a JH |
791 | |
792 | if (pack_to_stdout) { | |
793 | if (copy_fd(ifd, 1)) | |
794 | exit(1); | |
795 | close(ifd); | |
796 | } | |
797 | else { | |
798 | char name[PATH_MAX]; | |
799 | snprintf(name, sizeof(name), | |
800 | "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack"); | |
801 | ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); | |
802 | if (ofd < 0) | |
803 | die("unable to open %s (%s)", name, strerror(errno)); | |
804 | if (copy_fd(ifd, ofd)) | |
805 | exit(1); | |
806 | close(ifd); | |
807 | ||
808 | snprintf(name, sizeof(name), | |
809 | "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx"); | |
810 | ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); | |
811 | if (ofd < 0) | |
812 | die("unable to open %s (%s)", name, strerror(errno)); | |
813 | if (copy_fd(ifd_ix, ofd)) | |
814 | exit(1); | |
815 | close(ifd_ix); | |
816 | puts(sha1_to_hex(sha1)); | |
817 | } | |
818 | ||
819 | return 1; | |
820 | } | |
821 | ||
fb7a6531 LT |
822 | static void setup_progress_signal(void) |
823 | { | |
824 | struct sigaction sa; | |
825 | struct itimerval v; | |
826 | ||
827 | memset(&sa, 0, sizeof(sa)); | |
828 | sa.sa_handler = progress_interval; | |
829 | sigemptyset(&sa.sa_mask); | |
830 | sa.sa_flags = SA_RESTART; | |
831 | sigaction(SIGALRM, &sa, NULL); | |
832 | ||
833 | v.it_interval.tv_sec = 1; | |
834 | v.it_interval.tv_usec = 0; | |
835 | v.it_value = v.it_interval; | |
836 | setitimer(ITIMER_REAL, &v, NULL); | |
837 | } | |
838 | ||
c323ac7d LT |
839 | int main(int argc, char **argv) |
840 | { | |
5f3de58f | 841 | SHA_CTX ctx; |
27225f2e | 842 | char line[PATH_MAX + 20]; |
d22b9290 | 843 | int window = 10, depth = 10, pack_to_stdout = 0; |
84c8d8ae | 844 | struct object_entry **list; |
c323ac7d LT |
845 | int i; |
846 | ||
53228a5f JH |
847 | setup_git_directory(); |
848 | ||
c323ac7d LT |
849 | for (i = 1; i < argc; i++) { |
850 | const char *arg = argv[i]; | |
851 | ||
852 | if (*arg == '-') { | |
1c4a2912 LT |
853 | if (!strcmp("--non-empty", arg)) { |
854 | non_empty = 1; | |
855 | continue; | |
856 | } | |
64560374 LT |
857 | if (!strcmp("--local", arg)) { |
858 | local = 1; | |
859 | continue; | |
860 | } | |
eb019375 LT |
861 | if (!strcmp("--incremental", arg)) { |
862 | incremental = 1; | |
863 | continue; | |
864 | } | |
c323ac7d LT |
865 | if (!strncmp("--window=", arg, 9)) { |
866 | char *end; | |
f846bbff | 867 | window = strtoul(arg+9, &end, 0); |
c323ac7d LT |
868 | if (!arg[9] || *end) |
869 | usage(pack_usage); | |
870 | continue; | |
871 | } | |
d116a45a LT |
872 | if (!strncmp("--depth=", arg, 8)) { |
873 | char *end; | |
874 | depth = strtoul(arg+8, &end, 0); | |
875 | if (!arg[8] || *end) | |
876 | usage(pack_usage); | |
877 | continue; | |
878 | } | |
024701f1 JH |
879 | if (!strcmp("-q", arg)) { |
880 | progress = 0; | |
881 | continue; | |
882 | } | |
ab7cd7bb JH |
883 | if (!strcmp("--no-reuse-delta", arg)) { |
884 | no_reuse_delta = 1; | |
885 | continue; | |
886 | } | |
d22b9290 LT |
887 | if (!strcmp("--stdout", arg)) { |
888 | pack_to_stdout = 1; | |
889 | continue; | |
890 | } | |
c323ac7d LT |
891 | usage(pack_usage); |
892 | } | |
893 | if (base_name) | |
894 | usage(pack_usage); | |
895 | base_name = arg; | |
896 | } | |
897 | ||
d22b9290 | 898 | if (pack_to_stdout != !base_name) |
c323ac7d LT |
899 | usage(pack_usage); |
900 | ||
64560374 | 901 | prepare_packed_git(); |
b2504a0d | 902 | |
21fcd1bd JH |
903 | if (progress) { |
904 | fprintf(stderr, "Generating pack...\n"); | |
fb7a6531 | 905 | setup_progress_signal(); |
21fcd1bd | 906 | } |
b2504a0d | 907 | |
da93d12b | 908 | for (;;) { |
27225f2e LT |
909 | unsigned int hash; |
910 | char *p; | |
c323ac7d | 911 | unsigned char sha1[20]; |
27225f2e | 912 | |
da93d12b LT |
913 | if (!fgets(line, sizeof(line), stdin)) { |
914 | if (feof(stdin)) | |
915 | break; | |
916 | if (!ferror(stdin)) | |
917 | die("fgets returned NULL, not EOF, not error!"); | |
918 | if (errno == EINTR) | |
919 | continue; | |
920 | die("fgets: %s", strerror(errno)); | |
921 | } | |
922 | ||
b2504a0d | 923 | if (progress_update) { |
21fcd1bd | 924 | fprintf(stderr, "Counting objects...%d\r", nr_objects); |
b2504a0d | 925 | progress_update = 0; |
21fcd1bd | 926 | } |
c323ac7d | 927 | if (get_sha1_hex(line, sha1)) |
ef07618f | 928 | die("expected sha1, got garbage:\n %s", line); |
27225f2e LT |
929 | hash = 0; |
930 | p = line+40; | |
931 | while (*p) { | |
932 | unsigned char c = *p++; | |
933 | if (isspace(c)) | |
934 | continue; | |
935 | hash = hash * 11 + c; | |
936 | } | |
84c8d8ae | 937 | add_object_entry(sha1, hash); |
c323ac7d | 938 | } |
21fcd1bd JH |
939 | if (progress) |
940 | fprintf(stderr, "Done counting %d objects.\n", nr_objects); | |
1c4a2912 LT |
941 | if (non_empty && !nr_objects) |
942 | return 0; | |
c323ac7d LT |
943 | |
944 | sorted_by_sha = create_sorted_list(sha1_sort); | |
84c8d8ae JH |
945 | SHA1_Init(&ctx); |
946 | list = sorted_by_sha; | |
947 | for (i = 0; i < nr_objects; i++) { | |
948 | struct object_entry *entry = *list++; | |
949 | SHA1_Update(&ctx, entry->sha1, 20); | |
950 | } | |
951 | SHA1_Final(object_list_sha1, &ctx); | |
952 | ||
f3123c4a JH |
953 | if (reuse_cached_pack(object_list_sha1, pack_to_stdout)) |
954 | ; | |
955 | else { | |
956 | prepare_pack(window, depth); | |
5e8dc750 NP |
957 | if (progress && pack_to_stdout) { |
958 | /* the other end usually displays progress itself */ | |
959 | struct itimerval v = {{0,},}; | |
960 | setitimer(ITIMER_REAL, &v, NULL); | |
961 | signal(SIGALRM, SIG_IGN ); | |
962 | progress_update = 0; | |
963 | } | |
964 | write_pack_file(); | |
f3123c4a JH |
965 | if (!pack_to_stdout) { |
966 | write_index_file(); | |
967 | puts(sha1_to_hex(object_list_sha1)); | |
968 | } | |
5f3de58f | 969 | } |
ab7cd7bb JH |
970 | if (progress) |
971 | fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n", | |
972 | nr_objects, written, written_delta, reused, reused_delta); | |
c323ac7d LT |
973 | return 0; |
974 | } |