]>
Commit | Line | Data |
---|---|---|
c323ac7d LT |
1 | #include "cache.h" |
2 | #include "object.h" | |
3 | #include "delta.h" | |
a733cb60 | 4 | #include "pack.h" |
c38138cd | 5 | #include "csum-file.h" |
21fcd1bd | 6 | #include <sys/time.h> |
c323ac7d | 7 | |
/* Usage text passed to usage() by main() when the command line is rejected. */
static const char pack_usage[] = "git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] [--incremental] [--window=N] [--depth=N] {--stdout | base-name} < object-list";
c323ac7d | 9 | |
c323ac7d LT |
/*
 * One object that is going to be written to the pack.  Entries are
 * created zero-filled by add_object_entry() and completed by
 * check_object() and try_delta().
 */
struct object_entry {
	unsigned char sha1[20];	/* object name */
	unsigned long size;	/* uncompressed size */
	unsigned long offset;	/* offset into the final pack file (nonzero if already written) */
	unsigned int depth;	/* delta depth */
	unsigned int hash;	/* name hint hash */
	enum object_type type;	/* type to write; OBJ_DELTA when an in-pack delta is reused */
	unsigned char edge;	/* reused delta chain points at this entry. */
	enum object_type in_pack_type;	/* could be delta */
	unsigned long delta_size;	/* delta data size (uncompressed) */
	struct object_entry *delta;	/* delta base object */
	struct packed_git *in_pack;	/* already in pack */
	unsigned int in_pack_offset;	/* offset of the object inside in_pack */
};
24 | ||
3f9ac8d2 JH |
25 | /* |
26 | * Objects we are going to pack are collected in objects array (dynamically | |
27 | * expanded). nr_objects & nr_alloc control this array. They are stored | |
28 | * in the order we see -- typically rev-list --objects order that gives us | |
29 | * nice "minimum seek" order. | |
30 | * | |
31 | * sorted-by-sha and sorted-by-type are arrays of pointers that point at | |
32 | * elements in the objects array. The former is used to build the pack | |
33 | * index (lists object names in the ascending order to help offset lookup), | |
34 | * and the latter is used to group similar things together by try_delta() | |
35 | * heuristics. | |
36 | */ | |
37 | ||
/* SHA-1 over the sorted list of object names; computed in main() and used in output file names. */
static unsigned char object_list_sha1[20];
/* --non-empty: main() exits without producing a pack when no objects were listed. */
static int non_empty = 0;
/* --no-reuse-delta: check_object() will not reuse deltas found in existing packs. */
static int no_reuse_delta = 0;
/* --local: add_object_entry() skips objects found in a pack that is not pack_local. */
static int local = 0;
/* --incremental: add_object_entry() skips objects found in any existing pack. */
static int incremental = 0;
/* Arrays of pointers into objects[], built by create_sorted_list(). */
static struct object_entry **sorted_by_sha, **sorted_by_type;
/* Dynamically grown array of entries; nr_objects are in use, nr_alloc are allocated. */
static struct object_entry *objects = NULL;
static int nr_objects = 0, nr_alloc = 0;
/* Output file name prefix; stays NULL when the pack goes to stdout. */
static const char *base_name;
/* Filled in by sha1close() in write_pack_file() and appended to the index file. */
static unsigned char pack_file_sha1[20];
/* Progress output on stderr; cleared by -q. */
static int progress = 1;
c323ac7d | 49 | |
3f9ac8d2 JH |
/*
 * The object names in objects array are hashed with this hashtable,
 * to help looking up the entry by object name.  Binary search from
 * sorted_by_sha is also possible but this was easier to code and faster.
 * This hashtable is built after all the objects are seen.
 *
 * Each slot holds (index into objects[] + 1); zero marks an empty slot.
 * The table is allocated and filled by hash_objects().
 */
static int *object_ix = NULL;
static int object_ix_hashsz = 0;
58 | ||
/*
 * Pack index for existing packs give us easy access to the offsets into
 * corresponding pack file where each object's data starts, but the entries
 * do not store the size of the compressed representation (uncompressed
 * size is easily available by examining the pack entry header).  We build
 * a hashtable of existing packs (pack_revindex), and keep reverse index
 * here -- pack index file is sorted by object name mapping to offset; this
 * pack_revindex[].revindex array is an ordered list of offsets, so if you
 * know the offset of an object, next offset is where its packed
 * representation ends.
 */
struct pack_revindex {
	struct packed_git *p;		/* pack this slot describes; NULL for an empty slot */
	unsigned long *revindex;	/* sorted offsets; NULL until prepare_pack_revindex() runs */
} *pack_revindex = NULL;
/* Number of slots in pack_revindex; set by prepare_pack_ix(). */
static int pack_revindex_hashsz = 0;
75 | ||
/*
 * stats
 *
 * Counters updated by write_object() and reported by main():
 * every object written, those written as deltas, those copied
 * verbatim from an existing pack, and the copied ones that are deltas.
 */
static int written = 0;
static int written_delta = 0;
static int reused = 0;
static int reused_delta = 0;
3f9ac8d2 JH |
83 | |
84 | static int pack_revindex_ix(struct packed_git *p) | |
85 | { | |
86 | unsigned int ui = (unsigned int) p; | |
87 | int i; | |
88 | ||
89 | ui = ui ^ (ui >> 16); /* defeat structure alignment */ | |
90 | i = (int)(ui % pack_revindex_hashsz); | |
91 | while (pack_revindex[i].p) { | |
92 | if (pack_revindex[i].p == p) | |
93 | return i; | |
94 | if (++i == pack_revindex_hashsz) | |
95 | i = 0; | |
96 | } | |
97 | return -1 - i; | |
98 | } | |
99 | ||
100 | static void prepare_pack_ix(void) | |
101 | { | |
102 | int num; | |
103 | struct packed_git *p; | |
104 | for (num = 0, p = packed_git; p; p = p->next) | |
105 | num++; | |
106 | if (!num) | |
107 | return; | |
108 | pack_revindex_hashsz = num * 11; | |
109 | pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz); | |
110 | for (p = packed_git; p; p = p->next) { | |
111 | num = pack_revindex_ix(p); | |
112 | num = - 1 - num; | |
113 | pack_revindex[num].p = p; | |
114 | } | |
115 | /* revindex elements are lazily initialized */ | |
116 | } | |
117 | ||
/*
 * qsort() comparison for two unsigned long offsets, ascending.
 * Returns -1, 0 or 1.
 */
static int cmp_offset(const void *a_, const void *b_)
{
	const unsigned long lhs = *(const unsigned long *) a_;
	const unsigned long rhs = *(const unsigned long *) b_;

	return (lhs > rhs) - (lhs < rhs);
}
129 | ||
130 | /* | |
131 | * Ordered list of offsets of objects in the pack. | |
132 | */ | |
133 | static void prepare_pack_revindex(struct pack_revindex *rix) | |
134 | { | |
135 | struct packed_git *p = rix->p; | |
136 | int num_ent = num_packed_objects(p); | |
137 | int i; | |
138 | void *index = p->index_base + 256; | |
139 | ||
140 | rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1)); | |
141 | for (i = 0; i < num_ent; i++) { | |
142 | long hl = *((long *)(index + 24 * i)); | |
143 | rix->revindex[i] = ntohl(hl); | |
144 | } | |
145 | /* This knows the pack format -- the 20-byte trailer | |
146 | * follows immediately after the last object data. | |
147 | */ | |
148 | rix->revindex[num_ent] = p->pack_size - 20; | |
149 | qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset); | |
150 | } | |
151 | ||
152 | static unsigned long find_packed_object_size(struct packed_git *p, | |
153 | unsigned long ofs) | |
154 | { | |
155 | int num; | |
156 | int lo, hi; | |
157 | struct pack_revindex *rix; | |
158 | unsigned long *revindex; | |
159 | num = pack_revindex_ix(p); | |
160 | if (num < 0) | |
161 | die("internal error: pack revindex uninitialized"); | |
162 | rix = &pack_revindex[num]; | |
163 | if (!rix->revindex) | |
164 | prepare_pack_revindex(rix); | |
165 | revindex = rix->revindex; | |
166 | lo = 0; | |
167 | hi = num_packed_objects(p) + 1; | |
168 | do { | |
169 | int mi = (lo + hi) / 2; | |
170 | if (revindex[mi] == ofs) { | |
171 | return revindex[mi+1] - ofs; | |
172 | } | |
173 | else if (ofs < revindex[mi]) | |
174 | hi = mi; | |
175 | else | |
176 | lo = mi + 1; | |
177 | } while (lo < hi); | |
178 | die("internal error: pack revindex corrupt"); | |
179 | } | |
180 | ||
c323ac7d LT |
181 | static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) |
182 | { | |
183 | unsigned long othersize, delta_size; | |
184 | char type[10]; | |
185 | void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize); | |
186 | void *delta_buf; | |
187 | ||
188 | if (!otherbuf) | |
189 | die("unable to read %s", sha1_to_hex(entry->delta->sha1)); | |
8ee378a0 | 190 | delta_buf = diff_delta(otherbuf, othersize, |
dcde55bc | 191 | buf, size, &delta_size, 0); |
75c42d8c | 192 | if (!delta_buf || delta_size != entry->delta_size) |
c323ac7d LT |
193 | die("delta size changed"); |
194 | free(buf); | |
195 | free(otherbuf); | |
196 | return delta_buf; | |
197 | } | |
198 | ||
a733cb60 LT |
199 | /* |
200 | * The per-object header is a pretty dense thing, which is | |
201 | * - first byte: low four bits are "size", then three bits of "type", | |
202 | * and the high bit is "size continues". | |
203 | * - each byte afterwards: low seven bits are size continuation, | |
204 | * with the high bit being "size continues" | |
205 | */ | |
206 | static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr) | |
207 | { | |
01247d87 | 208 | int n = 1; |
a733cb60 LT |
209 | unsigned char c; |
210 | ||
211 | if (type < OBJ_COMMIT || type > OBJ_DELTA) | |
212 | die("bad type %d", type); | |
213 | ||
01247d87 LT |
214 | c = (type << 4) | (size & 15); |
215 | size >>= 4; | |
216 | while (size) { | |
a733cb60 | 217 | *hdr++ = c | 0x80; |
01247d87 LT |
218 | c = size & 0x7f; |
219 | size >>= 7; | |
220 | n++; | |
a733cb60 LT |
221 | } |
222 | *hdr = c; | |
223 | return n; | |
224 | } | |
225 | ||
c38138cd | 226 | static unsigned long write_object(struct sha1file *f, struct object_entry *entry) |
c323ac7d LT |
227 | { |
228 | unsigned long size; | |
229 | char type[10]; | |
3f9ac8d2 | 230 | void *buf; |
a733cb60 | 231 | unsigned char header[10]; |
c323ac7d | 232 | unsigned hdrlen, datalen; |
a733cb60 | 233 | enum object_type obj_type; |
ab7cd7bb | 234 | int to_reuse = 0; |
c323ac7d | 235 | |
a733cb60 | 236 | obj_type = entry->type; |
ab7cd7bb JH |
237 | if (! entry->in_pack) |
238 | to_reuse = 0; /* can't reuse what we don't have */ | |
239 | else if (obj_type == OBJ_DELTA) | |
240 | to_reuse = 1; /* check_object() decided it for us */ | |
241 | else if (obj_type != entry->in_pack_type) | |
242 | to_reuse = 0; /* pack has delta which is unusable */ | |
243 | else if (entry->delta) | |
244 | to_reuse = 0; /* we want to pack afresh */ | |
245 | else | |
246 | to_reuse = 1; /* we have it in-pack undeltified, | |
247 | * and we do not need to deltify it. | |
248 | */ | |
249 | ||
250 | if (! to_reuse) { | |
3f9ac8d2 JH |
251 | buf = read_sha1_file(entry->sha1, type, &size); |
252 | if (!buf) | |
253 | die("unable to read %s", sha1_to_hex(entry->sha1)); | |
254 | if (size != entry->size) | |
255 | die("object %s size inconsistency (%lu vs %lu)", | |
256 | sha1_to_hex(entry->sha1), size, entry->size); | |
257 | if (entry->delta) { | |
258 | buf = delta_against(buf, size, entry); | |
259 | size = entry->delta_size; | |
260 | obj_type = OBJ_DELTA; | |
261 | } | |
262 | /* | |
263 | * The object header is a byte of 'type' followed by zero or | |
264 | * more bytes of length. For deltas, the 20 bytes of delta | |
265 | * sha1 follows that. | |
266 | */ | |
267 | hdrlen = encode_header(obj_type, size, header); | |
268 | sha1write(f, header, hdrlen); | |
269 | ||
270 | if (entry->delta) { | |
271 | sha1write(f, entry->delta, 20); | |
272 | hdrlen += 20; | |
273 | } | |
274 | datalen = sha1write_compressed(f, buf, size); | |
275 | free(buf); | |
c323ac7d | 276 | } |
3f9ac8d2 JH |
277 | else { |
278 | struct packed_git *p = entry->in_pack; | |
279 | use_packed_git(p); | |
280 | ||
281 | datalen = find_packed_object_size(p, entry->in_pack_offset); | |
282 | buf = p->pack_base + entry->in_pack_offset; | |
283 | sha1write(f, buf, datalen); | |
284 | unuse_packed_git(p); | |
285 | hdrlen = 0; /* not really */ | |
ab7cd7bb JH |
286 | if (obj_type == OBJ_DELTA) |
287 | reused_delta++; | |
3f9ac8d2 | 288 | reused++; |
a733cb60 | 289 | } |
ab7cd7bb JH |
290 | if (obj_type == OBJ_DELTA) |
291 | written_delta++; | |
3f9ac8d2 | 292 | written++; |
c323ac7d LT |
293 | return hdrlen + datalen; |
294 | } | |
295 | ||
9d5ab962 JH |
296 | static unsigned long write_one(struct sha1file *f, |
297 | struct object_entry *e, | |
298 | unsigned long offset) | |
299 | { | |
300 | if (e->offset) | |
301 | /* offset starts from header size and cannot be zero | |
302 | * if it is written already. | |
303 | */ | |
304 | return offset; | |
305 | e->offset = offset; | |
306 | offset += write_object(f, e); | |
82f9d58a | 307 | /* if we are deltified, write out its base object. */ |
9d5ab962 JH |
308 | if (e->delta) |
309 | offset = write_one(f, e->delta, offset); | |
310 | return offset; | |
311 | } | |
312 | ||
c323ac7d LT |
313 | static void write_pack_file(void) |
314 | { | |
315 | int i; | |
d22b9290 | 316 | struct sha1file *f; |
a733cb60 | 317 | unsigned long offset; |
a733cb60 | 318 | struct pack_header hdr; |
c323ac7d | 319 | |
d22b9290 LT |
320 | if (!base_name) |
321 | f = sha1fd(1, "<stdout>"); | |
322 | else | |
5f3de58f | 323 | f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "pack"); |
a733cb60 | 324 | hdr.hdr_signature = htonl(PACK_SIGNATURE); |
01247d87 | 325 | hdr.hdr_version = htonl(PACK_VERSION); |
a733cb60 LT |
326 | hdr.hdr_entries = htonl(nr_objects); |
327 | sha1write(f, &hdr, sizeof(hdr)); | |
328 | offset = sizeof(hdr); | |
9d5ab962 JH |
329 | for (i = 0; i < nr_objects; i++) |
330 | offset = write_one(f, objects + i, offset); | |
331 | ||
e1808845 | 332 | sha1close(f, pack_file_sha1, 1); |
c323ac7d LT |
333 | } |
334 | ||
335 | static void write_index_file(void) | |
336 | { | |
337 | int i; | |
5f3de58f | 338 | struct sha1file *f = sha1create("%s-%s.%s", base_name, sha1_to_hex(object_list_sha1), "idx"); |
c323ac7d LT |
339 | struct object_entry **list = sorted_by_sha; |
340 | struct object_entry **last = list + nr_objects; | |
341 | unsigned int array[256]; | |
342 | ||
343 | /* | |
344 | * Write the first-level table (the list is sorted, | |
345 | * but we use a 256-entry lookup to be able to avoid | |
e1808845 | 346 | * having to do eight extra binary search iterations). |
c323ac7d LT |
347 | */ |
348 | for (i = 0; i < 256; i++) { | |
349 | struct object_entry **next = list; | |
350 | while (next < last) { | |
351 | struct object_entry *entry = *next; | |
352 | if (entry->sha1[0] != i) | |
353 | break; | |
354 | next++; | |
355 | } | |
356 | array[i] = htonl(next - sorted_by_sha); | |
357 | list = next; | |
358 | } | |
c38138cd | 359 | sha1write(f, array, 256 * sizeof(int)); |
c323ac7d LT |
360 | |
361 | /* | |
362 | * Write the actual SHA1 entries.. | |
363 | */ | |
364 | list = sorted_by_sha; | |
49397104 | 365 | for (i = 0; i < nr_objects; i++) { |
c323ac7d LT |
366 | struct object_entry *entry = *list++; |
367 | unsigned int offset = htonl(entry->offset); | |
c38138cd LT |
368 | sha1write(f, &offset, 4); |
369 | sha1write(f, entry->sha1, 20); | |
c323ac7d | 370 | } |
e1808845 LT |
371 | sha1write(f, pack_file_sha1, 20); |
372 | sha1close(f, NULL, 1); | |
c323ac7d LT |
373 | } |
374 | ||
5f3de58f | 375 | static int add_object_entry(unsigned char *sha1, unsigned int hash) |
c323ac7d LT |
376 | { |
377 | unsigned int idx = nr_objects; | |
378 | struct object_entry *entry; | |
3f9ac8d2 | 379 | struct packed_git *p; |
ab7cd7bb JH |
380 | unsigned int found_offset = 0; |
381 | struct packed_git *found_pack = NULL; | |
3f9ac8d2 | 382 | |
3f9ac8d2 JH |
383 | for (p = packed_git; p; p = p->next) { |
384 | struct pack_entry e; | |
385 | if (find_pack_entry_one(sha1, &e, p)) { | |
386 | if (incremental) | |
387 | return 0; | |
388 | if (local && !p->pack_local) | |
389 | return 0; | |
390 | if (!found_pack) { | |
391 | found_offset = e.offset; | |
392 | found_pack = e.p; | |
64560374 LT |
393 | } |
394 | } | |
395 | } | |
eb019375 | 396 | |
c323ac7d LT |
397 | if (idx >= nr_alloc) { |
398 | unsigned int needed = (idx + 1024) * 3 / 2; | |
399 | objects = xrealloc(objects, needed * sizeof(*entry)); | |
400 | nr_alloc = needed; | |
401 | } | |
402 | entry = objects + idx; | |
403 | memset(entry, 0, sizeof(*entry)); | |
404 | memcpy(entry->sha1, sha1, 20); | |
27225f2e | 405 | entry->hash = hash; |
3f9ac8d2 JH |
406 | if (found_pack) { |
407 | entry->in_pack = found_pack; | |
408 | entry->in_pack_offset = found_offset; | |
409 | } | |
c323ac7d | 410 | nr_objects = idx+1; |
5f3de58f | 411 | return 1; |
c323ac7d LT |
412 | } |
413 | ||
3f9ac8d2 JH |
414 | static int locate_object_entry_hash(unsigned char *sha1) |
415 | { | |
416 | int i; | |
417 | unsigned int ui; | |
418 | memcpy(&ui, sha1, sizeof(unsigned int)); | |
419 | i = ui % object_ix_hashsz; | |
420 | while (0 < object_ix[i]) { | |
421 | if (!memcmp(sha1, objects[object_ix[i]-1].sha1, 20)) | |
422 | return i; | |
423 | if (++i == object_ix_hashsz) | |
424 | i = 0; | |
425 | } | |
426 | return -1 - i; | |
427 | } | |
428 | ||
429 | static struct object_entry *locate_object_entry(unsigned char *sha1) | |
430 | { | |
431 | int i = locate_object_entry_hash(sha1); | |
432 | if (0 <= i) | |
433 | return &objects[object_ix[i]-1]; | |
434 | return NULL; | |
435 | } | |
436 | ||
c323ac7d LT |
437 | static void check_object(struct object_entry *entry) |
438 | { | |
36e4d74a JH |
439 | char type[20]; |
440 | ||
3f9ac8d2 | 441 | if (entry->in_pack) { |
ab7cd7bb JH |
442 | unsigned char base[20]; |
443 | unsigned long size; | |
444 | struct object_entry *base_entry; | |
445 | ||
446 | /* We want in_pack_type even if we do not reuse delta. | |
447 | * There is no point not reusing non-delta representations. | |
448 | */ | |
449 | check_reuse_pack_delta(entry->in_pack, | |
450 | entry->in_pack_offset, | |
451 | base, &size, | |
452 | &entry->in_pack_type); | |
453 | ||
3f9ac8d2 JH |
454 | /* Check if it is delta, and the base is also an object |
455 | * we are going to pack. If so we will reuse the existing | |
456 | * delta. | |
457 | */ | |
ab7cd7bb JH |
458 | if (!no_reuse_delta && |
459 | entry->in_pack_type == OBJ_DELTA && | |
3f9ac8d2 | 460 | (base_entry = locate_object_entry(base))) { |
ab7cd7bb JH |
461 | |
462 | /* Depth value does not matter - find_deltas() | |
463 | * will never consider reused delta as the | |
464 | * base object to deltify other objects | |
465 | * against, in order to avoid circular deltas. | |
3f9ac8d2 | 466 | */ |
ab7cd7bb JH |
467 | |
468 | /* uncompressed size of the delta data */ | |
3f9ac8d2 JH |
469 | entry->size = entry->delta_size = size; |
470 | entry->delta = base_entry; | |
471 | entry->type = OBJ_DELTA; | |
ab7cd7bb JH |
472 | |
473 | base_entry->edge = 1; | |
474 | ||
3f9ac8d2 JH |
475 | return; |
476 | } | |
477 | /* Otherwise we would do the usual */ | |
36e4d74a | 478 | } |
3f9ac8d2 JH |
479 | |
480 | if (sha1_object_info(entry->sha1, type, &entry->size)) | |
36e4d74a JH |
481 | die("unable to get type of object %s", |
482 | sha1_to_hex(entry->sha1)); | |
3f9ac8d2 JH |
483 | |
484 | if (!strcmp(type, "commit")) { | |
485 | entry->type = OBJ_COMMIT; | |
486 | } else if (!strcmp(type, "tree")) { | |
487 | entry->type = OBJ_TREE; | |
488 | } else if (!strcmp(type, "blob")) { | |
489 | entry->type = OBJ_BLOB; | |
490 | } else if (!strcmp(type, "tag")) { | |
491 | entry->type = OBJ_TAG; | |
492 | } else | |
493 | die("unable to pack object %s of type %s", | |
494 | sha1_to_hex(entry->sha1), type); | |
495 | } | |
496 | ||
497 | static void hash_objects(void) | |
498 | { | |
499 | int i; | |
500 | struct object_entry *oe; | |
501 | ||
502 | object_ix_hashsz = nr_objects * 2; | |
503 | object_ix = xcalloc(sizeof(int), object_ix_hashsz); | |
504 | for (i = 0, oe = objects; i < nr_objects; i++, oe++) { | |
505 | int ix = locate_object_entry_hash(oe->sha1); | |
506 | if (0 <= ix) { | |
507 | error("the same object '%s' added twice", | |
508 | sha1_to_hex(oe->sha1)); | |
509 | continue; | |
510 | } | |
511 | ix = -1 - ix; | |
512 | object_ix[ix] = i + 1; | |
513 | } | |
c323ac7d LT |
514 | } |
515 | ||
516 | static void get_object_details(void) | |
517 | { | |
518 | int i; | |
519 | struct object_entry *entry = objects; | |
520 | ||
3f9ac8d2 JH |
521 | hash_objects(); |
522 | prepare_pack_ix(); | |
c323ac7d LT |
523 | for (i = 0; i < nr_objects; i++) |
524 | check_object(entry++); | |
525 | } | |
526 | ||
/* Ordering function over two object entries, in the style of a qsort() comparison. */
typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);

/* Ordering used by sort_comparator(); set by create_sorted_list() right before it calls qsort(). */
static entry_sort_t current_sort;
530 | ||
/*
 * qsort() callback for arrays of object_entry pointers: dereferences
 * both elements and hands them to the ordering in current_sort.
 */
static int sort_comparator(const void *_a, const void *_b)
{
	struct object_entry *const *lhs = _a;
	struct object_entry *const *rhs = _b;

	return current_sort(*lhs, *rhs);
}
537 | ||
538 | static struct object_entry **create_sorted_list(entry_sort_t sort) | |
539 | { | |
540 | struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *)); | |
541 | int i; | |
542 | ||
543 | for (i = 0; i < nr_objects; i++) | |
544 | list[i] = objects + i; | |
545 | current_sort = sort; | |
546 | qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator); | |
547 | return list; | |
548 | } | |
549 | ||
550 | static int sha1_sort(const struct object_entry *a, const struct object_entry *b) | |
551 | { | |
552 | return memcmp(a->sha1, b->sha1, 20); | |
553 | } | |
554 | ||
555 | static int type_size_sort(const struct object_entry *a, const struct object_entry *b) | |
556 | { | |
557 | if (a->type < b->type) | |
558 | return -1; | |
559 | if (a->type > b->type) | |
560 | return 1; | |
27225f2e LT |
561 | if (a->hash < b->hash) |
562 | return -1; | |
563 | if (a->hash > b->hash) | |
564 | return 1; | |
c323ac7d LT |
565 | if (a->size < b->size) |
566 | return -1; | |
567 | if (a->size > b->size) | |
568 | return 1; | |
569 | return a < b ? -1 : (a > b); | |
570 | } | |
571 | ||
/* One slot of the window of recently seen objects kept by find_deltas(). */
struct unpacked {
	struct object_entry *entry;	/* object held in this slot; NULL while the slot is unused */
	void *data;			/* its contents as returned by read_sha1_file() */
};
576 | ||
577 | /* | |
521a4f4c LT |
578 | * We search for deltas _backwards_ in a list sorted by type and |
579 | * by size, so that we see progressively smaller and smaller files. | |
580 | * That's because we prefer deltas to be from the bigger file | |
581 | * to the smaller - deletes are potentially cheaper, but perhaps | |
582 | * more importantly, the bigger file is likely the more recent | |
583 | * one. | |
c323ac7d | 584 | */ |
d116a45a | 585 | static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) |
c323ac7d LT |
586 | { |
587 | struct object_entry *cur_entry = cur->entry; | |
588 | struct object_entry *old_entry = old->entry; | |
521a4f4c | 589 | unsigned long size, oldsize, delta_size, sizediff; |
75c42d8c | 590 | long max_size; |
c323ac7d LT |
591 | void *delta_buf; |
592 | ||
593 | /* Don't bother doing diffs between different types */ | |
594 | if (cur_entry->type != old_entry->type) | |
595 | return -1; | |
596 | ||
ab7cd7bb JH |
597 | /* If the current object is at edge, take the depth the objects |
598 | * that depend on the current object into account -- otherwise | |
599 | * they would become too deep. | |
600 | */ | |
601 | if (cur_entry->edge) | |
602 | max_depth /= 4; | |
603 | ||
c323ac7d | 604 | size = cur_entry->size; |
75c42d8c LT |
605 | if (size < 50) |
606 | return -1; | |
c323ac7d | 607 | oldsize = old_entry->size; |
521a4f4c LT |
608 | sizediff = oldsize > size ? oldsize - size : size - oldsize; |
609 | if (sizediff > size / 8) | |
c323ac7d | 610 | return -1; |
d116a45a LT |
611 | if (old_entry->depth >= max_depth) |
612 | return 0; | |
c323ac7d LT |
613 | |
614 | /* | |
615 | * NOTE! | |
616 | * | |
617 | * We always delta from the bigger to the smaller, since that's | |
618 | * more space-efficient (deletes don't have to say _what_ they | |
619 | * delete). | |
620 | */ | |
75c42d8c LT |
621 | max_size = size / 2 - 20; |
622 | if (cur_entry->delta) | |
623 | max_size = cur_entry->delta_size-1; | |
27225f2e LT |
624 | if (sizediff >= max_size) |
625 | return -1; | |
8ee378a0 JH |
626 | delta_buf = diff_delta(old->data, oldsize, |
627 | cur->data, size, &delta_size, max_size); | |
c323ac7d | 628 | if (!delta_buf) |
75c42d8c LT |
629 | return 0; |
630 | cur_entry->delta = old_entry; | |
631 | cur_entry->delta_size = delta_size; | |
d116a45a | 632 | cur_entry->depth = old_entry->depth + 1; |
c323ac7d | 633 | free(delta_buf); |
eb41ab11 | 634 | return 0; |
c323ac7d LT |
635 | } |
636 | ||
d116a45a | 637 | static void find_deltas(struct object_entry **list, int window, int depth) |
c323ac7d | 638 | { |
521a4f4c | 639 | int i, idx; |
c323ac7d LT |
640 | unsigned int array_size = window * sizeof(struct unpacked); |
641 | struct unpacked *array = xmalloc(array_size); | |
21fcd1bd | 642 | int eye_candy; |
c323ac7d LT |
643 | |
644 | memset(array, 0, array_size); | |
521a4f4c LT |
645 | i = nr_objects; |
646 | idx = 0; | |
21fcd1bd JH |
647 | eye_candy = i - (nr_objects / 20); |
648 | ||
521a4f4c | 649 | while (--i >= 0) { |
c323ac7d LT |
650 | struct object_entry *entry = list[i]; |
651 | struct unpacked *n = array + idx; | |
652 | unsigned long size; | |
653 | char type[10]; | |
654 | int j; | |
655 | ||
21fcd1bd JH |
656 | if (progress && i <= eye_candy) { |
657 | eye_candy -= nr_objects / 20; | |
658 | fputc('.', stderr); | |
659 | } | |
3f9ac8d2 JH |
660 | |
661 | if (entry->delta) | |
662 | /* This happens if we decided to reuse existing | |
ab7cd7bb | 663 | * delta from a pack. "!no_reuse_delta &&" is implied. |
3f9ac8d2 JH |
664 | */ |
665 | continue; | |
666 | ||
c323ac7d LT |
667 | free(n->data); |
668 | n->entry = entry; | |
669 | n->data = read_sha1_file(entry->sha1, type, &size); | |
670 | if (size != entry->size) | |
671 | die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); | |
ab7cd7bb | 672 | |
78817c15 LT |
673 | j = window; |
674 | while (--j > 0) { | |
675 | unsigned int other_idx = idx + j; | |
c323ac7d | 676 | struct unpacked *m; |
78817c15 LT |
677 | if (other_idx >= window) |
678 | other_idx -= window; | |
c323ac7d LT |
679 | m = array + other_idx; |
680 | if (!m->entry) | |
681 | break; | |
d116a45a | 682 | if (try_delta(n, m, depth) < 0) |
c323ac7d LT |
683 | break; |
684 | } | |
521a4f4c LT |
685 | idx++; |
686 | if (idx >= window) | |
687 | idx = 0; | |
c323ac7d | 688 | } |
adee7bdf SV |
689 | |
690 | for (i = 0; i < window; ++i) | |
691 | free(array[i].data); | |
692 | free(array); | |
c323ac7d LT |
693 | } |
694 | ||
f3123c4a JH |
695 | static void prepare_pack(int window, int depth) |
696 | { | |
21fcd1bd JH |
697 | if (progress) |
698 | fprintf(stderr, "Packing %d objects", nr_objects); | |
3f9ac8d2 JH |
699 | get_object_details(); |
700 | if (progress) | |
ab7cd7bb | 701 | fputc('.', stderr); |
3f9ac8d2 | 702 | |
f3123c4a JH |
703 | sorted_by_type = create_sorted_list(type_size_sort); |
704 | if (window && depth) | |
705 | find_deltas(sorted_by_type, window+1, depth); | |
21fcd1bd JH |
706 | if (progress) |
707 | fputc('\n', stderr); | |
f3123c4a JH |
708 | write_pack_file(); |
709 | } | |
710 | ||
711 | static int reuse_cached_pack(unsigned char *sha1, int pack_to_stdout) | |
712 | { | |
713 | static const char cache[] = "pack-cache/pack-%s.%s"; | |
714 | char *cached_pack, *cached_idx; | |
715 | int ifd, ofd, ifd_ix = -1; | |
716 | ||
717 | cached_pack = git_path(cache, sha1_to_hex(sha1), "pack"); | |
718 | ifd = open(cached_pack, O_RDONLY); | |
719 | if (ifd < 0) | |
720 | return 0; | |
721 | ||
722 | if (!pack_to_stdout) { | |
723 | cached_idx = git_path(cache, sha1_to_hex(sha1), "idx"); | |
724 | ifd_ix = open(cached_idx, O_RDONLY); | |
725 | if (ifd_ix < 0) { | |
726 | close(ifd); | |
727 | return 0; | |
728 | } | |
729 | } | |
730 | ||
ab7cd7bb JH |
731 | if (progress) |
732 | fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects, | |
733 | sha1_to_hex(sha1)); | |
f3123c4a JH |
734 | |
735 | if (pack_to_stdout) { | |
736 | if (copy_fd(ifd, 1)) | |
737 | exit(1); | |
738 | close(ifd); | |
739 | } | |
740 | else { | |
741 | char name[PATH_MAX]; | |
742 | snprintf(name, sizeof(name), | |
743 | "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack"); | |
744 | ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); | |
745 | if (ofd < 0) | |
746 | die("unable to open %s (%s)", name, strerror(errno)); | |
747 | if (copy_fd(ifd, ofd)) | |
748 | exit(1); | |
749 | close(ifd); | |
750 | ||
751 | snprintf(name, sizeof(name), | |
752 | "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx"); | |
753 | ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); | |
754 | if (ofd < 0) | |
755 | die("unable to open %s (%s)", name, strerror(errno)); | |
756 | if (copy_fd(ifd_ix, ofd)) | |
757 | exit(1); | |
758 | close(ifd_ix); | |
759 | puts(sha1_to_hex(sha1)); | |
760 | } | |
761 | ||
762 | return 1; | |
763 | } | |
764 | ||
c323ac7d LT |
765 | int main(int argc, char **argv) |
766 | { | |
5f3de58f | 767 | SHA_CTX ctx; |
27225f2e | 768 | char line[PATH_MAX + 20]; |
d22b9290 | 769 | int window = 10, depth = 10, pack_to_stdout = 0; |
84c8d8ae | 770 | struct object_entry **list; |
c323ac7d | 771 | int i; |
21fcd1bd JH |
772 | struct timeval prev_tv; |
773 | int eye_candy = 0; | |
774 | int eye_candy_incr = 500; | |
775 | ||
c323ac7d | 776 | |
53228a5f JH |
777 | setup_git_directory(); |
778 | ||
c323ac7d LT |
779 | for (i = 1; i < argc; i++) { |
780 | const char *arg = argv[i]; | |
781 | ||
782 | if (*arg == '-') { | |
1c4a2912 LT |
783 | if (!strcmp("--non-empty", arg)) { |
784 | non_empty = 1; | |
785 | continue; | |
786 | } | |
64560374 LT |
787 | if (!strcmp("--local", arg)) { |
788 | local = 1; | |
789 | continue; | |
790 | } | |
eb019375 LT |
791 | if (!strcmp("--incremental", arg)) { |
792 | incremental = 1; | |
793 | continue; | |
794 | } | |
c323ac7d LT |
795 | if (!strncmp("--window=", arg, 9)) { |
796 | char *end; | |
f846bbff | 797 | window = strtoul(arg+9, &end, 0); |
c323ac7d LT |
798 | if (!arg[9] || *end) |
799 | usage(pack_usage); | |
800 | continue; | |
801 | } | |
d116a45a LT |
802 | if (!strncmp("--depth=", arg, 8)) { |
803 | char *end; | |
804 | depth = strtoul(arg+8, &end, 0); | |
805 | if (!arg[8] || *end) | |
806 | usage(pack_usage); | |
807 | continue; | |
808 | } | |
024701f1 JH |
809 | if (!strcmp("-q", arg)) { |
810 | progress = 0; | |
811 | continue; | |
812 | } | |
ab7cd7bb JH |
813 | if (!strcmp("--no-reuse-delta", arg)) { |
814 | no_reuse_delta = 1; | |
815 | continue; | |
816 | } | |
d22b9290 LT |
817 | if (!strcmp("--stdout", arg)) { |
818 | pack_to_stdout = 1; | |
819 | continue; | |
820 | } | |
c323ac7d LT |
821 | usage(pack_usage); |
822 | } | |
823 | if (base_name) | |
824 | usage(pack_usage); | |
825 | base_name = arg; | |
826 | } | |
827 | ||
d22b9290 | 828 | if (pack_to_stdout != !base_name) |
c323ac7d LT |
829 | usage(pack_usage); |
830 | ||
64560374 | 831 | prepare_packed_git(); |
21fcd1bd JH |
832 | if (progress) { |
833 | fprintf(stderr, "Generating pack...\n"); | |
834 | gettimeofday(&prev_tv, NULL); | |
835 | } | |
c323ac7d | 836 | while (fgets(line, sizeof(line), stdin) != NULL) { |
27225f2e LT |
837 | unsigned int hash; |
838 | char *p; | |
c323ac7d | 839 | unsigned char sha1[20]; |
27225f2e | 840 | |
21fcd1bd JH |
841 | if (progress && (eye_candy <= nr_objects)) { |
842 | fprintf(stderr, "Counting objects...%d\r", nr_objects); | |
843 | if (eye_candy && (50 <= eye_candy_incr)) { | |
844 | struct timeval tv; | |
845 | int time_diff; | |
846 | gettimeofday(&tv, NULL); | |
847 | time_diff = (tv.tv_sec - prev_tv.tv_sec); | |
848 | time_diff <<= 10; | |
849 | time_diff += (tv.tv_usec - prev_tv.tv_usec); | |
850 | if ((1 << 9) < time_diff) | |
851 | eye_candy_incr += 50; | |
852 | else if (50 < eye_candy_incr) | |
853 | eye_candy_incr -= 50; | |
854 | } | |
855 | eye_candy += eye_candy_incr; | |
856 | } | |
c323ac7d | 857 | if (get_sha1_hex(line, sha1)) |
ef07618f | 858 | die("expected sha1, got garbage:\n %s", line); |
27225f2e LT |
859 | hash = 0; |
860 | p = line+40; | |
861 | while (*p) { | |
862 | unsigned char c = *p++; | |
863 | if (isspace(c)) | |
864 | continue; | |
865 | hash = hash * 11 + c; | |
866 | } | |
84c8d8ae | 867 | add_object_entry(sha1, hash); |
c323ac7d | 868 | } |
21fcd1bd JH |
869 | if (progress) |
870 | fprintf(stderr, "Done counting %d objects.\n", nr_objects); | |
1c4a2912 LT |
871 | if (non_empty && !nr_objects) |
872 | return 0; | |
c323ac7d LT |
873 | |
874 | sorted_by_sha = create_sorted_list(sha1_sort); | |
84c8d8ae JH |
875 | SHA1_Init(&ctx); |
876 | list = sorted_by_sha; | |
877 | for (i = 0; i < nr_objects; i++) { | |
878 | struct object_entry *entry = *list++; | |
879 | SHA1_Update(&ctx, entry->sha1, 20); | |
880 | } | |
881 | SHA1_Final(object_list_sha1, &ctx); | |
882 | ||
f3123c4a JH |
883 | if (reuse_cached_pack(object_list_sha1, pack_to_stdout)) |
884 | ; | |
885 | else { | |
886 | prepare_pack(window, depth); | |
887 | if (!pack_to_stdout) { | |
888 | write_index_file(); | |
889 | puts(sha1_to_hex(object_list_sha1)); | |
890 | } | |
5f3de58f | 891 | } |
ab7cd7bb JH |
892 | if (progress) |
893 | fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n", | |
894 | nr_objects, written, written_delta, reused, reused_delta); | |
c323ac7d LT |
895 | return 0; |
896 | } |