]>
Commit | Line | Data |
---|---|---|
5d4a6003 | 1 | #include "builtin.h" |
c323ac7d LT |
2 | #include "cache.h" |
3 | #include "object.h" | |
8e440259 PE |
4 | #include "blob.h" |
5 | #include "commit.h" | |
6 | #include "tag.h" | |
7 | #include "tree.h" | |
c323ac7d | 8 | #include "delta.h" |
a733cb60 | 9 | #include "pack.h" |
c38138cd | 10 | #include "csum-file.h" |
1b0c7174 | 11 | #include "tree-walk.h" |
b5d97e6b JH |
12 | #include "diff.h" |
13 | #include "revision.h" | |
14 | #include "list-objects.h" | |
21fcd1bd | 15 | #include <sys/time.h> |
b2504a0d | 16 | #include <signal.h> |
c323ac7d | 17 | |
/* One-line usage summary for the command. */
static const char pack_usage[] =
	"git-pack-objects [-q] [--no-reuse-delta] [--non-empty] [--local] "
	"[--incremental] [--window=N] [--depth=N] "
	"[--revs [--unpacked | --all]*] [--stdout | base-name] "
	"<ref-list | <object-list]";
c323ac7d | 19 | |
c323ac7d LT |
20 | struct object_entry { |
21 | unsigned char sha1[20]; | |
3f9ac8d2 | 22 | unsigned long size; /* uncompressed size */ |
15b4d577 JH |
23 | unsigned long offset; /* offset into the final pack file; |
24 | * nonzero if already written. | |
25 | */ | |
3f9ac8d2 | 26 | unsigned int depth; /* delta depth */ |
15b4d577 | 27 | unsigned int delta_limit; /* base adjustment for in-pack delta */ |
3f9ac8d2 | 28 | unsigned int hash; /* name hint hash */ |
a733cb60 | 29 | enum object_type type; |
ab7cd7bb | 30 | enum object_type in_pack_type; /* could be delta */ |
3f9ac8d2 JH |
31 | unsigned long delta_size; /* delta data size (uncompressed) */ |
32 | struct object_entry *delta; /* delta base object */ | |
33 | struct packed_git *in_pack; /* already in pack */ | |
3f9ac8d2 | 34 | unsigned int in_pack_offset; |
82e5a82f | 35 | struct object_entry *delta_child; /* deltified objects who bases me */ |
15b4d577 JH |
36 | struct object_entry *delta_sibling; /* other deltified objects who |
37 | * uses the same base as me | |
38 | */ | |
7a979d99 JH |
39 | int preferred_base; /* we do not pack this, but is encouraged to |
40 | * be used as the base objectto delta huge | |
41 | * objects against. | |
42 | */ | |
c323ac7d LT |
43 | }; |
44 | ||
3f9ac8d2 | 45 | /* |
82e5a82f | 46 | * Objects we are going to pack are collected in objects array (dynamically |
3f9ac8d2 JH |
47 | * expanded). nr_objects & nr_alloc controls this array. They are stored |
48 | * in the order we see -- typically rev-list --objects order that gives us | |
49 | * nice "minimum seek" order. | |
50 | * | |
51 | * sorted-by-sha and sorted-by-type are arrays of pointers that point at | |
52 | * elements in the objects array. The former is used to build the pack | |
53 | * index (lists object names in the ascending order to help offset lookup), | |
54 | * and the latter is used to group similar things together by try_delta() | |
55 | * heuristics. | |
56 | */ | |
57 | ||
/* Hash that becomes part of the generated "<base>-<hash>.pack/.idx" names. */
static unsigned char object_list_sha1[20];

/* Behaviour switches. */
static int non_empty;
static int no_reuse_delta;	/* check_object() skips delta reuse when set */
static int local;		/* drop objects found in a non-local pack */
static int incremental;		/* drop objects found in any existing pack */

/* Object table and the two pointer arrays that view it in sorted order. */
static struct object_entry **sorted_by_sha;
static struct object_entry **sorted_by_type;
static struct object_entry *objects;
static int nr_objects;
static int nr_alloc;
static int nr_result;

/* Output destination; a NULL base_name makes write_pack_file() use fd 1. */
static const char *base_name;
static unsigned char pack_file_sha1[20];

/* Progress reporting; progress_update is cleared each time a line is printed. */
static int progress = 1;
static volatile sig_atomic_t progress_update;

static int window = 10;		/* also caps how many preferred bases are kept */
static int pack_to_stdout;	/* revalidate_*() skip their checks when set */
static int num_preferred_base;
c323ac7d | 73 | |
3f9ac8d2 JH |
74 | /* |
75 | * The object names in objects array are hashed with this hashtable, | |
76 | * to help looking up the entry by object name. Binary search from | |
77 | * sorted_by_sha is also possible but this was easier to code and faster. | |
78 | * This hashtable is built after all the objects are seen. | |
79 | */ | |
96f1e58f DR |
/*
 * Open-addressed table over the objects array.  A slot holds
 * (index into objects) + 1; zero marks an empty slot.
 */
static int *object_ix;
/* Number of slots in object_ix; zero until the first rehash. */
static int object_ix_hashsz;
3f9ac8d2 JH |
82 | |
83 | /* | |
84 | * Pack index for existing packs give us easy access to the offsets into | |
85 | * corresponding pack file where each object's data starts, but the entries | |
86 | * do not store the size of the compressed representation (uncompressed | |
87 | * size is easily available by examining the pack entry header). We build | |
88 | * a hashtable of existing packs (pack_revindex), and keep reverse index | |
89 | * here -- pack index file is sorted by object name mapping to offset; this | |
90 | * pack_revindex[].revindex array is an ordered list of offsets, so if you | |
91 | * know the offset of an object, next offset is where its packed | |
92 | * representation ends. | |
93 | */ | |
/* One hashtable slot: a pack and its lazily built sorted offset list. */
struct pack_revindex {
	struct packed_git *p;		/* NULL while the slot is unused */
	unsigned long *revindex;	/* NULL until prepare_pack_revindex() runs */
};

/* The hashtable itself, keyed by pack pointer, and its slot count. */
struct pack_revindex *pack_revindex = NULL;
static int pack_revindex_hashsz;
3f9ac8d2 JH |
99 | |
100 | /* | |
101 | * stats | |
102 | */ | |
96f1e58f DR |
/* Running totals maintained by write_object(). */
static int written;		/* objects written so far */
static int written_delta;	/* ... of which were written as deltas */
static int reused;		/* objects copied from an existing representation */
static int reused_delta;	/* ... of which were in-pack deltas */
3f9ac8d2 JH |
107 | |
108 | static int pack_revindex_ix(struct packed_git *p) | |
109 | { | |
2b74cffa | 110 | unsigned long ui = (unsigned long)p; |
3f9ac8d2 JH |
111 | int i; |
112 | ||
113 | ui = ui ^ (ui >> 16); /* defeat structure alignment */ | |
114 | i = (int)(ui % pack_revindex_hashsz); | |
115 | while (pack_revindex[i].p) { | |
116 | if (pack_revindex[i].p == p) | |
117 | return i; | |
118 | if (++i == pack_revindex_hashsz) | |
119 | i = 0; | |
120 | } | |
121 | return -1 - i; | |
122 | } | |
123 | ||
124 | static void prepare_pack_ix(void) | |
125 | { | |
126 | int num; | |
127 | struct packed_git *p; | |
128 | for (num = 0, p = packed_git; p; p = p->next) | |
129 | num++; | |
130 | if (!num) | |
131 | return; | |
132 | pack_revindex_hashsz = num * 11; | |
133 | pack_revindex = xcalloc(sizeof(*pack_revindex), pack_revindex_hashsz); | |
134 | for (p = packed_git; p; p = p->next) { | |
135 | num = pack_revindex_ix(p); | |
136 | num = - 1 - num; | |
137 | pack_revindex[num].p = p; | |
138 | } | |
139 | /* revindex elements are lazily initialized */ | |
140 | } | |
141 | ||
/*
 * qsort() comparator for unsigned long offsets, ascending.
 * Returns -1, 0 or 1.
 */
static int cmp_offset(const void *a_, const void *b_)
{
	const unsigned long lhs = *(const unsigned long *)a_;
	const unsigned long rhs = *(const unsigned long *)b_;

	return (lhs > rhs) - (lhs < rhs);
}
153 | ||
154 | /* | |
155 | * Ordered list of offsets of objects in the pack. | |
156 | */ | |
157 | static void prepare_pack_revindex(struct pack_revindex *rix) | |
158 | { | |
159 | struct packed_git *p = rix->p; | |
160 | int num_ent = num_packed_objects(p); | |
161 | int i; | |
162 | void *index = p->index_base + 256; | |
163 | ||
164 | rix->revindex = xmalloc(sizeof(unsigned long) * (num_ent + 1)); | |
165 | for (i = 0; i < num_ent; i++) { | |
1d7f171c | 166 | unsigned int hl = *((unsigned int *)((char *) index + 24*i)); |
3f9ac8d2 JH |
167 | rix->revindex[i] = ntohl(hl); |
168 | } | |
169 | /* This knows the pack format -- the 20-byte trailer | |
170 | * follows immediately after the last object data. | |
171 | */ | |
172 | rix->revindex[num_ent] = p->pack_size - 20; | |
173 | qsort(rix->revindex, num_ent, sizeof(unsigned long), cmp_offset); | |
174 | } | |
175 | ||
176 | static unsigned long find_packed_object_size(struct packed_git *p, | |
177 | unsigned long ofs) | |
178 | { | |
179 | int num; | |
180 | int lo, hi; | |
181 | struct pack_revindex *rix; | |
182 | unsigned long *revindex; | |
183 | num = pack_revindex_ix(p); | |
184 | if (num < 0) | |
185 | die("internal error: pack revindex uninitialized"); | |
186 | rix = &pack_revindex[num]; | |
187 | if (!rix->revindex) | |
188 | prepare_pack_revindex(rix); | |
189 | revindex = rix->revindex; | |
190 | lo = 0; | |
191 | hi = num_packed_objects(p) + 1; | |
192 | do { | |
193 | int mi = (lo + hi) / 2; | |
194 | if (revindex[mi] == ofs) { | |
195 | return revindex[mi+1] - ofs; | |
196 | } | |
197 | else if (ofs < revindex[mi]) | |
198 | hi = mi; | |
199 | else | |
200 | lo = mi + 1; | |
201 | } while (lo < hi); | |
202 | die("internal error: pack revindex corrupt"); | |
203 | } | |
204 | ||
c323ac7d LT |
205 | static void *delta_against(void *buf, unsigned long size, struct object_entry *entry) |
206 | { | |
207 | unsigned long othersize, delta_size; | |
208 | char type[10]; | |
209 | void *otherbuf = read_sha1_file(entry->delta->sha1, type, &othersize); | |
210 | void *delta_buf; | |
211 | ||
212 | if (!otherbuf) | |
213 | die("unable to read %s", sha1_to_hex(entry->delta->sha1)); | |
8ee378a0 | 214 | delta_buf = diff_delta(otherbuf, othersize, |
dcde55bc | 215 | buf, size, &delta_size, 0); |
75c42d8c | 216 | if (!delta_buf || delta_size != entry->delta_size) |
c323ac7d LT |
217 | die("delta size changed"); |
218 | free(buf); | |
219 | free(otherbuf); | |
220 | return delta_buf; | |
221 | } | |
222 | ||
a733cb60 LT |
223 | /* |
224 | * The per-object header is a pretty dense thing, which is | |
225 | * - first byte: low four bits are "size", then three bits of "type", | |
226 | * and the high bit is "size continues". | |
227 | * - each byte afterwards: low seven bits are size continuation, | |
228 | * with the high bit being "size continues" | |
229 | */ | |
230 | static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr) | |
231 | { | |
01247d87 | 232 | int n = 1; |
a733cb60 LT |
233 | unsigned char c; |
234 | ||
235 | if (type < OBJ_COMMIT || type > OBJ_DELTA) | |
236 | die("bad type %d", type); | |
237 | ||
01247d87 LT |
238 | c = (type << 4) | (size & 15); |
239 | size >>= 4; | |
240 | while (size) { | |
a733cb60 | 241 | *hdr++ = c | 0x80; |
01247d87 LT |
242 | c = size & 0x7f; |
243 | size >>= 7; | |
244 | n++; | |
a733cb60 LT |
245 | } |
246 | *hdr = c; | |
247 | return n; | |
248 | } | |
249 | ||
72518e9c | 250 | static int check_inflate(unsigned char *data, unsigned long len, unsigned long expect) |
df6d6101 | 251 | { |
72518e9c JH |
252 | z_stream stream; |
253 | unsigned char fakebuf[4096]; | |
254 | int st; | |
255 | ||
256 | memset(&stream, 0, sizeof(stream)); | |
257 | stream.next_in = data; | |
258 | stream.avail_in = len; | |
259 | stream.next_out = fakebuf; | |
260 | stream.avail_out = sizeof(fakebuf); | |
261 | inflateInit(&stream); | |
262 | ||
263 | while (1) { | |
264 | st = inflate(&stream, Z_FINISH); | |
265 | if (st == Z_STREAM_END || st == Z_OK) { | |
266 | st = (stream.total_out == expect && | |
267 | stream.total_in == len) ? 0 : -1; | |
268 | break; | |
269 | } | |
270 | if (st != Z_BUF_ERROR) { | |
271 | st = -1; | |
272 | break; | |
273 | } | |
274 | stream.next_out = fakebuf; | |
275 | stream.avail_out = sizeof(fakebuf); | |
276 | } | |
277 | inflateEnd(&stream); | |
278 | return st; | |
df6d6101 JH |
279 | } |
280 | ||
281 | /* | |
282 | * we are going to reuse the existing pack entry data. make | |
283 | * sure it is not corrupt. | |
284 | */ | |
72518e9c | 285 | static int revalidate_pack_entry(struct object_entry *entry, unsigned char *data, unsigned long len) |
df6d6101 | 286 | { |
72518e9c JH |
287 | enum object_type type; |
288 | unsigned long size, used; | |
df6d6101 JH |
289 | |
290 | if (pack_to_stdout) | |
291 | return 0; | |
292 | ||
72518e9c JH |
293 | /* the caller has already called use_packed_git() for us, |
294 | * so it is safe to access the pack data from mmapped location. | |
295 | * make sure the entry inflates correctly. | |
296 | */ | |
297 | used = unpack_object_header_gently(data, len, &type, &size); | |
298 | if (!used) | |
299 | return -1; | |
300 | if (type == OBJ_DELTA) | |
301 | used += 20; /* skip base object name */ | |
302 | data += used; | |
303 | len -= used; | |
304 | return check_inflate(data, len, entry->size); | |
df6d6101 JH |
305 | } |
306 | ||
307 | static int revalidate_loose_object(struct object_entry *entry, | |
308 | unsigned char *map, | |
309 | unsigned long mapsize) | |
310 | { | |
311 | /* we already know this is a loose object with new type header. */ | |
72518e9c JH |
312 | enum object_type type; |
313 | unsigned long size, used; | |
df6d6101 JH |
314 | |
315 | if (pack_to_stdout) | |
316 | return 0; | |
317 | ||
72518e9c JH |
318 | used = unpack_object_header_gently(map, mapsize, &type, &size); |
319 | if (!used) | |
320 | return -1; | |
321 | map += used; | |
322 | mapsize -= used; | |
323 | return check_inflate(map, mapsize, size); | |
df6d6101 JH |
324 | } |
325 | ||
7a979d99 JH |
326 | static unsigned long write_object(struct sha1file *f, |
327 | struct object_entry *entry) | |
c323ac7d LT |
328 | { |
329 | unsigned long size; | |
330 | char type[10]; | |
3f9ac8d2 | 331 | void *buf; |
a733cb60 | 332 | unsigned char header[10]; |
c323ac7d | 333 | unsigned hdrlen, datalen; |
a733cb60 | 334 | enum object_type obj_type; |
ab7cd7bb | 335 | int to_reuse = 0; |
c323ac7d | 336 | |
7a979d99 JH |
337 | if (entry->preferred_base) |
338 | return 0; | |
c323ac7d | 339 | |
a733cb60 | 340 | obj_type = entry->type; |
ab7cd7bb JH |
341 | if (! entry->in_pack) |
342 | to_reuse = 0; /* can't reuse what we don't have */ | |
343 | else if (obj_type == OBJ_DELTA) | |
344 | to_reuse = 1; /* check_object() decided it for us */ | |
345 | else if (obj_type != entry->in_pack_type) | |
346 | to_reuse = 0; /* pack has delta which is unusable */ | |
347 | else if (entry->delta) | |
348 | to_reuse = 0; /* we want to pack afresh */ | |
349 | else | |
350 | to_reuse = 1; /* we have it in-pack undeltified, | |
351 | * and we do not need to deltify it. | |
352 | */ | |
353 | ||
ceec1361 JH |
354 | if (!entry->in_pack && !entry->delta) { |
355 | unsigned char *map; | |
356 | unsigned long mapsize; | |
357 | map = map_sha1_file(entry->sha1, &mapsize); | |
358 | if (map && !legacy_loose_object(map)) { | |
359 | /* We can copy straight into the pack file */ | |
df6d6101 JH |
360 | if (revalidate_loose_object(entry, map, mapsize)) |
361 | die("corrupt loose object %s", | |
362 | sha1_to_hex(entry->sha1)); | |
ceec1361 JH |
363 | sha1write(f, map, mapsize); |
364 | munmap(map, mapsize); | |
365 | written++; | |
366 | reused++; | |
367 | return mapsize; | |
368 | } | |
369 | if (map) | |
370 | munmap(map, mapsize); | |
371 | } | |
372 | ||
df6d6101 | 373 | if (!to_reuse) { |
3f9ac8d2 JH |
374 | buf = read_sha1_file(entry->sha1, type, &size); |
375 | if (!buf) | |
376 | die("unable to read %s", sha1_to_hex(entry->sha1)); | |
377 | if (size != entry->size) | |
378 | die("object %s size inconsistency (%lu vs %lu)", | |
379 | sha1_to_hex(entry->sha1), size, entry->size); | |
380 | if (entry->delta) { | |
381 | buf = delta_against(buf, size, entry); | |
382 | size = entry->delta_size; | |
383 | obj_type = OBJ_DELTA; | |
384 | } | |
385 | /* | |
386 | * The object header is a byte of 'type' followed by zero or | |
387 | * more bytes of length. For deltas, the 20 bytes of delta | |
388 | * sha1 follows that. | |
389 | */ | |
390 | hdrlen = encode_header(obj_type, size, header); | |
391 | sha1write(f, header, hdrlen); | |
392 | ||
393 | if (entry->delta) { | |
394 | sha1write(f, entry->delta, 20); | |
395 | hdrlen += 20; | |
396 | } | |
397 | datalen = sha1write_compressed(f, buf, size); | |
398 | free(buf); | |
c323ac7d | 399 | } |
3f9ac8d2 JH |
400 | else { |
401 | struct packed_git *p = entry->in_pack; | |
402 | use_packed_git(p); | |
403 | ||
404 | datalen = find_packed_object_size(p, entry->in_pack_offset); | |
1d7f171c | 405 | buf = (char *) p->pack_base + entry->in_pack_offset; |
df6d6101 | 406 | |
72518e9c | 407 | if (revalidate_pack_entry(entry, buf, datalen)) |
df6d6101 | 408 | die("corrupt delta in pack %s", sha1_to_hex(entry->sha1)); |
3f9ac8d2 JH |
409 | sha1write(f, buf, datalen); |
410 | unuse_packed_git(p); | |
411 | hdrlen = 0; /* not really */ | |
ab7cd7bb JH |
412 | if (obj_type == OBJ_DELTA) |
413 | reused_delta++; | |
3f9ac8d2 | 414 | reused++; |
a733cb60 | 415 | } |
ab7cd7bb JH |
416 | if (obj_type == OBJ_DELTA) |
417 | written_delta++; | |
3f9ac8d2 | 418 | written++; |
c323ac7d LT |
419 | return hdrlen + datalen; |
420 | } | |
421 | ||
9d5ab962 JH |
422 | static unsigned long write_one(struct sha1file *f, |
423 | struct object_entry *e, | |
424 | unsigned long offset) | |
425 | { | |
426 | if (e->offset) | |
427 | /* offset starts from header size and cannot be zero | |
428 | * if it is written already. | |
429 | */ | |
430 | return offset; | |
431 | e->offset = offset; | |
432 | offset += write_object(f, e); | |
82f9d58a | 433 | /* if we are deltified, write out its base object. */ |
9d5ab962 JH |
434 | if (e->delta) |
435 | offset = write_one(f, e->delta, offset); | |
436 | return offset; | |
437 | } | |
438 | ||
c323ac7d LT |
439 | static void write_pack_file(void) |
440 | { | |
441 | int i; | |
d22b9290 | 442 | struct sha1file *f; |
a733cb60 | 443 | unsigned long offset; |
a733cb60 | 444 | struct pack_header hdr; |
183bdb2c JH |
445 | unsigned last_percent = 999; |
446 | int do_progress = 0; | |
c323ac7d | 447 | |
d22b9290 LT |
448 | if (!base_name) |
449 | f = sha1fd(1, "<stdout>"); | |
183bdb2c JH |
450 | else { |
451 | f = sha1create("%s-%s.%s", base_name, | |
452 | sha1_to_hex(object_list_sha1), "pack"); | |
453 | do_progress = progress; | |
454 | } | |
455 | if (do_progress) | |
f0b0af1b | 456 | fprintf(stderr, "Writing %d objects.\n", nr_result); |
183bdb2c | 457 | |
a733cb60 | 458 | hdr.hdr_signature = htonl(PACK_SIGNATURE); |
01247d87 | 459 | hdr.hdr_version = htonl(PACK_VERSION); |
7a979d99 | 460 | hdr.hdr_entries = htonl(nr_result); |
a733cb60 LT |
461 | sha1write(f, &hdr, sizeof(hdr)); |
462 | offset = sizeof(hdr); | |
f0b0af1b JH |
463 | if (!nr_result) |
464 | goto done; | |
5e8dc750 | 465 | for (i = 0; i < nr_objects; i++) { |
9d5ab962 | 466 | offset = write_one(f, objects + i, offset); |
183bdb2c | 467 | if (do_progress) { |
f0b0af1b | 468 | unsigned percent = written * 100 / nr_result; |
183bdb2c JH |
469 | if (progress_update || percent != last_percent) { |
470 | fprintf(stderr, "%4u%% (%u/%u) done\r", | |
f0b0af1b | 471 | percent, written, nr_result); |
183bdb2c JH |
472 | progress_update = 0; |
473 | last_percent = percent; | |
474 | } | |
5e8dc750 NP |
475 | } |
476 | } | |
183bdb2c JH |
477 | if (do_progress) |
478 | fputc('\n', stderr); | |
f0b0af1b | 479 | done: |
e1808845 | 480 | sha1close(f, pack_file_sha1, 1); |
c323ac7d LT |
481 | } |
482 | ||
483 | static void write_index_file(void) | |
484 | { | |
485 | int i; | |
7a979d99 JH |
486 | struct sha1file *f = sha1create("%s-%s.%s", base_name, |
487 | sha1_to_hex(object_list_sha1), "idx"); | |
c323ac7d | 488 | struct object_entry **list = sorted_by_sha; |
7a979d99 | 489 | struct object_entry **last = list + nr_result; |
c323ac7d LT |
490 | unsigned int array[256]; |
491 | ||
492 | /* | |
493 | * Write the first-level table (the list is sorted, | |
494 | * but we use a 256-entry lookup to be able to avoid | |
e1808845 | 495 | * having to do eight extra binary search iterations). |
c323ac7d LT |
496 | */ |
497 | for (i = 0; i < 256; i++) { | |
498 | struct object_entry **next = list; | |
499 | while (next < last) { | |
500 | struct object_entry *entry = *next; | |
501 | if (entry->sha1[0] != i) | |
502 | break; | |
503 | next++; | |
504 | } | |
505 | array[i] = htonl(next - sorted_by_sha); | |
506 | list = next; | |
507 | } | |
c38138cd | 508 | sha1write(f, array, 256 * sizeof(int)); |
c323ac7d LT |
509 | |
510 | /* | |
511 | * Write the actual SHA1 entries.. | |
512 | */ | |
513 | list = sorted_by_sha; | |
7a979d99 | 514 | for (i = 0; i < nr_result; i++) { |
c323ac7d LT |
515 | struct object_entry *entry = *list++; |
516 | unsigned int offset = htonl(entry->offset); | |
c38138cd LT |
517 | sha1write(f, &offset, 4); |
518 | sha1write(f, entry->sha1, 20); | |
c323ac7d | 519 | } |
e1808845 LT |
520 | sha1write(f, pack_file_sha1, 20); |
521 | sha1close(f, NULL, 1); | |
c323ac7d LT |
522 | } |
523 | ||
7a979d99 JH |
524 | static int locate_object_entry_hash(const unsigned char *sha1) |
525 | { | |
526 | int i; | |
527 | unsigned int ui; | |
528 | memcpy(&ui, sha1, sizeof(unsigned int)); | |
529 | i = ui % object_ix_hashsz; | |
530 | while (0 < object_ix[i]) { | |
a89fccd2 | 531 | if (!hashcmp(sha1, objects[object_ix[i] - 1].sha1)) |
7a979d99 JH |
532 | return i; |
533 | if (++i == object_ix_hashsz) | |
534 | i = 0; | |
535 | } | |
536 | return -1 - i; | |
537 | } | |
538 | ||
539 | static struct object_entry *locate_object_entry(const unsigned char *sha1) | |
540 | { | |
541 | int i; | |
542 | ||
543 | if (!object_ix_hashsz) | |
544 | return NULL; | |
545 | ||
546 | i = locate_object_entry_hash(sha1); | |
547 | if (0 <= i) | |
548 | return &objects[object_ix[i]-1]; | |
549 | return NULL; | |
550 | } | |
551 | ||
552 | static void rehash_objects(void) | |
c323ac7d | 553 | { |
7a979d99 JH |
554 | int i; |
555 | struct object_entry *oe; | |
556 | ||
557 | object_ix_hashsz = nr_objects * 3; | |
558 | if (object_ix_hashsz < 1024) | |
559 | object_ix_hashsz = 1024; | |
560 | object_ix = xrealloc(object_ix, sizeof(int) * object_ix_hashsz); | |
5379a5c5 | 561 | memset(object_ix, 0, sizeof(int) * object_ix_hashsz); |
7a979d99 JH |
562 | for (i = 0, oe = objects; i < nr_objects; i++, oe++) { |
563 | int ix = locate_object_entry_hash(oe->sha1); | |
564 | if (0 <= ix) | |
565 | continue; | |
566 | ix = -1 - ix; | |
567 | object_ix[ix] = i + 1; | |
568 | } | |
569 | } | |
570 | ||
/*
 * Compute the "name hint" hash of a path.
 *
 * This effectively just creates a sortable number from the last
 * sixteen non-whitespace characters.  Last characters count "most",
 * so things that end in ".c" sort together.  Whitespace is skipped.
 *
 * Returns 0 for an empty (or all-whitespace) name.
 */
static unsigned name_hash(const char *name)
{
	unsigned char c;
	unsigned hash = 0;

	while ((c = *name++) != 0) {
		if (isspace(c))
			continue;
		/*
		 * Widen to unsigned before shifting: c promotes to a
		 * signed int, and shifting a byte >= 0x80 into the sign
		 * bit would be undefined behaviour.
		 */
		hash = (hash >> 2) + ((unsigned)c << 24);
	}
	return hash;
}
588 | ||
589 | static int add_object_entry(const unsigned char *sha1, unsigned hash, int exclude) | |
c323ac7d LT |
590 | { |
591 | unsigned int idx = nr_objects; | |
592 | struct object_entry *entry; | |
3f9ac8d2 | 593 | struct packed_git *p; |
ab7cd7bb JH |
594 | unsigned int found_offset = 0; |
595 | struct packed_git *found_pack = NULL; | |
b925410d | 596 | int ix, status = 0; |
c323ac7d | 597 | |
7a979d99 | 598 | if (!exclude) { |
64560374 LT |
599 | for (p = packed_git; p; p = p->next) { |
600 | struct pack_entry e; | |
64560374 LT |
601 | if (find_pack_entry_one(sha1, &e, p)) { |
602 | if (incremental) | |
603 | return 0; | |
604 | if (local && !p->pack_local) | |
605 | return 0; | |
7a979d99 JH |
606 | if (!found_pack) { |
607 | found_offset = e.offset; | |
608 | found_pack = e.p; | |
609 | } | |
64560374 LT |
610 | } |
611 | } | |
612 | } | |
7a979d99 JH |
613 | if ((entry = locate_object_entry(sha1)) != NULL) |
614 | goto already_added; | |
eb019375 | 615 | |
c323ac7d LT |
616 | if (idx >= nr_alloc) { |
617 | unsigned int needed = (idx + 1024) * 3 / 2; | |
618 | objects = xrealloc(objects, needed * sizeof(*entry)); | |
619 | nr_alloc = needed; | |
620 | } | |
621 | entry = objects + idx; | |
7a979d99 | 622 | nr_objects = idx + 1; |
c323ac7d | 623 | memset(entry, 0, sizeof(*entry)); |
e702496e | 624 | hashcpy(entry->sha1, sha1); |
27225f2e | 625 | entry->hash = hash; |
7a979d99 JH |
626 | |
627 | if (object_ix_hashsz * 3 <= nr_objects * 4) | |
628 | rehash_objects(); | |
629 | else { | |
630 | ix = locate_object_entry_hash(entry->sha1); | |
631 | if (0 <= ix) | |
632 | die("internal error in object hashing."); | |
633 | object_ix[-1 - ix] = idx + 1; | |
3f9ac8d2 | 634 | } |
b925410d | 635 | status = 1; |
7a979d99 JH |
636 | |
637 | already_added: | |
f0b0af1b JH |
638 | if (progress_update) { |
639 | fprintf(stderr, "Counting objects...%d\r", nr_objects); | |
640 | progress_update = 0; | |
641 | } | |
7a979d99 JH |
642 | if (exclude) |
643 | entry->preferred_base = 1; | |
644 | else { | |
645 | if (found_pack) { | |
646 | entry->in_pack = found_pack; | |
647 | entry->in_pack_offset = found_offset; | |
648 | } | |
a49dd05f | 649 | } |
b925410d | 650 | return status; |
c323ac7d LT |
651 | } |
652 | ||
5379a5c5 JH |
/* One cached tree object used while looking for preferred-base objects. */
struct pbase_tree_cache {
	unsigned char sha1[20];		/* name of the cached tree */
	int ref;			/* users currently holding this entry */
	int temporary;			/* nonzero: not kept in the cache,
					 * freed by pbase_tree_put()
					 */
	void *tree_data;		/* tree object contents */
	unsigned long tree_size;	/* size of tree_data in bytes */
};
660 | ||
/* Fixed-size table of cached trees, indexed by pbase_tree_cache_ix(). */
static struct pbase_tree_cache *(pbase_tree_cache[256]);

/* Home slot for a tree name: its first byte, reduced to the table size. */
static int pbase_tree_cache_ix(const unsigned char *sha1)
{
	const unsigned char first_byte = sha1[0];

	return first_byte % ARRAY_SIZE(pbase_tree_cache);
}

/* Slot following ix, wrapping around at the end of the table. */
static int pbase_tree_cache_ix_incr(int ix)
{
	int next = ix + 1;

	return next % ARRAY_SIZE(pbase_tree_cache);
}
670 | ||
671 | static struct pbase_tree { | |
672 | struct pbase_tree *next; | |
673 | /* This is a phony "cache" entry; we are not | |
674 | * going to evict it nor find it through _get() | |
675 | * mechanism -- this is for the toplevel node that | |
676 | * would almost always change with any commit. | |
677 | */ | |
678 | struct pbase_tree_cache pcache; | |
679 | } *pbase_tree; | |
680 | ||
681 | static struct pbase_tree_cache *pbase_tree_get(const unsigned char *sha1) | |
682 | { | |
683 | struct pbase_tree_cache *ent, *nent; | |
684 | void *data; | |
685 | unsigned long size; | |
686 | char type[20]; | |
687 | int neigh; | |
688 | int my_ix = pbase_tree_cache_ix(sha1); | |
689 | int available_ix = -1; | |
690 | ||
691 | /* pbase-tree-cache acts as a limited hashtable. | |
692 | * your object will be found at your index or within a few | |
693 | * slots after that slot if it is cached. | |
694 | */ | |
695 | for (neigh = 0; neigh < 8; neigh++) { | |
696 | ent = pbase_tree_cache[my_ix]; | |
a89fccd2 | 697 | if (ent && !hashcmp(ent->sha1, sha1)) { |
5379a5c5 JH |
698 | ent->ref++; |
699 | return ent; | |
700 | } | |
701 | else if (((available_ix < 0) && (!ent || !ent->ref)) || | |
702 | ((0 <= available_ix) && | |
703 | (!ent && pbase_tree_cache[available_ix]))) | |
704 | available_ix = my_ix; | |
705 | if (!ent) | |
706 | break; | |
707 | my_ix = pbase_tree_cache_ix_incr(my_ix); | |
708 | } | |
709 | ||
710 | /* Did not find one. Either we got a bogus request or | |
711 | * we need to read and perhaps cache. | |
712 | */ | |
713 | data = read_sha1_file(sha1, type, &size); | |
714 | if (!data) | |
715 | return NULL; | |
716 | if (strcmp(type, tree_type)) { | |
717 | free(data); | |
718 | return NULL; | |
719 | } | |
720 | ||
721 | /* We need to either cache or return a throwaway copy */ | |
722 | ||
723 | if (available_ix < 0) | |
724 | ent = NULL; | |
725 | else { | |
726 | ent = pbase_tree_cache[available_ix]; | |
727 | my_ix = available_ix; | |
728 | } | |
729 | ||
730 | if (!ent) { | |
731 | nent = xmalloc(sizeof(*nent)); | |
732 | nent->temporary = (available_ix < 0); | |
733 | } | |
734 | else { | |
735 | /* evict and reuse */ | |
736 | free(ent->tree_data); | |
737 | nent = ent; | |
738 | } | |
e702496e | 739 | hashcpy(nent->sha1, sha1); |
5379a5c5 JH |
740 | nent->tree_data = data; |
741 | nent->tree_size = size; | |
742 | nent->ref = 1; | |
743 | if (!nent->temporary) | |
744 | pbase_tree_cache[my_ix] = nent; | |
745 | return nent; | |
746 | } | |
747 | ||
748 | static void pbase_tree_put(struct pbase_tree_cache *cache) | |
749 | { | |
750 | if (!cache->temporary) { | |
751 | cache->ref--; | |
752 | return; | |
753 | } | |
754 | free(cache->tree_data); | |
755 | free(cache); | |
756 | } | |
757 | ||
/*
 * Length of the leading path component of name: the number of
 * characters before the first '/', newline or end of string.
 */
static int name_cmp_len(const char *name)
{
	return (int)strcspn(name, "\n/");
}
765 | ||
766 | static void add_pbase_object(struct tree_desc *tree, | |
5379a5c5 | 767 | const char *name, |
ce0bd642 LT |
768 | int cmplen, |
769 | const char *fullname) | |
3f9ac8d2 | 770 | { |
4c068a98 LT |
771 | struct name_entry entry; |
772 | ||
773 | while (tree_entry(tree,&entry)) { | |
7a979d99 JH |
774 | unsigned long size; |
775 | char type[20]; | |
776 | ||
4c068a98 LT |
777 | if (entry.pathlen != cmplen || |
778 | memcmp(entry.path, name, cmplen) || | |
779 | !has_sha1_file(entry.sha1) || | |
780 | sha1_object_info(entry.sha1, type, &size)) | |
7a979d99 | 781 | continue; |
5379a5c5 | 782 | if (name[cmplen] != '/') { |
ce0bd642 | 783 | unsigned hash = name_hash(fullname); |
4c068a98 | 784 | add_object_entry(entry.sha1, hash, 1); |
5379a5c5 JH |
785 | return; |
786 | } | |
8e440259 | 787 | if (!strcmp(type, tree_type)) { |
7a979d99 | 788 | struct tree_desc sub; |
5379a5c5 JH |
789 | struct pbase_tree_cache *tree; |
790 | const char *down = name+cmplen+1; | |
791 | int downlen = name_cmp_len(down); | |
792 | ||
4c068a98 | 793 | tree = pbase_tree_get(entry.sha1); |
5379a5c5 JH |
794 | if (!tree) |
795 | return; | |
796 | sub.buf = tree->tree_data; | |
797 | sub.size = tree->tree_size; | |
798 | ||
ce0bd642 | 799 | add_pbase_object(&sub, down, downlen, fullname); |
5379a5c5 JH |
800 | pbase_tree_put(tree); |
801 | } | |
802 | } | |
803 | } | |
1d6b38cc | 804 | |
5379a5c5 JH |
805 | static unsigned *done_pbase_paths; |
806 | static int done_pbase_paths_num; | |
807 | static int done_pbase_paths_alloc; | |
808 | static int done_pbase_path_pos(unsigned hash) | |
809 | { | |
810 | int lo = 0; | |
811 | int hi = done_pbase_paths_num; | |
812 | while (lo < hi) { | |
813 | int mi = (hi + lo) / 2; | |
814 | if (done_pbase_paths[mi] == hash) | |
815 | return mi; | |
816 | if (done_pbase_paths[mi] < hash) | |
817 | hi = mi; | |
818 | else | |
819 | lo = mi + 1; | |
820 | } | |
821 | return -lo-1; | |
822 | } | |
823 | ||
824 | static int check_pbase_path(unsigned hash) | |
825 | { | |
826 | int pos = (!done_pbase_paths) ? -1 : done_pbase_path_pos(hash); | |
827 | if (0 <= pos) | |
828 | return 1; | |
829 | pos = -pos - 1; | |
830 | if (done_pbase_paths_alloc <= done_pbase_paths_num) { | |
831 | done_pbase_paths_alloc = alloc_nr(done_pbase_paths_alloc); | |
832 | done_pbase_paths = xrealloc(done_pbase_paths, | |
833 | done_pbase_paths_alloc * | |
834 | sizeof(unsigned)); | |
835 | } | |
836 | done_pbase_paths_num++; | |
837 | if (pos < done_pbase_paths_num) | |
838 | memmove(done_pbase_paths + pos + 1, | |
839 | done_pbase_paths + pos, | |
840 | (done_pbase_paths_num - pos - 1) * sizeof(unsigned)); | |
841 | done_pbase_paths[pos] = hash; | |
842 | return 0; | |
843 | } | |
844 | ||
8d1d8f83 | 845 | static void add_preferred_base_object(const char *name, unsigned hash) |
5379a5c5 JH |
846 | { |
847 | struct pbase_tree *it; | |
848 | int cmplen = name_cmp_len(name); | |
849 | ||
850 | if (check_pbase_path(hash)) | |
851 | return; | |
852 | ||
853 | for (it = pbase_tree; it; it = it->next) { | |
854 | if (cmplen == 0) { | |
ce0bd642 | 855 | hash = name_hash(""); |
5379a5c5 JH |
856 | add_object_entry(it->pcache.sha1, hash, 1); |
857 | } | |
858 | else { | |
859 | struct tree_desc tree; | |
860 | tree.buf = it->pcache.tree_data; | |
861 | tree.size = it->pcache.tree_size; | |
ce0bd642 | 862 | add_pbase_object(&tree, name, cmplen, name); |
7a979d99 | 863 | } |
3f9ac8d2 | 864 | } |
3f9ac8d2 JH |
865 | } |
866 | ||
7a979d99 | 867 | static void add_preferred_base(unsigned char *sha1) |
3f9ac8d2 | 868 | { |
5379a5c5 JH |
869 | struct pbase_tree *it; |
870 | void *data; | |
871 | unsigned long size; | |
872 | unsigned char tree_sha1[20]; | |
1d6b38cc | 873 | |
8d1d8f83 JH |
874 | if (window <= num_preferred_base++) |
875 | return; | |
876 | ||
5379a5c5 JH |
877 | data = read_object_with_reference(sha1, tree_type, &size, tree_sha1); |
878 | if (!data) | |
7a979d99 | 879 | return; |
5379a5c5 JH |
880 | |
881 | for (it = pbase_tree; it; it = it->next) { | |
a89fccd2 | 882 | if (!hashcmp(it->pcache.sha1, tree_sha1)) { |
5379a5c5 JH |
883 | free(data); |
884 | return; | |
885 | } | |
886 | } | |
887 | ||
888 | it = xcalloc(1, sizeof(*it)); | |
889 | it->next = pbase_tree; | |
890 | pbase_tree = it; | |
891 | ||
e702496e | 892 | hashcpy(it->pcache.sha1, tree_sha1); |
5379a5c5 JH |
893 | it->pcache.tree_data = data; |
894 | it->pcache.tree_size = size; | |
3f9ac8d2 JH |
895 | } |
896 | ||
c323ac7d LT |
897 | static void check_object(struct object_entry *entry) |
898 | { | |
36e4d74a JH |
899 | char type[20]; |
900 | ||
7a979d99 | 901 | if (entry->in_pack && !entry->preferred_base) { |
ab7cd7bb JH |
902 | unsigned char base[20]; |
903 | unsigned long size; | |
904 | struct object_entry *base_entry; | |
905 | ||
906 | /* We want in_pack_type even if we do not reuse delta. | |
907 | * There is no point not reusing non-delta representations. | |
908 | */ | |
909 | check_reuse_pack_delta(entry->in_pack, | |
910 | entry->in_pack_offset, | |
911 | base, &size, | |
912 | &entry->in_pack_type); | |
913 | ||
3f9ac8d2 JH |
914 | /* Check if it is delta, and the base is also an object |
915 | * we are going to pack. If so we will reuse the existing | |
916 | * delta. | |
917 | */ | |
ab7cd7bb JH |
918 | if (!no_reuse_delta && |
919 | entry->in_pack_type == OBJ_DELTA && | |
7a979d99 JH |
920 | (base_entry = locate_object_entry(base)) && |
921 | (!base_entry->preferred_base)) { | |
ab7cd7bb JH |
922 | |
923 | /* Depth value does not matter - find_deltas() | |
924 | * will never consider reused delta as the | |
925 | * base object to deltify other objects | |
926 | * against, in order to avoid circular deltas. | |
3f9ac8d2 | 927 | */ |
ab7cd7bb JH |
928 | |
929 | /* uncompressed size of the delta data */ | |
3f9ac8d2 JH |
930 | entry->size = entry->delta_size = size; |
931 | entry->delta = base_entry; | |
932 | entry->type = OBJ_DELTA; | |
ab7cd7bb | 933 | |
15b4d577 JH |
934 | entry->delta_sibling = base_entry->delta_child; |
935 | base_entry->delta_child = entry; | |
ab7cd7bb | 936 | |
3f9ac8d2 JH |
937 | return; |
938 | } | |
939 | /* Otherwise we would do the usual */ | |
36e4d74a | 940 | } |
3f9ac8d2 JH |
941 | |
942 | if (sha1_object_info(entry->sha1, type, &entry->size)) | |
36e4d74a JH |
943 | die("unable to get type of object %s", |
944 | sha1_to_hex(entry->sha1)); | |
3f9ac8d2 | 945 | |
8e440259 | 946 | if (!strcmp(type, commit_type)) { |
3f9ac8d2 | 947 | entry->type = OBJ_COMMIT; |
8e440259 | 948 | } else if (!strcmp(type, tree_type)) { |
3f9ac8d2 | 949 | entry->type = OBJ_TREE; |
8e440259 | 950 | } else if (!strcmp(type, blob_type)) { |
3f9ac8d2 | 951 | entry->type = OBJ_BLOB; |
8e440259 | 952 | } else if (!strcmp(type, tag_type)) { |
3f9ac8d2 JH |
953 | entry->type = OBJ_TAG; |
954 | } else | |
955 | die("unable to pack object %s of type %s", | |
956 | sha1_to_hex(entry->sha1), type); | |
957 | } | |
958 | ||
15b4d577 JH |
959 | static unsigned int check_delta_limit(struct object_entry *me, unsigned int n) |
960 | { | |
961 | struct object_entry *child = me->delta_child; | |
962 | unsigned int m = n; | |
963 | while (child) { | |
964 | unsigned int c = check_delta_limit(child, n + 1); | |
965 | if (m < c) | |
966 | m = c; | |
967 | child = child->delta_sibling; | |
968 | } | |
969 | return m; | |
970 | } | |
971 | ||
c323ac7d LT |
972 | static void get_object_details(void) |
973 | { | |
974 | int i; | |
15b4d577 | 975 | struct object_entry *entry; |
c323ac7d | 976 | |
3f9ac8d2 | 977 | prepare_pack_ix(); |
15b4d577 JH |
978 | for (i = 0, entry = objects; i < nr_objects; i++, entry++) |
979 | check_object(entry); | |
b76f6b62 JH |
980 | |
981 | if (nr_objects == nr_result) { | |
982 | /* | |
983 | * Depth of objects that depend on the entry -- this | |
984 | * is subtracted from depth-max to break too deep | |
985 | * delta chain because of delta data reusing. | |
986 | * However, we loosen this restriction when we know we | |
987 | * are creating a thin pack -- it will have to be | |
988 | * expanded on the other end anyway, so do not | |
989 | * artificially cut the delta chain and let it go as | |
990 | * deep as it wants. | |
991 | */ | |
992 | for (i = 0, entry = objects; i < nr_objects; i++, entry++) | |
993 | if (!entry->delta && entry->delta_child) | |
994 | entry->delta_limit = | |
995 | check_delta_limit(entry, 1); | |
996 | } | |
c323ac7d LT |
997 | } |
998 | ||
/* Ordering function over two object entries, with strcmp()-like result. */
typedef int (*entry_sort_t)(const struct object_entry *, const struct object_entry *);

/* Ordering used by sort_comparator(); set right before calling qsort(). */
static entry_sort_t current_sort;

/*
 * qsort() callback for arrays of "struct object_entry *": unwraps the
 * two element pointers and hands the entries to current_sort.
 */
static int sort_comparator(const void *_a, const void *_b)
{
	struct object_entry *const *lhs = _a;
	struct object_entry *const *rhs = _b;

	return current_sort(*lhs, *rhs);
}
1009 | ||
1010 | static struct object_entry **create_sorted_list(entry_sort_t sort) | |
1011 | { | |
1012 | struct object_entry **list = xmalloc(nr_objects * sizeof(struct object_entry *)); | |
1013 | int i; | |
1014 | ||
1015 | for (i = 0; i < nr_objects; i++) | |
1016 | list[i] = objects + i; | |
1017 | current_sort = sort; | |
1018 | qsort(list, nr_objects, sizeof(struct object_entry *), sort_comparator); | |
1019 | return list; | |
1020 | } | |
1021 | ||
1022 | static int sha1_sort(const struct object_entry *a, const struct object_entry *b) | |
1023 | { | |
a89fccd2 | 1024 | return hashcmp(a->sha1, b->sha1); |
c323ac7d LT |
1025 | } |
1026 | ||
962554c6 | 1027 | static struct object_entry **create_final_object_list(void) |
7a979d99 JH |
1028 | { |
1029 | struct object_entry **list; | |
1030 | int i, j; | |
1031 | ||
1032 | for (i = nr_result = 0; i < nr_objects; i++) | |
1033 | if (!objects[i].preferred_base) | |
1034 | nr_result++; | |
1035 | list = xmalloc(nr_result * sizeof(struct object_entry *)); | |
1036 | for (i = j = 0; i < nr_objects; i++) { | |
1037 | if (!objects[i].preferred_base) | |
1038 | list[j++] = objects + i; | |
1039 | } | |
1040 | current_sort = sha1_sort; | |
1041 | qsort(list, nr_result, sizeof(struct object_entry *), sort_comparator); | |
1042 | return list; | |
1043 | } | |
1044 | ||
c323ac7d LT |
1045 | static int type_size_sort(const struct object_entry *a, const struct object_entry *b) |
1046 | { | |
1047 | if (a->type < b->type) | |
1048 | return -1; | |
1049 | if (a->type > b->type) | |
1050 | return 1; | |
27225f2e LT |
1051 | if (a->hash < b->hash) |
1052 | return -1; | |
1053 | if (a->hash > b->hash) | |
1054 | return 1; | |
7a979d99 JH |
1055 | if (a->preferred_base < b->preferred_base) |
1056 | return -1; | |
1057 | if (a->preferred_base > b->preferred_base) | |
1058 | return 1; | |
c323ac7d LT |
1059 | if (a->size < b->size) |
1060 | return -1; | |
1061 | if (a->size > b->size) | |
1062 | return 1; | |
1063 | return a < b ? -1 : (a > b); | |
1064 | } | |
1065 | ||
/* One slot of the delta search window used by find_deltas(). */
struct unpacked {
	struct object_entry *entry;	/* object held in this slot, NULL if unused */
	void *data;			/* object contents; loaded on demand by try_delta() */
	struct delta_index *index;	/* delta index over data; built on demand by try_delta() */
};
1071 | ||
1072 | /* | |
521a4f4c LT |
1073 | * We search for deltas _backwards_ in a list sorted by type and |
1074 | * by size, so that we see progressively smaller and smaller files. | |
1075 | * That's because we prefer deltas to be from the bigger file | |
1076 | * to the smaller - deletes are potentially cheaper, but perhaps | |
1077 | * more importantly, the bigger file is likely the more recent | |
1078 | * one. | |
c323ac7d | 1079 | */ |
f6c7081a | 1080 | static int try_delta(struct unpacked *trg, struct unpacked *src, |
560b25a8 | 1081 | unsigned max_depth) |
c323ac7d | 1082 | { |
f6c7081a NP |
1083 | struct object_entry *trg_entry = trg->entry; |
1084 | struct object_entry *src_entry = src->entry; | |
560b25a8 NP |
1085 | unsigned long trg_size, src_size, delta_size, sizediff, max_size, sz; |
1086 | char type[10]; | |
c323ac7d LT |
1087 | void *delta_buf; |
1088 | ||
1089 | /* Don't bother doing diffs between different types */ | |
f6c7081a | 1090 | if (trg_entry->type != src_entry->type) |
c323ac7d LT |
1091 | return -1; |
1092 | ||
7a979d99 JH |
1093 | /* We do not compute delta to *create* objects we are not |
1094 | * going to pack. | |
1095 | */ | |
f6c7081a | 1096 | if (trg_entry->preferred_base) |
75c42d8c | 1097 | return -1; |
7a979d99 | 1098 | |
51d1e83f LT |
1099 | /* |
1100 | * We do not bother to try a delta that we discarded | |
8dbbd14e | 1101 | * on an earlier try, but only when reusing delta data. |
51d1e83f | 1102 | */ |
8dbbd14e NP |
1103 | if (!no_reuse_delta && trg_entry->in_pack && |
1104 | trg_entry->in_pack == src_entry->in_pack) | |
51d1e83f LT |
1105 | return 0; |
1106 | ||
f6c7081a NP |
1107 | /* |
1108 | * If the current object is at pack edge, take the depth the | |
7a979d99 JH |
1109 | * objects that depend on the current object into account -- |
1110 | * otherwise they would become too deep. | |
ab7cd7bb | 1111 | */ |
f6c7081a NP |
1112 | if (trg_entry->delta_child) { |
1113 | if (max_depth <= trg_entry->delta_limit) | |
15b4d577 | 1114 | return 0; |
f6c7081a | 1115 | max_depth -= trg_entry->delta_limit; |
15b4d577 | 1116 | } |
f6c7081a | 1117 | if (src_entry->depth >= max_depth) |
d116a45a | 1118 | return 0; |
c323ac7d | 1119 | |
c3b06a69 | 1120 | /* Now some size filtering heuristics. */ |
560b25a8 NP |
1121 | trg_size = trg_entry->size; |
1122 | max_size = trg_size/2 - 20; | |
c3b06a69 NP |
1123 | max_size = max_size * (max_depth - src_entry->depth) / max_depth; |
1124 | if (max_size == 0) | |
1125 | return 0; | |
4e8da195 | 1126 | if (trg_entry->delta && trg_entry->delta_size <= max_size) |
f6c7081a NP |
1127 | max_size = trg_entry->delta_size-1; |
1128 | src_size = src_entry->size; | |
560b25a8 | 1129 | sizediff = src_size < trg_size ? trg_size - src_size : 0; |
27225f2e | 1130 | if (sizediff >= max_size) |
f527cb8c | 1131 | return 0; |
f6c7081a | 1132 | |
560b25a8 NP |
1133 | /* Load data if not already done */ |
1134 | if (!trg->data) { | |
1135 | trg->data = read_sha1_file(trg_entry->sha1, type, &sz); | |
1136 | if (sz != trg_size) | |
1137 | die("object %s inconsistent object length (%lu vs %lu)", | |
1138 | sha1_to_hex(trg_entry->sha1), sz, trg_size); | |
1139 | } | |
1140 | if (!src->data) { | |
1141 | src->data = read_sha1_file(src_entry->sha1, type, &sz); | |
1142 | if (sz != src_size) | |
1143 | die("object %s inconsistent object length (%lu vs %lu)", | |
1144 | sha1_to_hex(src_entry->sha1), sz, src_size); | |
1145 | } | |
1146 | if (!src->index) { | |
1147 | src->index = create_delta_index(src->data, src_size); | |
1148 | if (!src->index) | |
1149 | die("out of memory"); | |
1150 | } | |
1151 | ||
1152 | delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size); | |
c323ac7d | 1153 | if (!delta_buf) |
75c42d8c | 1154 | return 0; |
f6c7081a NP |
1155 | |
1156 | trg_entry->delta = src_entry; | |
1157 | trg_entry->delta_size = delta_size; | |
1158 | trg_entry->depth = src_entry->depth + 1; | |
c323ac7d | 1159 | free(delta_buf); |
f6c7081a | 1160 | return 1; |
c323ac7d LT |
1161 | } |
1162 | ||
b2504a0d NP |
1163 | static void progress_interval(int signum) |
1164 | { | |
b2504a0d NP |
1165 | progress_update = 1; |
1166 | } | |
1167 | ||
d116a45a | 1168 | static void find_deltas(struct object_entry **list, int window, int depth) |
c323ac7d | 1169 | { |
521a4f4c | 1170 | int i, idx; |
c323ac7d LT |
1171 | unsigned int array_size = window * sizeof(struct unpacked); |
1172 | struct unpacked *array = xmalloc(array_size); | |
183bdb2c JH |
1173 | unsigned processed = 0; |
1174 | unsigned last_percent = 999; | |
c323ac7d LT |
1175 | |
1176 | memset(array, 0, array_size); | |
521a4f4c LT |
1177 | i = nr_objects; |
1178 | idx = 0; | |
183bdb2c | 1179 | if (progress) |
f0b0af1b | 1180 | fprintf(stderr, "Deltifying %d objects.\n", nr_result); |
21fcd1bd | 1181 | |
521a4f4c | 1182 | while (--i >= 0) { |
c323ac7d LT |
1183 | struct object_entry *entry = list[i]; |
1184 | struct unpacked *n = array + idx; | |
c323ac7d LT |
1185 | int j; |
1186 | ||
f0b0af1b JH |
1187 | if (!entry->preferred_base) |
1188 | processed++; | |
1189 | ||
183bdb2c | 1190 | if (progress) { |
f0b0af1b | 1191 | unsigned percent = processed * 100 / nr_result; |
183bdb2c JH |
1192 | if (percent != last_percent || progress_update) { |
1193 | fprintf(stderr, "%4u%% (%u/%u) done\r", | |
f0b0af1b | 1194 | percent, processed, nr_result); |
183bdb2c JH |
1195 | progress_update = 0; |
1196 | last_percent = percent; | |
1197 | } | |
21fcd1bd | 1198 | } |
3f9ac8d2 JH |
1199 | |
1200 | if (entry->delta) | |
1201 | /* This happens if we decided to reuse existing | |
ab7cd7bb | 1202 | * delta from a pack. "!no_reuse_delta &&" is implied. |
3f9ac8d2 JH |
1203 | */ |
1204 | continue; | |
1205 | ||
9a8b6a0a JH |
1206 | if (entry->size < 50) |
1207 | continue; | |
ff45715c NP |
1208 | free_delta_index(n->index); |
1209 | n->index = NULL; | |
c323ac7d | 1210 | free(n->data); |
560b25a8 | 1211 | n->data = NULL; |
c323ac7d | 1212 | n->entry = entry; |
ab7cd7bb | 1213 | |
78817c15 LT |
1214 | j = window; |
1215 | while (--j > 0) { | |
1216 | unsigned int other_idx = idx + j; | |
c323ac7d | 1217 | struct unpacked *m; |
78817c15 LT |
1218 | if (other_idx >= window) |
1219 | other_idx -= window; | |
c323ac7d LT |
1220 | m = array + other_idx; |
1221 | if (!m->entry) | |
1222 | break; | |
560b25a8 | 1223 | if (try_delta(n, m, depth) < 0) |
c323ac7d LT |
1224 | break; |
1225 | } | |
70ca1a3f JH |
1226 | /* if we made n a delta, and if n is already at max |
1227 | * depth, leaving it in the window is pointless. we | |
1228 | * should evict it first. | |
70ca1a3f JH |
1229 | */ |
1230 | if (entry->delta && depth <= entry->depth) | |
1231 | continue; | |
ff45715c | 1232 | |
521a4f4c LT |
1233 | idx++; |
1234 | if (idx >= window) | |
1235 | idx = 0; | |
c323ac7d | 1236 | } |
adee7bdf | 1237 | |
b2504a0d NP |
1238 | if (progress) |
1239 | fputc('\n', stderr); | |
1240 | ||
f6c7081a | 1241 | for (i = 0; i < window; ++i) { |
ff45715c | 1242 | free_delta_index(array[i].index); |
adee7bdf | 1243 | free(array[i].data); |
f6c7081a | 1244 | } |
adee7bdf | 1245 | free(array); |
c323ac7d LT |
1246 | } |
1247 | ||
f3123c4a JH |
1248 | static void prepare_pack(int window, int depth) |
1249 | { | |
3f9ac8d2 | 1250 | get_object_details(); |
f3123c4a JH |
1251 | sorted_by_type = create_sorted_list(type_size_sort); |
1252 | if (window && depth) | |
1253 | find_deltas(sorted_by_type, window+1, depth); | |
f3123c4a JH |
1254 | } |
1255 | ||
df6d6101 | 1256 | static int reuse_cached_pack(unsigned char *sha1) |
f3123c4a JH |
1257 | { |
1258 | static const char cache[] = "pack-cache/pack-%s.%s"; | |
1259 | char *cached_pack, *cached_idx; | |
1260 | int ifd, ofd, ifd_ix = -1; | |
1261 | ||
1262 | cached_pack = git_path(cache, sha1_to_hex(sha1), "pack"); | |
1263 | ifd = open(cached_pack, O_RDONLY); | |
1264 | if (ifd < 0) | |
1265 | return 0; | |
1266 | ||
1267 | if (!pack_to_stdout) { | |
1268 | cached_idx = git_path(cache, sha1_to_hex(sha1), "idx"); | |
1269 | ifd_ix = open(cached_idx, O_RDONLY); | |
1270 | if (ifd_ix < 0) { | |
1271 | close(ifd); | |
1272 | return 0; | |
1273 | } | |
1274 | } | |
1275 | ||
ab7cd7bb JH |
1276 | if (progress) |
1277 | fprintf(stderr, "Reusing %d objects pack %s\n", nr_objects, | |
1278 | sha1_to_hex(sha1)); | |
f3123c4a JH |
1279 | |
1280 | if (pack_to_stdout) { | |
1281 | if (copy_fd(ifd, 1)) | |
1282 | exit(1); | |
1283 | close(ifd); | |
1284 | } | |
1285 | else { | |
1286 | char name[PATH_MAX]; | |
1287 | snprintf(name, sizeof(name), | |
1288 | "%s-%s.%s", base_name, sha1_to_hex(sha1), "pack"); | |
1289 | ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); | |
1290 | if (ofd < 0) | |
1291 | die("unable to open %s (%s)", name, strerror(errno)); | |
1292 | if (copy_fd(ifd, ofd)) | |
1293 | exit(1); | |
1294 | close(ifd); | |
1295 | ||
1296 | snprintf(name, sizeof(name), | |
1297 | "%s-%s.%s", base_name, sha1_to_hex(sha1), "idx"); | |
1298 | ofd = open(name, O_CREAT | O_EXCL | O_WRONLY, 0666); | |
1299 | if (ofd < 0) | |
1300 | die("unable to open %s (%s)", name, strerror(errno)); | |
1301 | if (copy_fd(ifd_ix, ofd)) | |
1302 | exit(1); | |
1303 | close(ifd_ix); | |
1304 | puts(sha1_to_hex(sha1)); | |
1305 | } | |
1306 | ||
1307 | return 1; | |
1308 | } | |
1309 | ||
fb7a6531 LT |
1310 | static void setup_progress_signal(void) |
1311 | { | |
1312 | struct sigaction sa; | |
1313 | struct itimerval v; | |
1314 | ||
1315 | memset(&sa, 0, sizeof(sa)); | |
1316 | sa.sa_handler = progress_interval; | |
1317 | sigemptyset(&sa.sa_mask); | |
1318 | sa.sa_flags = SA_RESTART; | |
1319 | sigaction(SIGALRM, &sa, NULL); | |
1320 | ||
1321 | v.it_interval.tv_sec = 1; | |
1322 | v.it_interval.tv_usec = 0; | |
1323 | v.it_value = v.it_interval; | |
1324 | setitimer(ITIMER_REAL, &v, NULL); | |
1325 | } | |
1326 | ||
4812a93a JK |
1327 | static int git_pack_config(const char *k, const char *v) |
1328 | { | |
1329 | if(!strcmp(k, "pack.window")) { | |
1330 | window = git_config_int(k, v); | |
1331 | return 0; | |
1332 | } | |
1333 | return git_default_config(k, v); | |
1334 | } | |
1335 | ||
b5d97e6b | 1336 | static void read_object_list_from_stdin(void) |
c323ac7d | 1337 | { |
b5d97e6b JH |
1338 | char line[40 + 1 + PATH_MAX + 2]; |
1339 | unsigned char sha1[20]; | |
1340 | unsigned hash; | |
b2504a0d | 1341 | |
da93d12b | 1342 | for (;;) { |
da93d12b LT |
1343 | if (!fgets(line, sizeof(line), stdin)) { |
1344 | if (feof(stdin)) | |
1345 | break; | |
1346 | if (!ferror(stdin)) | |
1347 | die("fgets returned NULL, not EOF, not error!"); | |
687dd75c JH |
1348 | if (errno != EINTR) |
1349 | die("fgets: %s", strerror(errno)); | |
1350 | clearerr(stdin); | |
1351 | continue; | |
da93d12b | 1352 | } |
7a979d99 JH |
1353 | if (line[0] == '-') { |
1354 | if (get_sha1_hex(line+1, sha1)) | |
1355 | die("expected edge sha1, got garbage:\n %s", | |
b5d97e6b | 1356 | line); |
8d1d8f83 | 1357 | add_preferred_base(sha1); |
7a979d99 | 1358 | continue; |
21fcd1bd | 1359 | } |
c323ac7d | 1360 | if (get_sha1_hex(line, sha1)) |
ef07618f | 1361 | die("expected sha1, got garbage:\n %s", line); |
b5d97e6b | 1362 | |
ce0bd642 | 1363 | hash = name_hash(line+41); |
5379a5c5 JH |
1364 | add_preferred_base_object(line+41, hash); |
1365 | add_object_entry(sha1, hash, 0); | |
c323ac7d | 1366 | } |
b5d97e6b JH |
1367 | } |
1368 | ||
b5d97e6b JH |
1369 | static void show_commit(struct commit *commit) |
1370 | { | |
1371 | unsigned hash = name_hash(""); | |
8d1d8f83 | 1372 | add_preferred_base_object("", hash); |
b5d97e6b JH |
1373 | add_object_entry(commit->object.sha1, hash, 0); |
1374 | } | |
1375 | ||
1376 | static void show_object(struct object_array_entry *p) | |
1377 | { | |
1378 | unsigned hash = name_hash(p->name); | |
8d1d8f83 | 1379 | add_preferred_base_object(p->name, hash); |
b5d97e6b JH |
1380 | add_object_entry(p->item->sha1, hash, 0); |
1381 | } | |
1382 | ||
8d1d8f83 JH |
1383 | static void show_edge(struct commit *commit) |
1384 | { | |
1385 | add_preferred_base(commit->object.sha1); | |
1386 | } | |
1387 | ||
1388 | static void get_object_list(int ac, const char **av) | |
b5d97e6b JH |
1389 | { |
1390 | struct rev_info revs; | |
1391 | char line[1000]; | |
b5d97e6b JH |
1392 | int flags = 0; |
1393 | ||
b5d97e6b JH |
1394 | init_revisions(&revs, NULL); |
1395 | save_commit_buffer = 0; | |
1396 | track_object_refs = 0; | |
1397 | setup_revisions(ac, av, &revs, NULL); | |
1398 | ||
b5d97e6b JH |
1399 | while (fgets(line, sizeof(line), stdin) != NULL) { |
1400 | int len = strlen(line); | |
1401 | if (line[len - 1] == '\n') | |
1402 | line[--len] = 0; | |
1403 | if (!len) | |
1404 | break; | |
1405 | if (*line == '-') { | |
1406 | if (!strcmp(line, "--not")) { | |
1407 | flags ^= UNINTERESTING; | |
1408 | continue; | |
1409 | } | |
1410 | die("not a rev '%s'", line); | |
1411 | } | |
1412 | if (handle_revision_arg(line, &revs, flags, 1)) | |
1413 | die("bad revision '%s'", line); | |
1414 | } | |
1415 | ||
1416 | prepare_revision_walk(&revs); | |
8d1d8f83 | 1417 | mark_edges_uninteresting(revs.commits, &revs, show_edge); |
b5d97e6b JH |
1418 | traverse_commit_list(&revs, show_commit, show_object); |
1419 | } | |
1420 | ||
1421 | int cmd_pack_objects(int argc, const char **argv, const char *prefix) | |
1422 | { | |
1423 | SHA_CTX ctx; | |
1424 | int depth = 10; | |
1425 | struct object_entry **list; | |
1426 | int use_internal_rev_list = 0; | |
8d1d8f83 | 1427 | int thin = 0; |
b5d97e6b | 1428 | int i; |
8d1d8f83 JH |
1429 | const char *rp_av[64]; |
1430 | int rp_ac; | |
1431 | ||
1432 | rp_av[0] = "pack-objects"; | |
1433 | rp_av[1] = "--objects"; /* --thin will make it --objects-edge */ | |
1434 | rp_ac = 2; | |
b5d97e6b JH |
1435 | |
1436 | git_config(git_pack_config); | |
1437 | ||
1438 | progress = isatty(2); | |
1439 | for (i = 1; i < argc; i++) { | |
1440 | const char *arg = argv[i]; | |
1441 | ||
1442 | if (*arg != '-') | |
1443 | break; | |
1444 | ||
1445 | if (!strcmp("--non-empty", arg)) { | |
1446 | non_empty = 1; | |
1447 | continue; | |
1448 | } | |
1449 | if (!strcmp("--local", arg)) { | |
1450 | local = 1; | |
1451 | continue; | |
1452 | } | |
1453 | if (!strcmp("--progress", arg)) { | |
1454 | progress = 1; | |
1455 | continue; | |
1456 | } | |
1457 | if (!strcmp("--incremental", arg)) { | |
1458 | incremental = 1; | |
1459 | continue; | |
1460 | } | |
1461 | if (!strncmp("--window=", arg, 9)) { | |
1462 | char *end; | |
1463 | window = strtoul(arg+9, &end, 0); | |
1464 | if (!arg[9] || *end) | |
1465 | usage(pack_usage); | |
1466 | continue; | |
1467 | } | |
1468 | if (!strncmp("--depth=", arg, 8)) { | |
1469 | char *end; | |
1470 | depth = strtoul(arg+8, &end, 0); | |
1471 | if (!arg[8] || *end) | |
1472 | usage(pack_usage); | |
1473 | continue; | |
1474 | } | |
1475 | if (!strcmp("--progress", arg)) { | |
1476 | progress = 1; | |
1477 | continue; | |
1478 | } | |
1479 | if (!strcmp("-q", arg)) { | |
1480 | progress = 0; | |
1481 | continue; | |
1482 | } | |
1483 | if (!strcmp("--no-reuse-delta", arg)) { | |
1484 | no_reuse_delta = 1; | |
1485 | continue; | |
1486 | } | |
1487 | if (!strcmp("--stdout", arg)) { | |
1488 | pack_to_stdout = 1; | |
1489 | continue; | |
1490 | } | |
1491 | if (!strcmp("--revs", arg)) { | |
1492 | use_internal_rev_list = 1; | |
1493 | continue; | |
1494 | } | |
8d1d8f83 JH |
1495 | if (!strcmp("--unpacked", arg) || |
1496 | !strncmp("--unpacked=", arg, 11) || | |
1497 | !strcmp("--all", arg)) { | |
1498 | use_internal_rev_list = 1; | |
1499 | if (ARRAY_SIZE(rp_av) - 1 <= rp_ac) | |
1500 | die("too many internal rev-list options"); | |
1501 | rp_av[rp_ac++] = arg; | |
b5d97e6b JH |
1502 | continue; |
1503 | } | |
8d1d8f83 JH |
1504 | if (!strcmp("--thin", arg)) { |
1505 | use_internal_rev_list = 1; | |
1506 | thin = 1; | |
1507 | rp_av[1] = "--objects-edge"; | |
b5d97e6b JH |
1508 | continue; |
1509 | } | |
1510 | usage(pack_usage); | |
1511 | } | |
1512 | ||
1513 | /* Traditionally "pack-objects [options] base extra" failed; | |
1514 | * we would however want to take refs parameter that would | |
1515 | * have been given to upstream rev-list ourselves, which means | |
1516 | * we somehow want to say what the base name is. So the | |
1517 | * syntax would be: | |
1518 | * | |
1519 | * pack-objects [options] base <refs...> | |
1520 | * | |
1521 | * in other words, we would treat the first non-option as the | |
1522 | * base_name and send everything else to the internal revision | |
1523 | * walker. | |
1524 | */ | |
1525 | ||
1526 | if (!pack_to_stdout) | |
1527 | base_name = argv[i++]; | |
1528 | ||
1529 | if (pack_to_stdout != !base_name) | |
1530 | usage(pack_usage); | |
1531 | ||
8d1d8f83 JH |
1532 | if (!pack_to_stdout && thin) |
1533 | die("--thin cannot be used to build an indexable pack."); | |
b5d97e6b JH |
1534 | |
1535 | prepare_packed_git(); | |
1536 | ||
1537 | if (progress) { | |
1538 | fprintf(stderr, "Generating pack...\n"); | |
1539 | setup_progress_signal(); | |
1540 | } | |
1541 | ||
1542 | if (!use_internal_rev_list) | |
1543 | read_object_list_from_stdin(); | |
8d1d8f83 JH |
1544 | else { |
1545 | rp_av[rp_ac] = NULL; | |
1546 | get_object_list(rp_ac, rp_av); | |
1547 | } | |
b5d97e6b | 1548 | |
21fcd1bd JH |
1549 | if (progress) |
1550 | fprintf(stderr, "Done counting %d objects.\n", nr_objects); | |
f0b0af1b JH |
1551 | sorted_by_sha = create_final_object_list(); |
1552 | if (non_empty && !nr_result) | |
1c4a2912 | 1553 | return 0; |
c323ac7d | 1554 | |
84c8d8ae JH |
1555 | SHA1_Init(&ctx); |
1556 | list = sorted_by_sha; | |
7a979d99 | 1557 | for (i = 0; i < nr_result; i++) { |
84c8d8ae JH |
1558 | struct object_entry *entry = *list++; |
1559 | SHA1_Update(&ctx, entry->sha1, 20); | |
1560 | } | |
1561 | SHA1_Final(object_list_sha1, &ctx); | |
7a979d99 JH |
1562 | if (progress && (nr_objects != nr_result)) |
1563 | fprintf(stderr, "Result has %d objects.\n", nr_result); | |
84c8d8ae | 1564 | |
df6d6101 | 1565 | if (reuse_cached_pack(object_list_sha1)) |
f3123c4a JH |
1566 | ; |
1567 | else { | |
f0b0af1b JH |
1568 | if (nr_result) |
1569 | prepare_pack(window, depth); | |
5e8dc750 NP |
1570 | if (progress && pack_to_stdout) { |
1571 | /* the other end usually displays progress itself */ | |
1572 | struct itimerval v = {{0,},}; | |
1573 | setitimer(ITIMER_REAL, &v, NULL); | |
1574 | signal(SIGALRM, SIG_IGN ); | |
1575 | progress_update = 0; | |
1576 | } | |
1577 | write_pack_file(); | |
f3123c4a JH |
1578 | if (!pack_to_stdout) { |
1579 | write_index_file(); | |
1580 | puts(sha1_to_hex(object_list_sha1)); | |
1581 | } | |
5f3de58f | 1582 | } |
ab7cd7bb JH |
1583 | if (progress) |
1584 | fprintf(stderr, "Total %d, written %d (delta %d), reused %d (delta %d)\n", | |
7a979d99 | 1585 | nr_result, written, written_delta, reused, reused_delta); |
c323ac7d LT |
1586 | return 0; |
1587 | } |