]>
git.ipfire.org Git - thirdparty/git.git/blob - index-pack.c
cecdd2680c7cbdfce85c352ed59e1eaef673f38c
10 static const char index_pack_usage
[] =
11 "git-index-pack [-o <index-file>] { <pack-file> | --stdin [<pack-file>] }";
17 unsigned int hdr_size
;
18 enum object_type type
;
19 enum object_type real_type
;
20 unsigned char sha1
[20];
24 unsigned char sha1
[20];
29 * Even if sizeof(union delta_base) == 24 on 64-bit archs, we really want
30 * to memcmp() only the first 20 bytes.
32 #define UNION_BASE_SZ 20
36 struct object_entry
*obj
;
37 union delta_base base
;
40 static struct object_entry
*objects
;
41 static struct delta_entry
*deltas
;
42 static int nr_objects
;
45 static int from_stdin
;
47 /* We always read in 4kB chunks. */
48 static unsigned char input_buffer
[4096];
49 static unsigned long input_offset
, input_len
, consumed_bytes
;
50 static SHA_CTX input_ctx
;
51 static int input_fd
, output_fd
, mmap_fd
;
54 * Make sure at least "min" bytes are available in the buffer, and
55 * return the pointer to the buffer.
57 static void * fill(int min
)
60 return input_buffer
+ input_offset
;
61 if (min
> sizeof(input_buffer
))
62 die("cannot fill %d bytes", min
);
65 write_or_die(output_fd
, input_buffer
, input_offset
);
66 SHA1_Update(&input_ctx
, input_buffer
, input_offset
);
67 memcpy(input_buffer
, input_buffer
+ input_offset
, input_len
);
71 int ret
= xread(input_fd
, input_buffer
+ input_len
,
72 sizeof(input_buffer
) - input_len
);
76 die("read error on input: %s", strerror(errno
));
79 } while (input_len
< min
);
83 static void use(int bytes
)
85 if (bytes
> input_len
)
86 die("used more bytes than were available");
88 input_offset
+= bytes
;
89 consumed_bytes
+= bytes
;
92 static const char * open_pack_file(const char *pack_name
)
97 static char tmpfile
[PATH_MAX
];
98 snprintf(tmpfile
, sizeof(tmpfile
),
99 "%s/pack_XXXXXX", get_object_directory());
100 output_fd
= mkstemp(tmpfile
);
101 pack_name
= xstrdup(tmpfile
);
103 output_fd
= open(pack_name
, O_CREAT
|O_EXCL
|O_RDWR
, 0600);
105 die("unable to create %s: %s\n", pack_name
, strerror(errno
));
108 input_fd
= open(pack_name
, O_RDONLY
);
110 die("cannot open packfile '%s': %s",
111 pack_name
, strerror(errno
));
115 SHA1_Init(&input_ctx
);
119 static void parse_pack_header(void)
121 struct pack_header
*hdr
= fill(sizeof(struct pack_header
));
123 /* Header consistency check */
124 if (hdr
->hdr_signature
!= htonl(PACK_SIGNATURE
))
125 die("pack signature mismatch");
126 if (!pack_version_ok(hdr
->hdr_version
))
127 die("pack version %d unsupported", ntohl(hdr
->hdr_version
));
129 nr_objects
= ntohl(hdr
->hdr_entries
);
130 use(sizeof(struct pack_header
));
131 /*fprintf(stderr, "Indexing %d objects\n", nr_objects);*/
134 static void bad_object(unsigned long offset
, const char *format
,
135 ...) NORETURN
__attribute__((format (printf
, 2, 3)));
137 static void bad_object(unsigned long offset
, const char *format
, ...)
142 va_start(params
, format
);
143 vsnprintf(buf
, sizeof(buf
), format
, params
);
145 die("pack has bad object at offset %lu: %s", offset
, buf
);
148 static void *unpack_entry_data(unsigned long offset
, unsigned long size
)
151 void *buf
= xmalloc(size
);
153 memset(&stream
, 0, sizeof(stream
));
154 stream
.next_out
= buf
;
155 stream
.avail_out
= size
;
156 stream
.next_in
= fill(1);
157 stream
.avail_in
= input_len
;
158 inflateInit(&stream
);
161 int ret
= inflate(&stream
, 0);
162 use(input_len
- stream
.avail_in
);
163 if (stream
.total_out
== size
&& ret
== Z_STREAM_END
)
166 bad_object(offset
, "inflate returned %d", ret
);
167 stream
.next_in
= fill(1);
168 stream
.avail_in
= input_len
;
174 static void *unpack_raw_entry(struct object_entry
*obj
, union delta_base
*delta_base
)
177 unsigned long size
, base_offset
;
180 obj
->offset
= consumed_bytes
;
185 obj
->type
= (c
>> 4) & 7;
192 size
+= (c
& 0x7fUL
) << shift
;
199 hashcpy(delta_base
->sha1
, fill(20));
203 memset(delta_base
, 0, sizeof(*delta_base
));
207 base_offset
= c
& 127;
210 if (!base_offset
|| base_offset
& ~(~0UL >> 7))
211 bad_object(obj
->offset
, "offset value overflow for delta base object");
215 base_offset
= (base_offset
<< 7) + (c
& 127);
217 delta_base
->offset
= obj
->offset
- base_offset
;
218 if (delta_base
->offset
>= obj
->offset
)
219 bad_object(obj
->offset
, "delta base offset is out of bound");
227 bad_object(obj
->offset
, "bad object type %d", obj
->type
);
229 obj
->hdr_size
= consumed_bytes
- obj
->offset
;
231 return unpack_entry_data(obj
->offset
, obj
->size
);
234 static void * get_data_from_pack(struct object_entry
*obj
)
236 unsigned long from
= obj
[0].offset
+ obj
[0].hdr_size
;
237 unsigned long len
= obj
[1].offset
- from
;
238 unsigned pg_offset
= from
% getpagesize();
239 unsigned char *map
, *data
;
243 map
= mmap(NULL
, len
+ pg_offset
, PROT_READ
, MAP_PRIVATE
,
244 mmap_fd
, from
- pg_offset
);
245 if (map
== MAP_FAILED
)
246 die("cannot mmap pack file: %s", strerror(errno
));
247 data
= xmalloc(obj
->size
);
248 memset(&stream
, 0, sizeof(stream
));
249 stream
.next_out
= data
;
250 stream
.avail_out
= obj
->size
;
251 stream
.next_in
= map
+ pg_offset
;
252 stream
.avail_in
= len
;
253 inflateInit(&stream
);
254 while ((st
= inflate(&stream
, Z_FINISH
)) == Z_OK
);
256 if (st
!= Z_STREAM_END
|| stream
.total_out
!= obj
->size
)
257 die("serious inflate inconsistency");
258 munmap(map
, len
+ pg_offset
);
262 static int find_delta(const union delta_base
*base
)
264 int first
= 0, last
= nr_deltas
;
266 while (first
< last
) {
267 int next
= (first
+ last
) / 2;
268 struct delta_entry
*delta
= &deltas
[next
];
271 cmp
= memcmp(base
, &delta
->base
, UNION_BASE_SZ
);
283 static int find_delta_childs(const union delta_base
*base
,
284 int *first_index
, int *last_index
)
286 int first
= find_delta(base
);
288 int end
= nr_deltas
- 1;
292 while (first
> 0 && !memcmp(&deltas
[first
- 1].base
, base
, UNION_BASE_SZ
))
294 while (last
< end
&& !memcmp(&deltas
[last
+ 1].base
, base
, UNION_BASE_SZ
))
296 *first_index
= first
;
301 static void sha1_object(const void *data
, unsigned long size
,
302 enum object_type type
, unsigned char *sha1
)
307 const char *type_str
;
310 case OBJ_COMMIT
: type_str
= commit_type
; break;
311 case OBJ_TREE
: type_str
= tree_type
; break;
312 case OBJ_BLOB
: type_str
= blob_type
; break;
313 case OBJ_TAG
: type_str
= tag_type
; break;
315 die("bad type %d", type
);
318 header_size
= sprintf(header
, "%s %lu", type_str
, size
) + 1;
321 SHA1_Update(&ctx
, header
, header_size
);
322 SHA1_Update(&ctx
, data
, size
);
323 SHA1_Final(sha1
, &ctx
);
326 static void resolve_delta(struct delta_entry
*delta
, void *base_data
,
327 unsigned long base_size
, enum object_type type
)
329 struct object_entry
*obj
= delta
->obj
;
331 unsigned long delta_size
;
333 unsigned long result_size
;
334 union delta_base delta_base
;
337 obj
->real_type
= type
;
338 delta_data
= get_data_from_pack(obj
);
339 delta_size
= obj
->size
;
340 result
= patch_delta(base_data
, base_size
, delta_data
, delta_size
,
344 bad_object(obj
->offset
, "failed to apply delta");
345 sha1_object(result
, result_size
, type
, obj
->sha1
);
347 hashcpy(delta_base
.sha1
, obj
->sha1
);
348 if (!find_delta_childs(&delta_base
, &first
, &last
)) {
349 for (j
= first
; j
<= last
; j
++)
350 if (deltas
[j
].obj
->type
== OBJ_REF_DELTA
)
351 resolve_delta(&deltas
[j
], result
, result_size
, type
);
354 memset(&delta_base
, 0, sizeof(delta_base
));
355 delta_base
.offset
= obj
->offset
;
356 if (!find_delta_childs(&delta_base
, &first
, &last
)) {
357 for (j
= first
; j
<= last
; j
++)
358 if (deltas
[j
].obj
->type
== OBJ_OFS_DELTA
)
359 resolve_delta(&deltas
[j
], result
, result_size
, type
);
365 static int compare_delta_entry(const void *a
, const void *b
)
367 const struct delta_entry
*delta_a
= a
;
368 const struct delta_entry
*delta_b
= b
;
369 return memcmp(&delta_a
->base
, &delta_b
->base
, UNION_BASE_SZ
);
372 /* Parse all objects and return the pack content SHA1 hash */
373 static void parse_pack_objects(unsigned char *sha1
)
376 struct delta_entry
*delta
= deltas
;
382 * - find locations of all objects;
383 * - calculate SHA1 of all non-delta objects;
384 * - remember base SHA1 for all deltas.
386 for (i
= 0; i
< nr_objects
; i
++) {
387 struct object_entry
*obj
= &objects
[i
];
388 data
= unpack_raw_entry(obj
, &delta
->base
);
389 obj
->real_type
= obj
->type
;
390 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
) {
395 sha1_object(data
, obj
->size
, obj
->type
, obj
->sha1
);
398 objects
[i
].offset
= consumed_bytes
;
400 /* Check pack integrity */
401 SHA1_Update(&input_ctx
, input_buffer
, input_offset
);
402 SHA1_Final(sha1
, &input_ctx
);
403 if (hashcmp(fill(20), sha1
))
404 die("pack is corrupted (SHA1 mismatch)");
407 write_or_die(output_fd
, input_buffer
, input_offset
);
409 /* If input_fd is a file, we should have reached its end now. */
410 if (fstat(input_fd
, &st
))
411 die("cannot fstat packfile: %s", strerror(errno
));
412 if (S_ISREG(st
.st_mode
) && st
.st_size
!= consumed_bytes
)
413 die("pack has junk at the end");
415 /* Sort deltas by base SHA1/offset for fast searching */
416 qsort(deltas
, nr_deltas
, sizeof(struct delta_entry
),
417 compare_delta_entry
);
421 * - for all non-delta objects, look if it is used as a base for
423 * - if used as a base, uncompress the object and apply all deltas,
424 * recursively checking if the resulting object is used as a base
425 * for some more deltas.
427 for (i
= 0; i
< nr_objects
; i
++) {
428 struct object_entry
*obj
= &objects
[i
];
429 union delta_base base
;
430 int j
, ref
, ref_first
, ref_last
, ofs
, ofs_first
, ofs_last
;
432 if (obj
->type
== OBJ_REF_DELTA
|| obj
->type
== OBJ_OFS_DELTA
)
434 hashcpy(base
.sha1
, obj
->sha1
);
435 ref
= !find_delta_childs(&base
, &ref_first
, &ref_last
);
436 memset(&base
, 0, sizeof(base
));
437 base
.offset
= obj
->offset
;
438 ofs
= !find_delta_childs(&base
, &ofs_first
, &ofs_last
);
441 data
= get_data_from_pack(obj
);
443 for (j
= ref_first
; j
<= ref_last
; j
++)
444 if (deltas
[j
].obj
->type
== OBJ_REF_DELTA
)
445 resolve_delta(&deltas
[j
], data
,
446 obj
->size
, obj
->type
);
448 for (j
= ofs_first
; j
<= ofs_last
; j
++)
449 if (deltas
[j
].obj
->type
== OBJ_OFS_DELTA
)
450 resolve_delta(&deltas
[j
], data
,
451 obj
->size
, obj
->type
);
455 /* Check for unresolved deltas */
456 for (i
= 0; i
< nr_deltas
; i
++) {
457 if (deltas
[i
].obj
->real_type
== OBJ_REF_DELTA
||
458 deltas
[i
].obj
->real_type
== OBJ_OFS_DELTA
)
459 die("pack has unresolved deltas");
463 static int sha1_compare(const void *_a
, const void *_b
)
465 struct object_entry
*a
= *(struct object_entry
**)_a
;
466 struct object_entry
*b
= *(struct object_entry
**)_b
;
467 return hashcmp(a
->sha1
, b
->sha1
);
471 * On entry *sha1 contains the pack content SHA1 hash, on exit it is
472 * the SHA1 hash of sorted object names.
474 static const char * write_index_file(const char *index_name
, unsigned char *sha1
)
477 struct object_entry
**sorted_by_sha
, **list
, **last
;
478 unsigned int array
[256];
484 xcalloc(nr_objects
, sizeof(struct object_entry
*));
485 list
= sorted_by_sha
;
486 last
= sorted_by_sha
+ nr_objects
;
487 for (i
= 0; i
< nr_objects
; ++i
)
488 sorted_by_sha
[i
] = &objects
[i
];
489 qsort(sorted_by_sha
, nr_objects
, sizeof(sorted_by_sha
[0]),
494 sorted_by_sha
= list
= last
= NULL
;
497 static char tmpfile
[PATH_MAX
];
498 snprintf(tmpfile
, sizeof(tmpfile
),
499 "%s/index_XXXXXX", get_object_directory());
500 fd
= mkstemp(tmpfile
);
501 index_name
= xstrdup(tmpfile
);
504 fd
= open(index_name
, O_CREAT
|O_EXCL
|O_WRONLY
, 0600);
507 die("unable to create %s: %s", index_name
, strerror(errno
));
508 f
= sha1fd(fd
, index_name
);
511 * Write the first-level table (the list is sorted,
512 * but we use a 256-entry lookup to be able to avoid
513 * having to do eight extra binary search iterations).
515 for (i
= 0; i
< 256; i
++) {
516 struct object_entry
**next
= list
;
517 while (next
< last
) {
518 struct object_entry
*obj
= *next
;
519 if (obj
->sha1
[0] != i
)
523 array
[i
] = htonl(next
- sorted_by_sha
);
526 sha1write(f
, array
, 256 * sizeof(int));
528 /* recompute the SHA1 hash of sorted object names.
529 * currently pack-objects does not do this, but that
534 * Write the actual SHA1 entries..
536 list
= sorted_by_sha
;
537 for (i
= 0; i
< nr_objects
; i
++) {
538 struct object_entry
*obj
= *list
++;
539 unsigned int offset
= htonl(obj
->offset
);
540 sha1write(f
, &offset
, 4);
541 sha1write(f
, obj
->sha1
, 20);
542 SHA1_Update(&ctx
, obj
->sha1
, 20);
544 sha1write(f
, sha1
, 20);
545 sha1close(f
, NULL
, 1);
547 SHA1_Final(sha1
, &ctx
);
551 static void final(const char *final_pack_name
, const char *curr_pack_name
,
552 const char *final_index_name
, const char *curr_index_name
,
561 err
= close(output_fd
);
563 die("error while closing pack file: %s", strerror(errno
));
564 chmod(curr_pack_name
, 0444);
567 if (final_pack_name
!= curr_pack_name
) {
568 if (!final_pack_name
) {
569 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.pack",
570 get_object_directory(), sha1_to_hex(sha1
));
571 final_pack_name
= name
;
573 if (move_temp_to_file(curr_pack_name
, final_pack_name
))
574 die("cannot store pack file");
577 chmod(curr_index_name
, 0444);
578 if (final_index_name
!= curr_index_name
) {
579 if (!final_index_name
) {
580 snprintf(name
, sizeof(name
), "%s/pack/pack-%s.idx",
581 get_object_directory(), sha1_to_hex(sha1
));
582 final_index_name
= name
;
584 if (move_temp_to_file(curr_index_name
, final_index_name
))
585 die("cannot store index file");
589 int main(int argc
, char **argv
)
592 const char *curr_pack
, *pack_name
= NULL
;
593 const char *curr_index
, *index_name
= NULL
;
594 char *index_name_buf
= NULL
;
595 unsigned char sha1
[20];
597 for (i
= 1; i
< argc
; i
++) {
598 const char *arg
= argv
[i
];
601 if (!strcmp(arg
, "--stdin")) {
603 } else if (!strcmp(arg
, "-o")) {
604 if (index_name
|| (i
+1) >= argc
)
605 usage(index_pack_usage
);
606 index_name
= argv
[++i
];
608 usage(index_pack_usage
);
613 usage(index_pack_usage
);
617 if (!pack_name
&& !from_stdin
)
618 usage(index_pack_usage
);
619 if (!index_name
&& pack_name
) {
620 int len
= strlen(pack_name
);
621 if (!has_extension(pack_name
, ".pack"))
622 die("packfile name '%s' does not end with '.pack'",
624 index_name_buf
= xmalloc(len
);
625 memcpy(index_name_buf
, pack_name
, len
- 5);
626 strcpy(index_name_buf
+ len
- 5, ".idx");
627 index_name
= index_name_buf
;
630 curr_pack
= open_pack_file(pack_name
);
632 objects
= xcalloc(nr_objects
+ 1, sizeof(struct object_entry
));
633 deltas
= xcalloc(nr_objects
, sizeof(struct delta_entry
));
634 parse_pack_objects(sha1
);
636 curr_index
= write_index_file(index_name
, sha1
);
637 final(pack_name
, curr_pack
, index_name
, curr_index
, sha1
);
639 free(index_name_buf
);
641 printf("%s\n", sha1_to_hex(sha1
));