]> git.ipfire.org Git - thirdparty/git.git/blob - builtin/unpack-objects.c
e905d5f4e1964b43e2262500053f3e53753eaeab
[thirdparty/git.git] / builtin / unpack-objects.c
1 #define USE_THE_REPOSITORY_VARIABLE
2 #define DISABLE_SIGN_COMPARE_WARNINGS
3
4 #include "builtin.h"
5 #include "bulk-checkin.h"
6 #include "config.h"
7 #include "environment.h"
8 #include "gettext.h"
9 #include "git-zlib.h"
10 #include "hex.h"
11 #include "object-file.h"
12 #include "object-store.h"
13 #include "object.h"
14 #include "delta.h"
15 #include "pack.h"
16 #include "blob.h"
17 #include "replace-object.h"
18 #include "strbuf.h"
19 #include "progress.h"
20 #include "decorate.h"
21 #include "fsck.h"
22 #include "packfile.h"
23
24 static int dry_run, quiet, recover, has_errors, strict;
25 static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict]";
26
27 /* We always read in 4kB chunks. */
28 static unsigned char buffer[4096];
29 static unsigned int offset, len;
30 static off_t consumed_bytes;
31 static off_t max_input_size;
32 static struct git_hash_ctx ctx;
33 static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
34 static struct progress *progress;
35
36 /*
37 * When running under --strict mode, objects whose reachability are
38 * suspect are kept in core without getting written in the object
39 * store.
40 */
41 struct obj_buffer {
42 char *buffer;
43 unsigned long size;
44 };
45
46 static struct decoration obj_decorate;
47
48 static struct obj_buffer *lookup_object_buffer(struct object *base)
49 {
50 return lookup_decoration(&obj_decorate, base);
51 }
52
53 static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
54 {
55 struct obj_buffer *obj;
56 CALLOC_ARRAY(obj, 1);
57 obj->buffer = buffer;
58 obj->size = size;
59 if (add_decoration(&obj_decorate, object, obj))
60 die("object %s tried to add buffer twice!", oid_to_hex(&object->oid));
61 }
62
63 /*
64 * Make sure at least "min" bytes are available in the buffer, and
65 * return the pointer to the buffer.
66 */
67 static void *fill(int min)
68 {
69 if (min <= len)
70 return buffer + offset;
71 if (min > sizeof(buffer))
72 die("cannot fill %d bytes", min);
73 if (offset) {
74 git_hash_update(&ctx, buffer, offset);
75 memmove(buffer, buffer + offset, len);
76 offset = 0;
77 }
78 do {
79 ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
80 if (ret <= 0) {
81 if (!ret)
82 die("early EOF");
83 die_errno("read error on input");
84 }
85 len += ret;
86 } while (len < min);
87 return buffer;
88 }
89
90 static void use(int bytes)
91 {
92 if (bytes > len)
93 die("used more bytes than were available");
94 len -= bytes;
95 offset += bytes;
96
97 /* make sure off_t is sufficiently large not to wrap */
98 if (signed_add_overflows(consumed_bytes, bytes))
99 die("pack too large for current definition of off_t");
100 consumed_bytes += bytes;
101 if (max_input_size && consumed_bytes > max_input_size)
102 die(_("pack exceeds maximum allowed size"));
103 display_throughput(progress, consumed_bytes);
104 }
105
106 /*
107 * Decompress zstream from the standard input into a newly
108 * allocated buffer of specified size and return the buffer.
109 * The caller is responsible to free the returned buffer.
110 *
111 * But for dry_run mode, "get_data()" is only used to check the
112 * integrity of data, and the returned buffer is not used at all.
113 * Therefore, in dry_run mode, "get_data()" will release the small
114 * allocated buffer which is reused to hold temporary zstream output
115 * and return NULL instead of returning garbage data.
116 */
117 static void *get_data(unsigned long size)
118 {
119 git_zstream stream;
120 unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
121 void *buf = xmallocz(bufsize);
122
123 memset(&stream, 0, sizeof(stream));
124
125 stream.next_out = buf;
126 stream.avail_out = bufsize;
127 stream.next_in = fill(1);
128 stream.avail_in = len;
129 git_inflate_init(&stream);
130
131 for (;;) {
132 int ret = git_inflate(&stream, 0);
133 use(len - stream.avail_in);
134 if (stream.total_out == size && ret == Z_STREAM_END)
135 break;
136 if (ret != Z_OK) {
137 error("inflate returned %d", ret);
138 FREE_AND_NULL(buf);
139 if (!recover)
140 exit(1);
141 has_errors = 1;
142 break;
143 }
144 stream.next_in = fill(1);
145 stream.avail_in = len;
146 if (dry_run) {
147 /* reuse the buffer in dry_run mode */
148 stream.next_out = buf;
149 stream.avail_out = bufsize > size - stream.total_out ?
150 size - stream.total_out :
151 bufsize;
152 }
153 }
154 git_inflate_end(&stream);
155 if (dry_run)
156 FREE_AND_NULL(buf);
157 return buf;
158 }
159
160 struct delta_info {
161 struct object_id base_oid;
162 unsigned nr;
163 off_t base_offset;
164 unsigned long size;
165 void *delta;
166 struct delta_info *next;
167 };
168
169 static struct delta_info *delta_list;
170
171 static void add_delta_to_list(unsigned nr, const struct object_id *base_oid,
172 off_t base_offset,
173 void *delta, unsigned long size)
174 {
175 struct delta_info *info = xmalloc(sizeof(*info));
176
177 oidcpy(&info->base_oid, base_oid);
178 info->base_offset = base_offset;
179 info->size = size;
180 info->delta = delta;
181 info->nr = nr;
182 info->next = delta_list;
183 delta_list = info;
184 }
185
186 struct obj_info {
187 off_t offset;
188 struct object_id oid;
189 struct object *obj;
190 };
191
192 /* Remember to update object flag allocation in object.h */
193 #define FLAG_OPEN (1u<<20)
194 #define FLAG_WRITTEN (1u<<21)
195
196 static struct obj_info *obj_list;
197 static unsigned nr_objects;
198
199 /*
200 * Called only from check_object() after it verified this object
201 * is Ok.
202 */
203 static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf)
204 {
205 struct object_id oid;
206
207 if (write_object_file(obj_buf->buffer, obj_buf->size,
208 obj->type, &oid) < 0)
209 die("failed to write object %s", oid_to_hex(&obj->oid));
210 obj->flags |= FLAG_WRITTEN;
211 }
212
213 /*
214 * At the very end of the processing, write_rest() scans the objects
215 * that have reachability requirements and calls this function.
216 * Verify its reachability and validity recursively and write it out.
217 */
218 static int check_object(struct object *obj, enum object_type type,
219 void *data UNUSED,
220 struct fsck_options *options UNUSED)
221 {
222 struct obj_buffer *obj_buf;
223
224 if (!obj)
225 return 1;
226
227 if (obj->flags & FLAG_WRITTEN)
228 return 0;
229
230 if (type != OBJ_ANY && obj->type != type)
231 die("object type mismatch");
232
233 if (!(obj->flags & FLAG_OPEN)) {
234 unsigned long size;
235 int type = oid_object_info(the_repository, &obj->oid, &size);
236 if (type != obj->type || type <= 0)
237 die("object of unexpected type");
238 obj->flags |= FLAG_WRITTEN;
239 return 0;
240 }
241
242 obj_buf = lookup_object_buffer(obj);
243 if (!obj_buf)
244 die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
245 if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
246 die("fsck error in packed object");
247 fsck_options.walk = check_object;
248 if (fsck_walk(obj, NULL, &fsck_options))
249 die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
250 write_cached_object(obj, obj_buf);
251 return 0;
252 }
253
254 static void write_rest(void)
255 {
256 unsigned i;
257 for (i = 0; i < nr_objects; i++) {
258 if (obj_list[i].obj)
259 check_object(obj_list[i].obj, OBJ_ANY, NULL, NULL);
260 }
261 }
262
263 static void added_object(unsigned nr, enum object_type type,
264 void *data, unsigned long size);
265
266 /*
267 * Write out nr-th object from the list, now we know the contents
268 * of it. Under --strict, this buffers structured objects in-core,
269 * to be checked at the end.
270 */
271 static void write_object(unsigned nr, enum object_type type,
272 void *buf, unsigned long size)
273 {
274 if (!strict) {
275 if (write_object_file(buf, size, type,
276 &obj_list[nr].oid) < 0)
277 die("failed to write object");
278 added_object(nr, type, buf, size);
279 free(buf);
280 obj_list[nr].obj = NULL;
281 } else if (type == OBJ_BLOB) {
282 struct blob *blob;
283 if (write_object_file(buf, size, type,
284 &obj_list[nr].oid) < 0)
285 die("failed to write object");
286 added_object(nr, type, buf, size);
287 free(buf);
288
289 blob = lookup_blob(the_repository, &obj_list[nr].oid);
290 if (blob)
291 blob->object.flags |= FLAG_WRITTEN;
292 else
293 die("invalid blob object");
294 obj_list[nr].obj = NULL;
295 } else {
296 struct object *obj;
297 int eaten;
298 hash_object_file(the_hash_algo, buf, size, type,
299 &obj_list[nr].oid);
300 added_object(nr, type, buf, size);
301 obj = parse_object_buffer(the_repository, &obj_list[nr].oid,
302 type, size, buf,
303 &eaten);
304 if (!obj)
305 die("invalid %s", type_name(type));
306 add_object_buffer(obj, buf, size);
307 obj->flags |= FLAG_OPEN;
308 obj_list[nr].obj = obj;
309 }
310 }
311
312 static void resolve_delta(unsigned nr, enum object_type type,
313 void *base, unsigned long base_size,
314 void *delta, unsigned long delta_size)
315 {
316 void *result;
317 unsigned long result_size;
318
319 result = patch_delta(base, base_size,
320 delta, delta_size,
321 &result_size);
322 if (!result)
323 die("failed to apply delta");
324 free(delta);
325 write_object(nr, type, result, result_size);
326 }
327
328 /*
329 * We now know the contents of an object (which is nr-th in the pack);
330 * resolve all the deltified objects that are based on it.
331 */
332 static void added_object(unsigned nr, enum object_type type,
333 void *data, unsigned long size)
334 {
335 struct delta_info **p = &delta_list;
336 struct delta_info *info;
337
338 while ((info = *p) != NULL) {
339 if (oideq(&info->base_oid, &obj_list[nr].oid) ||
340 info->base_offset == obj_list[nr].offset) {
341 *p = info->next;
342 p = &delta_list;
343 resolve_delta(info->nr, type, data, size,
344 info->delta, info->size);
345 free(info);
346 continue;
347 }
348 p = &info->next;
349 }
350 }
351
352 static void unpack_non_delta_entry(enum object_type type, unsigned long size,
353 unsigned nr)
354 {
355 void *buf = get_data(size);
356
357 if (buf)
358 write_object(nr, type, buf, size);
359 }
360
361 struct input_zstream_data {
362 git_zstream *zstream;
363 unsigned char buf[8192];
364 int status;
365 };
366
367 static const void *feed_input_zstream(struct input_stream *in_stream,
368 unsigned long *readlen)
369 {
370 struct input_zstream_data *data = in_stream->data;
371 git_zstream *zstream = data->zstream;
372 void *in = fill(1);
373
374 if (in_stream->is_finished) {
375 *readlen = 0;
376 return NULL;
377 }
378
379 zstream->next_out = data->buf;
380 zstream->avail_out = sizeof(data->buf);
381 zstream->next_in = in;
382 zstream->avail_in = len;
383
384 data->status = git_inflate(zstream, 0);
385
386 in_stream->is_finished = data->status != Z_OK;
387 use(len - zstream->avail_in);
388 *readlen = sizeof(data->buf) - zstream->avail_out;
389
390 return data->buf;
391 }
392
393 static void stream_blob(unsigned long size, unsigned nr)
394 {
395 git_zstream zstream = { 0 };
396 struct input_zstream_data data = { 0 };
397 struct input_stream in_stream = {
398 .read = feed_input_zstream,
399 .data = &data,
400 };
401 struct obj_info *info = &obj_list[nr];
402
403 data.zstream = &zstream;
404 git_inflate_init(&zstream);
405
406 if (stream_loose_object(&in_stream, size, &info->oid))
407 die(_("failed to write object in stream"));
408
409 if (data.status != Z_STREAM_END)
410 die(_("inflate returned (%d)"), data.status);
411 git_inflate_end(&zstream);
412
413 if (strict) {
414 struct blob *blob = lookup_blob(the_repository, &info->oid);
415
416 if (!blob)
417 die(_("invalid blob object from stream"));
418 blob->object.flags |= FLAG_WRITTEN;
419 }
420 info->obj = NULL;
421 }
422
423 static int resolve_against_held(unsigned nr, const struct object_id *base,
424 void *delta_data, unsigned long delta_size)
425 {
426 struct object *obj;
427 struct obj_buffer *obj_buffer;
428 obj = lookup_object(the_repository, base);
429 if (!obj)
430 return 0;
431 obj_buffer = lookup_object_buffer(obj);
432 if (!obj_buffer)
433 return 0;
434 resolve_delta(nr, obj->type, obj_buffer->buffer,
435 obj_buffer->size, delta_data, delta_size);
436 return 1;
437 }
438
439 static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
440 unsigned nr)
441 {
442 void *delta_data, *base;
443 unsigned long base_size;
444 struct object_id base_oid;
445
446 if (type == OBJ_REF_DELTA) {
447 oidread(&base_oid, fill(the_hash_algo->rawsz), the_repository->hash_algo);
448 use(the_hash_algo->rawsz);
449 delta_data = get_data(delta_size);
450 if (!delta_data)
451 return;
452 if (has_object(the_repository, &base_oid,
453 HAS_OBJECT_RECHECK_PACKED | HAS_OBJECT_FETCH_PROMISOR))
454 ; /* Ok we have this one */
455 else if (resolve_against_held(nr, &base_oid,
456 delta_data, delta_size))
457 return; /* we are done */
458 else {
459 /* cannot resolve yet --- queue it */
460 oidclr(&obj_list[nr].oid, the_repository->hash_algo);
461 add_delta_to_list(nr, &base_oid, 0, delta_data, delta_size);
462 return;
463 }
464 } else {
465 unsigned base_found = 0;
466 unsigned char *pack, c;
467 off_t base_offset;
468 unsigned lo, mid, hi;
469
470 pack = fill(1);
471 c = *pack;
472 use(1);
473 base_offset = c & 127;
474 while (c & 128) {
475 base_offset += 1;
476 if (!base_offset || MSB(base_offset, 7))
477 die("offset value overflow for delta base object");
478 pack = fill(1);
479 c = *pack;
480 use(1);
481 base_offset = (base_offset << 7) + (c & 127);
482 }
483 base_offset = obj_list[nr].offset - base_offset;
484 if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
485 die("offset value out of bound for delta base object");
486
487 delta_data = get_data(delta_size);
488 if (!delta_data)
489 return;
490 lo = 0;
491 hi = nr;
492 while (lo < hi) {
493 mid = lo + (hi - lo) / 2;
494 if (base_offset < obj_list[mid].offset) {
495 hi = mid;
496 } else if (base_offset > obj_list[mid].offset) {
497 lo = mid + 1;
498 } else {
499 oidcpy(&base_oid, &obj_list[mid].oid);
500 base_found = !is_null_oid(&base_oid);
501 break;
502 }
503 }
504 if (!base_found) {
505 /*
506 * The delta base object is itself a delta that
507 * has not been resolved yet.
508 */
509 oidclr(&obj_list[nr].oid, the_repository->hash_algo);
510 add_delta_to_list(nr, null_oid(the_hash_algo), base_offset,
511 delta_data, delta_size);
512 return;
513 }
514 }
515
516 if (resolve_against_held(nr, &base_oid, delta_data, delta_size))
517 return;
518
519 base = repo_read_object_file(the_repository, &base_oid, &type,
520 &base_size);
521 if (!base) {
522 error("failed to read delta-pack base object %s",
523 oid_to_hex(&base_oid));
524 if (!recover)
525 exit(1);
526 has_errors = 1;
527 return;
528 }
529 resolve_delta(nr, type, base, base_size, delta_data, delta_size);
530 free(base);
531 }
532
533 static void unpack_one(unsigned nr)
534 {
535 unsigned shift;
536 unsigned char *pack;
537 unsigned long size, c;
538 enum object_type type;
539
540 obj_list[nr].offset = consumed_bytes;
541
542 pack = fill(1);
543 c = *pack;
544 use(1);
545 type = (c >> 4) & 7;
546 size = (c & 15);
547 shift = 4;
548 while (c & 0x80) {
549 pack = fill(1);
550 c = *pack;
551 use(1);
552 size += (c & 0x7f) << shift;
553 shift += 7;
554 }
555
556 switch (type) {
557 case OBJ_BLOB:
558 if (!dry_run &&
559 size > repo_settings_get_big_file_threshold(the_repository)) {
560 stream_blob(size, nr);
561 return;
562 }
563 /* fallthrough */
564 case OBJ_COMMIT:
565 case OBJ_TREE:
566 case OBJ_TAG:
567 unpack_non_delta_entry(type, size, nr);
568 return;
569 case OBJ_REF_DELTA:
570 case OBJ_OFS_DELTA:
571 unpack_delta_entry(type, size, nr);
572 return;
573 default:
574 error("bad object type %d", type);
575 has_errors = 1;
576 if (recover)
577 return;
578 exit(1);
579 }
580 }
581
582 static void unpack_all(void)
583 {
584 int i;
585 unsigned char *hdr = fill(sizeof(struct pack_header));
586
587 if (get_be32(hdr) != PACK_SIGNATURE)
588 die("bad pack file");
589 hdr += 4;
590 if (!pack_version_ok_native(get_be32(hdr)))
591 die("unknown pack file version %"PRIu32,
592 get_be32(hdr));
593 hdr += 4;
594 nr_objects = get_be32(hdr);
595 use(sizeof(struct pack_header));
596
597 if (!quiet)
598 progress = start_progress(the_repository,
599 _("Unpacking objects"), nr_objects);
600 CALLOC_ARRAY(obj_list, nr_objects);
601 begin_odb_transaction();
602 for (i = 0; i < nr_objects; i++) {
603 unpack_one(i);
604 display_progress(progress, i + 1);
605 }
606 end_odb_transaction();
607 stop_progress(&progress);
608
609 if (delta_list)
610 die("unresolved deltas left after unpacking");
611 }
612
613 int cmd_unpack_objects(int argc,
614 const char **argv,
615 const char *prefix UNUSED,
616 struct repository *repo UNUSED)
617 {
618 int i;
619 struct object_id oid;
620 struct git_hash_ctx tmp_ctx;
621
622 disable_replace_refs();
623
624 git_config(git_default_config, NULL);
625
626 quiet = !isatty(2);
627
628 show_usage_if_asked(argc, argv, unpack_usage);
629
630 for (i = 1 ; i < argc; i++) {
631 const char *arg = argv[i];
632
633 if (*arg == '-') {
634 if (!strcmp(arg, "-n")) {
635 dry_run = 1;
636 continue;
637 }
638 if (!strcmp(arg, "-q")) {
639 quiet = 1;
640 continue;
641 }
642 if (!strcmp(arg, "-r")) {
643 recover = 1;
644 continue;
645 }
646 if (!strcmp(arg, "--strict")) {
647 strict = 1;
648 continue;
649 }
650 if (skip_prefix(arg, "--strict=", &arg)) {
651 strict = 1;
652 fsck_set_msg_types(&fsck_options, arg);
653 continue;
654 }
655 if (skip_prefix(arg, "--pack_header=", &arg)) {
656 if (parse_pack_header_option(arg,
657 buffer, &len) < 0)
658 die(_("bad --pack_header: %s"), arg);
659 continue;
660 }
661 if (skip_prefix(arg, "--max-input-size=", &arg)) {
662 max_input_size = strtoumax(arg, NULL, 10);
663 continue;
664 }
665 usage(unpack_usage);
666 }
667
668 /* We don't take any non-flag arguments now.. Maybe some day */
669 usage(unpack_usage);
670 }
671 the_hash_algo->init_fn(&ctx);
672 unpack_all();
673 git_hash_update(&ctx, buffer, offset);
674 the_hash_algo->init_fn(&tmp_ctx);
675 git_hash_clone(&tmp_ctx, &ctx);
676 git_hash_final_oid(&oid, &tmp_ctx);
677 if (strict) {
678 write_rest();
679 if (fsck_finish(&fsck_options))
680 die(_("fsck error in pack objects"));
681 }
682 if (!hasheq(fill(the_hash_algo->rawsz), oid.hash,
683 the_repository->hash_algo))
684 die("final sha1 did not match");
685 use(the_hash_algo->rawsz);
686
687 /* Write the last part of the buffer to stdout */
688 write_in_full(1, buffer + offset, len);
689
690 /* All done */
691 return has_errors;
692 }