builtin/unpack-objects.c
#include "builtin.h"
#include "cache.h"
#include "bulk-checkin.h"
#include "config.h"
#include "object-store.h"
#include "object.h"
#include "delta.h"
#include "pack.h"
#include "blob.h"
#include "commit.h"
#include "tag.h"
#include "tree.h"
#include "tree-walk.h"
#include "progress.h"
#include "decorate.h"
#include "fsck.h"

static int dry_run, quiet, recover, has_errors, strict;
static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict]";

/* We always read in 4kB chunks. */
static unsigned char buffer[4096];
static unsigned int offset, len;
static off_t consumed_bytes;
static off_t max_input_size;
static git_hash_ctx ctx;
static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static struct progress *progress;

/*
 * When running under --strict mode, objects whose reachability is
 * suspect are kept in core without getting written to the object
 * store.
 */
struct obj_buffer {
	char *buffer;
	unsigned long size;
};

static struct decoration obj_decorate;

static struct obj_buffer *lookup_object_buffer(struct object *base)
{
	return lookup_decoration(&obj_decorate, base);
}

static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
{
	struct obj_buffer *obj;
	CALLOC_ARRAY(obj, 1);
	obj->buffer = buffer;
	obj->size = size;
	if (add_decoration(&obj_decorate, object, obj))
		die("object %s tried to add buffer twice!", oid_to_hex(&object->oid));
}

/*
 * Make sure at least "min" bytes are available in the buffer, and
 * return the pointer to the buffer.
 */
static void *fill(int min)
{
	if (min <= len)
		return buffer + offset;
	if (min > sizeof(buffer))
		die("cannot fill %d bytes", min);
	if (offset) {
		the_hash_algo->update_fn(&ctx, buffer, offset);
		memmove(buffer, buffer + offset, len);
		offset = 0;
	}
	do {
		ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len);
		if (ret <= 0) {
			if (!ret)
				die("early EOF");
			die_errno("read error on input");
		}
		len += ret;
	} while (len < min);
	return buffer;
}

static void use(int bytes)
{
	if (bytes > len)
		die("used more bytes than were available");
	len -= bytes;
	offset += bytes;

	/* make sure off_t is sufficiently large not to wrap */
	if (signed_add_overflows(consumed_bytes, bytes))
		die("pack too large for current definition of off_t");
	consumed_bytes += bytes;
	if (max_input_size && consumed_bytes > max_input_size)
		die(_("pack exceeds maximum allowed size"));
	display_throughput(progress, consumed_bytes);
}
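
/*
 * fill() and use() work as a pair: a caller peeks at least "min"
 * bytes through fill(), decides how much it actually consumed, and
 * then advances past exactly that much with use().  The object
 * header parser below, for example, goes fill(1) / use(1) one byte
 * at a time.
 */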

/*
 * Decompress a zstream from the standard input into a newly
 * allocated buffer of the specified size and return the buffer.
 * The caller is responsible for freeing the returned buffer.
 *
 * In dry_run mode, however, get_data() is only used to check the
 * integrity of the data, and the returned buffer is not used at all.
 * Therefore, in dry_run mode, get_data() releases the small buffer
 * that is reused to hold temporary zstream output and returns NULL
 * instead of returning garbage data.
 */
static void *get_data(unsigned long size)
{
	git_zstream stream;
	unsigned long bufsize = dry_run && size > 8192 ? 8192 : size;
	void *buf = xmallocz(bufsize);

	memset(&stream, 0, sizeof(stream));

	stream.next_out = buf;
	stream.avail_out = bufsize;
	stream.next_in = fill(1);
	stream.avail_in = len;
	git_inflate_init(&stream);

	for (;;) {
		int ret = git_inflate(&stream, 0);
		use(len - stream.avail_in);
		if (stream.total_out == size && ret == Z_STREAM_END)
			break;
		if (ret != Z_OK) {
			error("inflate returned %d", ret);
			FREE_AND_NULL(buf);
			if (!recover)
				exit(1);
			has_errors = 1;
			break;
		}
		stream.next_in = fill(1);
		stream.avail_in = len;
		if (dry_run) {
			/* reuse the buffer in dry_run mode */
			stream.next_out = buf;
			stream.avail_out = bufsize > size - stream.total_out ?
				size - stream.total_out :
				bufsize;
		}
	}
	git_inflate_end(&stream);
	if (dry_run)
		FREE_AND_NULL(buf);
	return buf;
}
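
/*
 * A typical caller (see unpack_non_delta_entry() below) does
 *
 *	void *buf = get_data(size);
 *	if (buf)
 *		write_object(nr, type, buf, size);
 *
 * where the NULL check covers both dry-run mode and an inflate
 * failure that was recovered from with -r.
 */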

struct delta_info {
	struct object_id base_oid;
	unsigned nr;
	off_t base_offset;
	unsigned long size;
	void *delta;
	struct delta_info *next;
};

static struct delta_info *delta_list;

static void add_delta_to_list(unsigned nr, const struct object_id *base_oid,
			      off_t base_offset,
			      void *delta, unsigned long size)
{
	struct delta_info *info = xmalloc(sizeof(*info));

	oidcpy(&info->base_oid, base_oid);
	info->base_offset = base_offset;
	info->size = size;
	info->delta = delta;
	info->nr = nr;
	info->next = delta_list;
	delta_list = info;
}

struct obj_info {
	off_t offset;
	struct object_id oid;
	struct object *obj;
};

/* Remember to update object flag allocation in object.h */
#define FLAG_OPEN (1u<<20)
#define FLAG_WRITTEN (1u<<21)
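
/*
 * FLAG_OPEN marks an object whose buffer is being held in core under
 * --strict and has not been written out yet; FLAG_WRITTEN marks an
 * object that is already in the object store, either because we just
 * wrote it or because it was there before the unpack started.
 */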

static struct obj_info *obj_list;
static unsigned nr_objects;

/*
 * Called only from check_object() after it verified this object
 * is Ok.
 */
static void write_cached_object(struct object *obj, struct obj_buffer *obj_buf)
{
	struct object_id oid;

	if (write_object_file(obj_buf->buffer, obj_buf->size,
			      obj->type, &oid) < 0)
		die("failed to write object %s", oid_to_hex(&obj->oid));
	obj->flags |= FLAG_WRITTEN;
}

/*
 * At the very end of processing, write_rest() scans the objects
 * that have reachability requirements and calls this function on
 * each of them. Verify the object's reachability and validity
 * recursively and write it out.
 */
static int check_object(struct object *obj, enum object_type type,
			void *data, struct fsck_options *options)
{
	struct obj_buffer *obj_buf;

	if (!obj)
		return 1;

	if (obj->flags & FLAG_WRITTEN)
		return 0;

	if (type != OBJ_ANY && obj->type != type)
		die("object type mismatch");

	if (!(obj->flags & FLAG_OPEN)) {
		unsigned long size;
		int type = oid_object_info(the_repository, &obj->oid, &size);
		if (type != obj->type || type <= 0)
			die("object of unexpected type");
		obj->flags |= FLAG_WRITTEN;
		return 0;
	}

	obj_buf = lookup_object_buffer(obj);
	if (!obj_buf)
		die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid));
	if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options))
		die("fsck error in packed object");
	fsck_options.walk = check_object;
	if (fsck_walk(obj, NULL, &fsck_options))
		die("Error on reachable objects of %s", oid_to_hex(&obj->oid));
	write_cached_object(obj, obj_buf);
	return 0;
}

static void write_rest(void)
{
	unsigned i;
	for (i = 0; i < nr_objects; i++) {
		if (obj_list[i].obj)
			check_object(obj_list[i].obj, OBJ_ANY, NULL, NULL);
	}
}

static void added_object(unsigned nr, enum object_type type,
			 void *data, unsigned long size);

/*
 * Write out the nr-th object from the list, now that we know its
 * contents. Under --strict, this buffers structured objects in-core,
 * to be checked at the end.
 */
static void write_object(unsigned nr, enum object_type type,
			 void *buf, unsigned long size)
{
	if (!strict) {
		if (write_object_file(buf, size, type,
				      &obj_list[nr].oid) < 0)
			die("failed to write object");
		added_object(nr, type, buf, size);
		free(buf);
		obj_list[nr].obj = NULL;
	} else if (type == OBJ_BLOB) {
		struct blob *blob;
		if (write_object_file(buf, size, type,
				      &obj_list[nr].oid) < 0)
			die("failed to write object");
		added_object(nr, type, buf, size);
		free(buf);

		blob = lookup_blob(the_repository, &obj_list[nr].oid);
		if (blob)
			blob->object.flags |= FLAG_WRITTEN;
		else
			die("invalid blob object");
		obj_list[nr].obj = NULL;
	} else {
		struct object *obj;
		int eaten;
		hash_object_file(the_hash_algo, buf, size, type,
				 &obj_list[nr].oid);
		added_object(nr, type, buf, size);
		obj = parse_object_buffer(the_repository, &obj_list[nr].oid,
					  type, size, buf,
					  &eaten);
		if (!obj)
			die("invalid %s", type_name(type));
		add_object_buffer(obj, buf, size);
		obj->flags |= FLAG_OPEN;
		obj_list[nr].obj = obj;
	}
}

static void resolve_delta(unsigned nr, enum object_type type,
			  void *base, unsigned long base_size,
			  void *delta, unsigned long delta_size)
{
	void *result;
	unsigned long result_size;

	result = patch_delta(base, base_size,
			     delta, delta_size,
			     &result_size);
	if (!result)
		die("failed to apply delta");
	free(delta);
	write_object(nr, type, result, result_size);
}

/*
 * We now know the contents of the nr-th object in the pack;
 * resolve all the deltified objects that are based on it.
 */
static void added_object(unsigned nr, enum object_type type,
			 void *data, unsigned long size)
{
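	/*
	 * resolve_delta() ends up in write_object(), which calls back
	 * into added_object() for the object it just completed, so the
	 * recursion can rewrite delta_list under us.  That is why the
	 * entry is unlinked first and the scan restarts from the head
	 * after every resolution.
	 */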
	struct delta_info **p = &delta_list;
	struct delta_info *info;

	while ((info = *p) != NULL) {
		if (oideq(&info->base_oid, &obj_list[nr].oid) ||
		    info->base_offset == obj_list[nr].offset) {
			*p = info->next;
			p = &delta_list;
			resolve_delta(info->nr, type, data, size,
				      info->delta, info->size);
			free(info);
			continue;
		}
		p = &info->next;
	}
}

static void unpack_non_delta_entry(enum object_type type, unsigned long size,
				   unsigned nr)
{
	void *buf = get_data(size);

	if (buf)
		write_object(nr, type, buf, size);
}

struct input_zstream_data {
	git_zstream *zstream;
	unsigned char buf[8192];
	int status;
};

static const void *feed_input_zstream(struct input_stream *in_stream,
				      unsigned long *readlen)
{
	struct input_zstream_data *data = in_stream->data;
	git_zstream *zstream = data->zstream;
	void *in = fill(1);

	if (in_stream->is_finished) {
		*readlen = 0;
		return NULL;
	}

	zstream->next_out = data->buf;
	zstream->avail_out = sizeof(data->buf);
	zstream->next_in = in;
	zstream->avail_in = len;

	data->status = git_inflate(zstream, 0);

	in_stream->is_finished = data->status != Z_OK;
	use(len - zstream->avail_in);
	*readlen = sizeof(data->buf) - zstream->avail_out;

	return data->buf;
}

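/*
 * Unpack a blob by streaming the inflated data straight into a loose
 * object file, so the whole blob never has to be held in memory.
 * unpack_one() takes this path for blobs larger than
 * core.bigFileThreshold (except in dry-run mode).
 */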
static void stream_blob(unsigned long size, unsigned nr)
{
	git_zstream zstream = { 0 };
	struct input_zstream_data data = { 0 };
	struct input_stream in_stream = {
		.read = feed_input_zstream,
		.data = &data,
	};
	struct obj_info *info = &obj_list[nr];

	data.zstream = &zstream;
	git_inflate_init(&zstream);

	if (stream_loose_object(&in_stream, size, &info->oid))
		die(_("failed to write object in stream"));

	if (data.status != Z_STREAM_END)
		die(_("inflate returned (%d)"), data.status);
	git_inflate_end(&zstream);

	if (strict) {
		struct blob *blob = lookup_blob(the_repository, &info->oid);

		if (!blob)
			die(_("invalid blob object from stream"));
		blob->object.flags |= FLAG_WRITTEN;
	}
	info->obj = NULL;
}

static int resolve_against_held(unsigned nr, const struct object_id *base,
				void *delta_data, unsigned long delta_size)
{
	struct object *obj;
	struct obj_buffer *obj_buffer;
	obj = lookup_object(the_repository, base);
	if (!obj)
		return 0;
	obj_buffer = lookup_object_buffer(obj);
	if (!obj_buffer)
		return 0;
	resolve_delta(nr, obj->type, obj_buffer->buffer,
		      obj_buffer->size, delta_data, delta_size);
	return 1;
}

static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
			       unsigned nr)
{
	void *delta_data, *base;
	unsigned long base_size;
	struct object_id base_oid;

	if (type == OBJ_REF_DELTA) {
		oidread(&base_oid, fill(the_hash_algo->rawsz));
		use(the_hash_algo->rawsz);
		delta_data = get_data(delta_size);
		if (!delta_data)
			return;
		if (repo_has_object_file(the_repository, &base_oid))
			; /* Ok we have this one */
		else if (resolve_against_held(nr, &base_oid,
					      delta_data, delta_size))
			return; /* we are done */
		else {
			/* cannot resolve yet --- queue it */
			oidclr(&obj_list[nr].oid);
			add_delta_to_list(nr, &base_oid, 0, delta_data, delta_size);
			return;
		}
	} else {
		unsigned base_found = 0;
		unsigned char *pack, c;
		off_t base_offset;
		unsigned lo, mid, hi;

		pack = fill(1);
		c = *pack;
		use(1);
		base_offset = c & 127;
		while (c & 128) {
			base_offset += 1;
			if (!base_offset || MSB(base_offset, 7))
				die("offset value overflow for delta base object");
			pack = fill(1);
			c = *pack;
			use(1);
			base_offset = (base_offset << 7) + (c & 127);
		}
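		/*
		 * Example: the encoded bytes 0x91 0x2e decode to
		 * ((0x11 + 1) << 7) + 0x2e = 2350, meaning the base
		 * object starts 2350 bytes before this entry's own
		 * offset in the pack.
		 */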
		base_offset = obj_list[nr].offset - base_offset;
		if (base_offset <= 0 || base_offset >= obj_list[nr].offset)
			die("offset value out of bound for delta base object");

		delta_data = get_data(delta_size);
		if (!delta_data)
			return;
		lo = 0;
		hi = nr;
		while (lo < hi) {
			mid = lo + (hi - lo) / 2;
			if (base_offset < obj_list[mid].offset) {
				hi = mid;
			} else if (base_offset > obj_list[mid].offset) {
				lo = mid + 1;
			} else {
				oidcpy(&base_oid, &obj_list[mid].oid);
				base_found = !is_null_oid(&base_oid);
				break;
			}
		}
		if (!base_found) {
			/*
			 * The delta base object is itself a delta that
			 * has not been resolved yet.
			 */
			oidclr(&obj_list[nr].oid);
			add_delta_to_list(nr, null_oid(), base_offset,
					  delta_data, delta_size);
			return;
		}
	}

	if (resolve_against_held(nr, &base_oid, delta_data, delta_size))
		return;

	base = repo_read_object_file(the_repository, &base_oid, &type,
				     &base_size);
	if (!base) {
		error("failed to read delta-pack base object %s",
		      oid_to_hex(&base_oid));
		if (!recover)
			exit(1);
		has_errors = 1;
		return;
	}
	resolve_delta(nr, type, base, base_size, delta_data, delta_size);
	free(base);
}

static void unpack_one(unsigned nr)
{
	unsigned shift;
	unsigned char *pack;
	unsigned long size, c;
	enum object_type type;

	obj_list[nr].offset = consumed_bytes;

	pack = fill(1);
	c = *pack;
	use(1);
	type = (c >> 4) & 7;
	size = (c & 15);
	shift = 4;
	while (c & 0x80) {
		pack = fill(1);
		c = *pack;
		use(1);
		size += (c & 0x7f) << shift;
		shift += 7;
	}
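	/*
	 * Example: the header bytes 0xb5 0x07 decode to type 3
	 * (OBJ_BLOB) with size 5 + (7 << 4) = 117 bytes.
	 */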

	switch (type) {
	case OBJ_BLOB:
		if (!dry_run && size > big_file_threshold) {
			stream_blob(size, nr);
			return;
		}
		/* fallthrough */
	case OBJ_COMMIT:
	case OBJ_TREE:
	case OBJ_TAG:
		unpack_non_delta_entry(type, size, nr);
		return;
	case OBJ_REF_DELTA:
	case OBJ_OFS_DELTA:
		unpack_delta_entry(type, size, nr);
		return;
	default:
		error("bad object type %d", type);
		has_errors = 1;
		if (recover)
			return;
		exit(1);
	}
}

static void unpack_all(void)
{
	int i;
	struct pack_header *hdr = fill(sizeof(struct pack_header));

	nr_objects = ntohl(hdr->hdr_entries);

	if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
		die("bad pack file");
	if (!pack_version_ok(hdr->hdr_version))
		die("unknown pack file version %"PRIu32,
		    ntohl(hdr->hdr_version));
	use(sizeof(struct pack_header));

	if (!quiet)
		progress = start_progress(_("Unpacking objects"), nr_objects);
	CALLOC_ARRAY(obj_list, nr_objects);
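	/*
	 * Group the writes in an object database transaction (see
	 * bulk-checkin.h) so that the object store may batch the
	 * flushing of the loose objects written in the loop below,
	 * e.g. syncing once at end_odb_transaction() rather than once
	 * per object when batched fsync is configured.
	 */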
	begin_odb_transaction();
	for (i = 0; i < nr_objects; i++) {
		unpack_one(i);
		display_progress(progress, i + 1);
	}
	end_odb_transaction();
	stop_progress(&progress);

	if (delta_list)
		die("unresolved deltas left after unpacking");
}

int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
{
	int i;
	struct object_id oid;

	read_replace_refs = 0;

	git_config(git_default_config, NULL);

	quiet = !isatty(2);

	for (i = 1 ; i < argc; i++) {
		const char *arg = argv[i];

		if (*arg == '-') {
			if (!strcmp(arg, "-n")) {
				dry_run = 1;
				continue;
			}
			if (!strcmp(arg, "-q")) {
				quiet = 1;
				continue;
			}
			if (!strcmp(arg, "-r")) {
				recover = 1;
				continue;
			}
			if (!strcmp(arg, "--strict")) {
				strict = 1;
				continue;
			}
			if (skip_prefix(arg, "--strict=", &arg)) {
				strict = 1;
				fsck_set_msg_types(&fsck_options, arg);
				continue;
			}
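			/*
			 * --pack_header=<version>,<entries> is used by
			 * callers (such as receive-pack) that have already
			 * consumed the pack header from the stream; replay
			 * it by pre-seeding the read buffer with an
			 * equivalent header.
			 */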
			if (starts_with(arg, "--pack_header=")) {
				struct pack_header *hdr;
				char *c;

				hdr = (struct pack_header *)buffer;
				hdr->hdr_signature = htonl(PACK_SIGNATURE);
				hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10));
				if (*c != ',')
					die("bad %s", arg);
				hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10));
				if (*c)
					die("bad %s", arg);
				len = sizeof(*hdr);
				continue;
			}
			if (skip_prefix(arg, "--max-input-size=", &arg)) {
				max_input_size = strtoumax(arg, NULL, 10);
				continue;
			}
			usage(unpack_usage);
		}

		/* We don't take any non-flag arguments now... Maybe some day. */
		usage(unpack_usage);
	}
	the_hash_algo->init_fn(&ctx);
	unpack_all();
	the_hash_algo->update_fn(&ctx, buffer, offset);
	the_hash_algo->final_oid_fn(&oid, &ctx);
	if (strict) {
		write_rest();
		if (fsck_finish(&fsck_options))
			die(_("fsck error in pack objects"));
	}
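	/*
	 * A pack stream ends with the hash of all preceding pack data;
	 * check that trailer against the checksum accumulated while
	 * reading the input.
	 */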
	if (!hasheq(fill(the_hash_algo->rawsz), oid.hash))
		die("final sha1 did not match");
	use(the_hash_algo->rawsz);

	/* Write the last part of the buffer to stdout */
	while (len) {
		int ret = xwrite(1, buffer + offset, len);
		if (ret <= 0)
			break;
		len -= ret;
		offset += ret;
	}

	/* All done */
	return has_errors;
}