]> git.ipfire.org Git - thirdparty/git.git/blob - reftable/reader.c
submodule-config.c: strengthen URL fsck check
[thirdparty/git.git] / reftable / reader.c
1 /*
2 Copyright 2020 Google LLC
3
4 Use of this source code is governed by a BSD-style
5 license that can be found in the LICENSE file or at
6 https://developers.google.com/open-source/licenses/bsd
7 */
8
9 #include "reader.h"
10
11 #include "system.h"
12 #include "block.h"
13 #include "constants.h"
14 #include "generic.h"
15 #include "iter.h"
16 #include "record.h"
17 #include "reftable-error.h"
18 #include "reftable-generic.h"
19 #include "tree.h"
20
21 uint64_t block_source_size(struct reftable_block_source *source)
22 {
23 return source->ops->size(source->arg);
24 }
25
26 int block_source_read_block(struct reftable_block_source *source,
27 struct reftable_block *dest, uint64_t off,
28 uint32_t size)
29 {
30 int result = source->ops->read_block(source->arg, dest, off, size);
31 dest->source = *source;
32 return result;
33 }
34
35 void block_source_close(struct reftable_block_source *source)
36 {
37 if (!source->ops) {
38 return;
39 }
40
41 source->ops->close(source->arg);
42 source->ops = NULL;
43 }
44
45 static struct reftable_reader_offsets *
46 reader_offsets_for(struct reftable_reader *r, uint8_t typ)
47 {
48 switch (typ) {
49 case BLOCK_TYPE_REF:
50 return &r->ref_offsets;
51 case BLOCK_TYPE_LOG:
52 return &r->log_offsets;
53 case BLOCK_TYPE_OBJ:
54 return &r->obj_offsets;
55 }
56 abort();
57 }
58
59 static int reader_get_block(struct reftable_reader *r,
60 struct reftable_block *dest, uint64_t off,
61 uint32_t sz)
62 {
63 if (off >= r->size)
64 return 0;
65
66 if (off + sz > r->size) {
67 sz = r->size - off;
68 }
69
70 return block_source_read_block(&r->source, dest, off, sz);
71 }
72
73 uint32_t reftable_reader_hash_id(struct reftable_reader *r)
74 {
75 return r->hash_id;
76 }
77
78 const char *reader_name(struct reftable_reader *r)
79 {
80 return r->name;
81 }
82
83 static int parse_footer(struct reftable_reader *r, uint8_t *footer,
84 uint8_t *header)
85 {
86 uint8_t *f = footer;
87 uint8_t first_block_typ;
88 int err = 0;
89 uint32_t computed_crc;
90 uint32_t file_crc;
91
92 if (memcmp(f, "REFT", 4)) {
93 err = REFTABLE_FORMAT_ERROR;
94 goto done;
95 }
96 f += 4;
97
98 if (memcmp(footer, header, header_size(r->version))) {
99 err = REFTABLE_FORMAT_ERROR;
100 goto done;
101 }
102
103 f++;
104 r->block_size = get_be24(f);
105
106 f += 3;
107 r->min_update_index = get_be64(f);
108 f += 8;
109 r->max_update_index = get_be64(f);
110 f += 8;
111
112 if (r->version == 1) {
113 r->hash_id = GIT_SHA1_FORMAT_ID;
114 } else {
115 r->hash_id = get_be32(f);
116 switch (r->hash_id) {
117 case GIT_SHA1_FORMAT_ID:
118 break;
119 case GIT_SHA256_FORMAT_ID:
120 break;
121 default:
122 err = REFTABLE_FORMAT_ERROR;
123 goto done;
124 }
125 f += 4;
126 }
127
128 r->ref_offsets.index_offset = get_be64(f);
129 f += 8;
130
131 r->obj_offsets.offset = get_be64(f);
132 f += 8;
133
134 r->object_id_len = r->obj_offsets.offset & ((1 << 5) - 1);
135 r->obj_offsets.offset >>= 5;
136
137 r->obj_offsets.index_offset = get_be64(f);
138 f += 8;
139 r->log_offsets.offset = get_be64(f);
140 f += 8;
141 r->log_offsets.index_offset = get_be64(f);
142 f += 8;
143
144 computed_crc = crc32(0, footer, f - footer);
145 file_crc = get_be32(f);
146 f += 4;
147 if (computed_crc != file_crc) {
148 err = REFTABLE_FORMAT_ERROR;
149 goto done;
150 }
151
152 first_block_typ = header[header_size(r->version)];
153 r->ref_offsets.is_present = (first_block_typ == BLOCK_TYPE_REF);
154 r->ref_offsets.offset = 0;
155 r->log_offsets.is_present = (first_block_typ == BLOCK_TYPE_LOG ||
156 r->log_offsets.offset > 0);
157 r->obj_offsets.is_present = r->obj_offsets.offset > 0;
158 if (r->obj_offsets.is_present && !r->object_id_len) {
159 err = REFTABLE_FORMAT_ERROR;
160 goto done;
161 }
162
163 err = 0;
164 done:
165 return err;
166 }
167
168 int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
169 const char *name)
170 {
171 struct reftable_block footer = { NULL };
172 struct reftable_block header = { NULL };
173 int err = 0;
174 uint64_t file_size = block_source_size(source);
175
176 /* Need +1 to read type of first block. */
177 uint32_t read_size = header_size(2) + 1; /* read v2 because it's larger. */
178 memset(r, 0, sizeof(struct reftable_reader));
179
180 if (read_size > file_size) {
181 err = REFTABLE_FORMAT_ERROR;
182 goto done;
183 }
184
185 err = block_source_read_block(source, &header, 0, read_size);
186 if (err != read_size) {
187 err = REFTABLE_IO_ERROR;
188 goto done;
189 }
190
191 if (memcmp(header.data, "REFT", 4)) {
192 err = REFTABLE_FORMAT_ERROR;
193 goto done;
194 }
195 r->version = header.data[4];
196 if (r->version != 1 && r->version != 2) {
197 err = REFTABLE_FORMAT_ERROR;
198 goto done;
199 }
200
201 r->size = file_size - footer_size(r->version);
202 r->source = *source;
203 r->name = xstrdup(name);
204 r->hash_id = 0;
205
206 err = block_source_read_block(source, &footer, r->size,
207 footer_size(r->version));
208 if (err != footer_size(r->version)) {
209 err = REFTABLE_IO_ERROR;
210 goto done;
211 }
212
213 err = parse_footer(r, footer.data, header.data);
214 done:
215 reftable_block_done(&footer);
216 reftable_block_done(&header);
217 return err;
218 }
219
220 struct table_iter {
221 struct reftable_reader *r;
222 uint8_t typ;
223 uint64_t block_off;
224 struct block_iter bi;
225 int is_finished;
226 };
227 #define TABLE_ITER_INIT \
228 { \
229 .bi = {.last_key = STRBUF_INIT } \
230 }
231
232 static void table_iter_copy_from(struct table_iter *dest,
233 struct table_iter *src)
234 {
235 dest->r = src->r;
236 dest->typ = src->typ;
237 dest->block_off = src->block_off;
238 dest->is_finished = src->is_finished;
239 block_iter_copy_from(&dest->bi, &src->bi);
240 }
241
242 static int table_iter_next_in_block(struct table_iter *ti,
243 struct reftable_record *rec)
244 {
245 int res = block_iter_next(&ti->bi, rec);
246 if (res == 0 && reftable_record_type(rec) == BLOCK_TYPE_REF) {
247 rec->u.ref.update_index += ti->r->min_update_index;
248 }
249
250 return res;
251 }
252
253 static void table_iter_block_done(struct table_iter *ti)
254 {
255 if (!ti->bi.br) {
256 return;
257 }
258 reftable_block_done(&ti->bi.br->block);
259 FREE_AND_NULL(ti->bi.br);
260
261 ti->bi.last_key.len = 0;
262 ti->bi.next_off = 0;
263 }
264
/*
 * Peek at the start of a block: store its type byte in `*typ` and return the
 * 24-bit big-endian value that follows it, or 0 when the type byte is not a
 * known block type. For the block at offset 0 the file header is skipped
 * first.
 */
static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off,
				  int version)
{
	uint8_t *blk = data;
	int32_t size;

	if (!off)
		blk += header_size(version);

	*typ = blk[0];
	if (!reftable_is_block_type(*typ))
		return 0;

	size = get_be24(blk + 1);
	return size;
}
280
281 int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br,
282 uint64_t next_off, uint8_t want_typ)
283 {
284 int32_t guess_block_size = r->block_size ? r->block_size :
285 DEFAULT_BLOCK_SIZE;
286 struct reftable_block block = { NULL };
287 uint8_t block_typ = 0;
288 int err = 0;
289 uint32_t header_off = next_off ? 0 : header_size(r->version);
290 int32_t block_size = 0;
291
292 if (next_off >= r->size)
293 return 1;
294
295 err = reader_get_block(r, &block, next_off, guess_block_size);
296 if (err < 0)
297 goto done;
298
299 block_size = extract_block_size(block.data, &block_typ, next_off,
300 r->version);
301 if (block_size < 0) {
302 err = block_size;
303 goto done;
304 }
305 if (want_typ != BLOCK_TYPE_ANY && block_typ != want_typ) {
306 err = 1;
307 goto done;
308 }
309
310 if (block_size > guess_block_size) {
311 reftable_block_done(&block);
312 err = reader_get_block(r, &block, next_off, block_size);
313 if (err < 0) {
314 goto done;
315 }
316 }
317
318 err = block_reader_init(br, &block, header_off, r->block_size,
319 hash_size(r->hash_id));
320 done:
321 reftable_block_done(&block);
322
323 return err;
324 }
325
326 static int table_iter_next_block(struct table_iter *dest,
327 struct table_iter *src)
328 {
329 uint64_t next_block_off = src->block_off + src->bi.br->full_block_size;
330 struct block_reader br = { 0 };
331 int err = 0;
332
333 dest->r = src->r;
334 dest->typ = src->typ;
335 dest->block_off = next_block_off;
336
337 err = reader_init_block_reader(src->r, &br, next_block_off, src->typ);
338 if (err > 0) {
339 dest->is_finished = 1;
340 return 1;
341 }
342 if (err != 0)
343 return err;
344 else {
345 struct block_reader *brp =
346 reftable_malloc(sizeof(struct block_reader));
347 *brp = br;
348
349 dest->is_finished = 0;
350 block_reader_start(brp, &dest->bi);
351 }
352 return 0;
353 }
354
355 static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
356 {
357 if (reftable_record_type(rec) != ti->typ)
358 return REFTABLE_API_ERROR;
359
360 while (1) {
361 struct table_iter next = TABLE_ITER_INIT;
362 int err = 0;
363 if (ti->is_finished) {
364 return 1;
365 }
366
367 err = table_iter_next_in_block(ti, rec);
368 if (err <= 0) {
369 return err;
370 }
371
372 err = table_iter_next_block(&next, ti);
373 if (err != 0) {
374 ti->is_finished = 1;
375 }
376 table_iter_block_done(ti);
377 if (err != 0) {
378 return err;
379 }
380 table_iter_copy_from(ti, &next);
381 block_iter_close(&next.bi);
382 }
383 }
384
/* Iterator vtable adapter: forwards to table_iter_next(). */
static int table_iter_next_void(void *ti, struct reftable_record *rec)
{
	struct table_iter *iter = ti;

	return table_iter_next(iter, rec);
}
389
390 static void table_iter_close(void *p)
391 {
392 struct table_iter *ti = p;
393 table_iter_block_done(ti);
394 block_iter_close(&ti->bi);
395 }
396
397 static struct reftable_iterator_vtable table_iter_vtable = {
398 .next = &table_iter_next_void,
399 .close = &table_iter_close,
400 };
401
402 static void iterator_from_table_iter(struct reftable_iterator *it,
403 struct table_iter *ti)
404 {
405 assert(!it->ops);
406 it->iter_arg = ti;
407 it->ops = &table_iter_vtable;
408 }
409
410 static int reader_table_iter_at(struct reftable_reader *r,
411 struct table_iter *ti, uint64_t off,
412 uint8_t typ)
413 {
414 struct block_reader br = { 0 };
415 struct block_reader *brp = NULL;
416
417 int err = reader_init_block_reader(r, &br, off, typ);
418 if (err != 0)
419 return err;
420
421 brp = reftable_malloc(sizeof(struct block_reader));
422 *brp = br;
423 ti->r = r;
424 ti->typ = block_reader_type(brp);
425 ti->block_off = off;
426 block_reader_start(brp, &ti->bi);
427 return 0;
428 }
429
430 static int reader_start(struct reftable_reader *r, struct table_iter *ti,
431 uint8_t typ, int index)
432 {
433 struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
434 uint64_t off = offs->offset;
435 if (index) {
436 off = offs->index_offset;
437 if (off == 0) {
438 return 1;
439 }
440 typ = BLOCK_TYPE_INDEX;
441 }
442
443 return reader_table_iter_at(r, ti, off, typ);
444 }
445
446 static int reader_seek_linear(struct table_iter *ti,
447 struct reftable_record *want)
448 {
449 struct reftable_record rec =
450 reftable_new_record(reftable_record_type(want));
451 struct strbuf want_key = STRBUF_INIT;
452 struct strbuf got_key = STRBUF_INIT;
453 struct table_iter next = TABLE_ITER_INIT;
454 int err = -1;
455
456 reftable_record_key(want, &want_key);
457
458 while (1) {
459 err = table_iter_next_block(&next, ti);
460 if (err < 0)
461 goto done;
462
463 if (err > 0) {
464 break;
465 }
466
467 err = block_reader_first_key(next.bi.br, &got_key);
468 if (err < 0)
469 goto done;
470
471 if (strbuf_cmp(&got_key, &want_key) > 0) {
472 table_iter_block_done(&next);
473 break;
474 }
475
476 table_iter_block_done(ti);
477 table_iter_copy_from(ti, &next);
478 }
479
480 err = block_iter_seek(&ti->bi, &want_key);
481 if (err < 0)
482 goto done;
483 err = 0;
484
485 done:
486 block_iter_close(&next.bi);
487 reftable_record_release(&rec);
488 strbuf_release(&want_key);
489 strbuf_release(&got_key);
490 return err;
491 }
492
493 static int reader_seek_indexed(struct reftable_reader *r,
494 struct reftable_iterator *it,
495 struct reftable_record *rec)
496 {
497 struct reftable_record want_index = {
498 .type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT }
499 };
500 struct reftable_record index_result = {
501 .type = BLOCK_TYPE_INDEX,
502 .u.idx = { .last_key = STRBUF_INIT },
503 };
504 struct table_iter index_iter = TABLE_ITER_INIT;
505 struct table_iter next = TABLE_ITER_INIT;
506 int err = 0;
507
508 reftable_record_key(rec, &want_index.u.idx.last_key);
509 err = reader_start(r, &index_iter, reftable_record_type(rec), 1);
510 if (err < 0)
511 goto done;
512
513 err = reader_seek_linear(&index_iter, &want_index);
514 while (1) {
515 err = table_iter_next(&index_iter, &index_result);
516 table_iter_block_done(&index_iter);
517 if (err != 0)
518 goto done;
519
520 err = reader_table_iter_at(r, &next, index_result.u.idx.offset,
521 0);
522 if (err != 0)
523 goto done;
524
525 err = block_iter_seek(&next.bi, &want_index.u.idx.last_key);
526 if (err < 0)
527 goto done;
528
529 if (next.typ == reftable_record_type(rec)) {
530 err = 0;
531 break;
532 }
533
534 if (next.typ != BLOCK_TYPE_INDEX) {
535 err = REFTABLE_FORMAT_ERROR;
536 break;
537 }
538
539 table_iter_copy_from(&index_iter, &next);
540 }
541
542 if (err == 0) {
543 struct table_iter empty = TABLE_ITER_INIT;
544 struct table_iter *malloced =
545 reftable_calloc(sizeof(struct table_iter));
546 *malloced = empty;
547 table_iter_copy_from(malloced, &next);
548 iterator_from_table_iter(it, malloced);
549 }
550 done:
551 block_iter_close(&next.bi);
552 table_iter_close(&index_iter);
553 reftable_record_release(&want_index);
554 reftable_record_release(&index_result);
555 return err;
556 }
557
558 static int reader_seek_internal(struct reftable_reader *r,
559 struct reftable_iterator *it,
560 struct reftable_record *rec)
561 {
562 struct reftable_reader_offsets *offs =
563 reader_offsets_for(r, reftable_record_type(rec));
564 uint64_t idx = offs->index_offset;
565 struct table_iter ti = TABLE_ITER_INIT;
566 int err = 0;
567 if (idx > 0)
568 return reader_seek_indexed(r, it, rec);
569
570 err = reader_start(r, &ti, reftable_record_type(rec), 0);
571 if (err < 0)
572 return err;
573 err = reader_seek_linear(&ti, rec);
574 if (err < 0)
575 return err;
576 else {
577 struct table_iter *p =
578 reftable_malloc(sizeof(struct table_iter));
579 *p = ti;
580 iterator_from_table_iter(it, p);
581 }
582
583 return 0;
584 }
585
586 static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it,
587 struct reftable_record *rec)
588 {
589 uint8_t typ = reftable_record_type(rec);
590
591 struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
592 if (!offs->is_present) {
593 iterator_set_empty(it);
594 return 0;
595 }
596
597 return reader_seek_internal(r, it, rec);
598 }
599
600 int reftable_reader_seek_ref(struct reftable_reader *r,
601 struct reftable_iterator *it, const char *name)
602 {
603 struct reftable_record rec = {
604 .type = BLOCK_TYPE_REF,
605 .u.ref = {
606 .refname = (char *)name,
607 },
608 };
609 return reader_seek(r, it, &rec);
610 }
611
612 int reftable_reader_seek_log_at(struct reftable_reader *r,
613 struct reftable_iterator *it, const char *name,
614 uint64_t update_index)
615 {
616 struct reftable_record rec = { .type = BLOCK_TYPE_LOG,
617 .u.log = {
618 .refname = (char *)name,
619 .update_index = update_index,
620 } };
621 return reader_seek(r, it, &rec);
622 }
623
/*
 * Seek `it` to the log records for `name`, using the largest possible
 * update index as the seek key.
 */
int reftable_reader_seek_log(struct reftable_reader *r,
			     struct reftable_iterator *it, const char *name)
{
	const uint64_t highest = ~((uint64_t)0);

	return reftable_reader_seek_log_at(r, it, name, highest);
}
630
631 void reader_close(struct reftable_reader *r)
632 {
633 block_source_close(&r->source);
634 FREE_AND_NULL(r->name);
635 }
636
637 int reftable_new_reader(struct reftable_reader **p,
638 struct reftable_block_source *src, char const *name)
639 {
640 struct reftable_reader *rd =
641 reftable_calloc(sizeof(struct reftable_reader));
642 int err = init_reader(rd, src, name);
643 if (err == 0) {
644 *p = rd;
645 } else {
646 block_source_close(src);
647 reftable_free(rd);
648 }
649 return err;
650 }
651
/* Close and free a reader obtained from reftable_new_reader(); NULL is allowed. */
void reftable_reader_free(struct reftable_reader *r)
{
	if (r) {
		reader_close(r);
		reftable_free(r);
	}
}
659
660 static int reftable_reader_refs_for_indexed(struct reftable_reader *r,
661 struct reftable_iterator *it,
662 uint8_t *oid)
663 {
664 struct reftable_record want = {
665 .type = BLOCK_TYPE_OBJ,
666 .u.obj = {
667 .hash_prefix = oid,
668 .hash_prefix_len = r->object_id_len,
669 },
670 };
671 struct reftable_iterator oit = { NULL };
672 struct reftable_record got = {
673 .type = BLOCK_TYPE_OBJ,
674 .u.obj = { 0 },
675 };
676 int err = 0;
677 struct indexed_table_ref_iter *itr = NULL;
678
679 /* Look through the reverse index. */
680 err = reader_seek(r, &oit, &want);
681 if (err != 0)
682 goto done;
683
684 /* read out the reftable_obj_record */
685 err = iterator_next(&oit, &got);
686 if (err < 0)
687 goto done;
688
689 if (err > 0 || memcmp(want.u.obj.hash_prefix, got.u.obj.hash_prefix,
690 r->object_id_len)) {
691 /* didn't find it; return empty iterator */
692 iterator_set_empty(it);
693 err = 0;
694 goto done;
695 }
696
697 err = new_indexed_table_ref_iter(&itr, r, oid, hash_size(r->hash_id),
698 got.u.obj.offsets,
699 got.u.obj.offset_len);
700 if (err < 0)
701 goto done;
702 got.u.obj.offsets = NULL;
703 iterator_from_indexed_table_ref_iter(it, itr);
704
705 done:
706 reftable_iterator_destroy(&oit);
707 reftable_record_release(&got);
708 return err;
709 }
710
711 static int reftable_reader_refs_for_unindexed(struct reftable_reader *r,
712 struct reftable_iterator *it,
713 uint8_t *oid)
714 {
715 struct table_iter ti_empty = TABLE_ITER_INIT;
716 struct table_iter *ti = reftable_calloc(sizeof(struct table_iter));
717 struct filtering_ref_iterator *filter = NULL;
718 struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT;
719 int oid_len = hash_size(r->hash_id);
720 int err;
721
722 *ti = ti_empty;
723 err = reader_start(r, ti, BLOCK_TYPE_REF, 0);
724 if (err < 0) {
725 reftable_free(ti);
726 return err;
727 }
728
729 filter = reftable_malloc(sizeof(struct filtering_ref_iterator));
730 *filter = empty;
731
732 strbuf_add(&filter->oid, oid, oid_len);
733 reftable_table_from_reader(&filter->tab, r);
734 filter->double_check = 0;
735 iterator_from_table_iter(&filter->it, ti);
736
737 iterator_from_filtering_ref_iterator(it, filter);
738 return 0;
739 }
740
741 int reftable_reader_refs_for(struct reftable_reader *r,
742 struct reftable_iterator *it, uint8_t *oid)
743 {
744 if (r->obj_offsets.is_present)
745 return reftable_reader_refs_for_indexed(r, it, oid);
746 return reftable_reader_refs_for_unindexed(r, it, oid);
747 }
748
749 uint64_t reftable_reader_max_update_index(struct reftable_reader *r)
750 {
751 return r->max_update_index;
752 }
753
754 uint64_t reftable_reader_min_update_index(struct reftable_reader *r)
755 {
756 return r->min_update_index;
757 }
758
759 /* generic table interface. */
760
/* Table vtable adapter: `tab` is a struct reftable_reader. */
static int reftable_reader_seek_void(void *tab, struct reftable_iterator *it,
				     struct reftable_record *rec)
{
	struct reftable_reader *reader = tab;

	return reader_seek(reader, it, rec);
}
766
/* Table vtable adapter: `tab` is a struct reftable_reader. */
static uint32_t reftable_reader_hash_id_void(void *tab)
{
	struct reftable_reader *reader = tab;

	return reftable_reader_hash_id(reader);
}
771
/* Table vtable adapter: `tab` is a struct reftable_reader. */
static uint64_t reftable_reader_min_update_index_void(void *tab)
{
	struct reftable_reader *reader = tab;

	return reftable_reader_min_update_index(reader);
}
776
/* Table vtable adapter: `tab` is a struct reftable_reader. */
static uint64_t reftable_reader_max_update_index_void(void *tab)
{
	struct reftable_reader *reader = tab;

	return reftable_reader_max_update_index(reader);
}
781
782 static struct reftable_table_vtable reader_vtable = {
783 .seek_record = reftable_reader_seek_void,
784 .hash_id = reftable_reader_hash_id_void,
785 .min_update_index = reftable_reader_min_update_index_void,
786 .max_update_index = reftable_reader_max_update_index_void,
787 };
788
789 void reftable_table_from_reader(struct reftable_table *tab,
790 struct reftable_reader *reader)
791 {
792 assert(!tab->ops);
793 tab->ops = &reader_vtable;
794 tab->table_arg = reader;
795 }
796
797
798 int reftable_reader_print_file(const char *tablename)
799 {
800 struct reftable_block_source src = { NULL };
801 int err = reftable_block_source_from_file(&src, tablename);
802 struct reftable_reader *r = NULL;
803 struct reftable_table tab = { NULL };
804 if (err < 0)
805 goto done;
806
807 err = reftable_new_reader(&r, &src, tablename);
808 if (err < 0)
809 goto done;
810
811 reftable_table_from_reader(&tab, r);
812 err = reftable_table_print(&tab);
813 done:
814 reftable_reader_free(r);
815 return err;
816 }