]> git.ipfire.org Git - thirdparty/git.git/blob - reftable/reader.c
commit: unify logic to avoid multiple scissors lines when merging
[thirdparty/git.git] / reftable / reader.c
1 /*
2 Copyright 2020 Google LLC
3
4 Use of this source code is governed by a BSD-style
5 license that can be found in the LICENSE file or at
6 https://developers.google.com/open-source/licenses/bsd
7 */
8
9 #include "reader.h"
10
11 #include "system.h"
12 #include "block.h"
13 #include "constants.h"
14 #include "generic.h"
15 #include "iter.h"
16 #include "record.h"
17 #include "reftable-error.h"
18 #include "reftable-generic.h"
19
20 uint64_t block_source_size(struct reftable_block_source *source)
21 {
22 return source->ops->size(source->arg);
23 }
24
25 int block_source_read_block(struct reftable_block_source *source,
26 struct reftable_block *dest, uint64_t off,
27 uint32_t size)
28 {
29 int result = source->ops->read_block(source->arg, dest, off, size);
30 dest->source = *source;
31 return result;
32 }
33
34 void block_source_close(struct reftable_block_source *source)
35 {
36 if (!source->ops) {
37 return;
38 }
39
40 source->ops->close(source->arg);
41 source->ops = NULL;
42 }
43
44 static struct reftable_reader_offsets *
45 reader_offsets_for(struct reftable_reader *r, uint8_t typ)
46 {
47 switch (typ) {
48 case BLOCK_TYPE_REF:
49 return &r->ref_offsets;
50 case BLOCK_TYPE_LOG:
51 return &r->log_offsets;
52 case BLOCK_TYPE_OBJ:
53 return &r->obj_offsets;
54 }
55 abort();
56 }
57
58 static int reader_get_block(struct reftable_reader *r,
59 struct reftable_block *dest, uint64_t off,
60 uint32_t sz)
61 {
62 if (off >= r->size)
63 return 0;
64
65 if (off + sz > r->size) {
66 sz = r->size - off;
67 }
68
69 return block_source_read_block(&r->source, dest, off, sz);
70 }
71
72 uint32_t reftable_reader_hash_id(struct reftable_reader *r)
73 {
74 return r->hash_id;
75 }
76
77 const char *reader_name(struct reftable_reader *r)
78 {
79 return r->name;
80 }
81
82 static int parse_footer(struct reftable_reader *r, uint8_t *footer,
83 uint8_t *header)
84 {
85 uint8_t *f = footer;
86 uint8_t first_block_typ;
87 int err = 0;
88 uint32_t computed_crc;
89 uint32_t file_crc;
90
91 if (memcmp(f, "REFT", 4)) {
92 err = REFTABLE_FORMAT_ERROR;
93 goto done;
94 }
95 f += 4;
96
97 if (memcmp(footer, header, header_size(r->version))) {
98 err = REFTABLE_FORMAT_ERROR;
99 goto done;
100 }
101
102 f++;
103 r->block_size = get_be24(f);
104
105 f += 3;
106 r->min_update_index = get_be64(f);
107 f += 8;
108 r->max_update_index = get_be64(f);
109 f += 8;
110
111 if (r->version == 1) {
112 r->hash_id = GIT_SHA1_FORMAT_ID;
113 } else {
114 r->hash_id = get_be32(f);
115 switch (r->hash_id) {
116 case GIT_SHA1_FORMAT_ID:
117 break;
118 case GIT_SHA256_FORMAT_ID:
119 break;
120 default:
121 err = REFTABLE_FORMAT_ERROR;
122 goto done;
123 }
124 f += 4;
125 }
126
127 r->ref_offsets.index_offset = get_be64(f);
128 f += 8;
129
130 r->obj_offsets.offset = get_be64(f);
131 f += 8;
132
133 r->object_id_len = r->obj_offsets.offset & ((1 << 5) - 1);
134 r->obj_offsets.offset >>= 5;
135
136 r->obj_offsets.index_offset = get_be64(f);
137 f += 8;
138 r->log_offsets.offset = get_be64(f);
139 f += 8;
140 r->log_offsets.index_offset = get_be64(f);
141 f += 8;
142
143 computed_crc = crc32(0, footer, f - footer);
144 file_crc = get_be32(f);
145 f += 4;
146 if (computed_crc != file_crc) {
147 err = REFTABLE_FORMAT_ERROR;
148 goto done;
149 }
150
151 first_block_typ = header[header_size(r->version)];
152 r->ref_offsets.is_present = (first_block_typ == BLOCK_TYPE_REF);
153 r->ref_offsets.offset = 0;
154 r->log_offsets.is_present = (first_block_typ == BLOCK_TYPE_LOG ||
155 r->log_offsets.offset > 0);
156 r->obj_offsets.is_present = r->obj_offsets.offset > 0;
157 if (r->obj_offsets.is_present && !r->object_id_len) {
158 err = REFTABLE_FORMAT_ERROR;
159 goto done;
160 }
161
162 err = 0;
163 done:
164 return err;
165 }
166
167 int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
168 const char *name)
169 {
170 struct reftable_block footer = { NULL };
171 struct reftable_block header = { NULL };
172 int err = 0;
173 uint64_t file_size = block_source_size(source);
174
175 /* Need +1 to read type of first block. */
176 uint32_t read_size = header_size(2) + 1; /* read v2 because it's larger. */
177 memset(r, 0, sizeof(struct reftable_reader));
178
179 if (read_size > file_size) {
180 err = REFTABLE_FORMAT_ERROR;
181 goto done;
182 }
183
184 err = block_source_read_block(source, &header, 0, read_size);
185 if (err != read_size) {
186 err = REFTABLE_IO_ERROR;
187 goto done;
188 }
189
190 if (memcmp(header.data, "REFT", 4)) {
191 err = REFTABLE_FORMAT_ERROR;
192 goto done;
193 }
194 r->version = header.data[4];
195 if (r->version != 1 && r->version != 2) {
196 err = REFTABLE_FORMAT_ERROR;
197 goto done;
198 }
199
200 r->size = file_size - footer_size(r->version);
201 r->source = *source;
202 r->name = xstrdup(name);
203 r->hash_id = 0;
204
205 err = block_source_read_block(source, &footer, r->size,
206 footer_size(r->version));
207 if (err != footer_size(r->version)) {
208 err = REFTABLE_IO_ERROR;
209 goto done;
210 }
211
212 err = parse_footer(r, footer.data, header.data);
213 done:
214 reftable_block_done(&footer);
215 reftable_block_done(&header);
216 return err;
217 }
218
219 struct table_iter {
220 struct reftable_reader *r;
221 uint8_t typ;
222 uint64_t block_off;
223 struct block_iter bi;
224 int is_finished;
225 };
226 #define TABLE_ITER_INIT { \
227 .bi = BLOCK_ITER_INIT \
228 }
229
230 static void table_iter_copy_from(struct table_iter *dest,
231 struct table_iter *src)
232 {
233 dest->r = src->r;
234 dest->typ = src->typ;
235 dest->block_off = src->block_off;
236 dest->is_finished = src->is_finished;
237 block_iter_copy_from(&dest->bi, &src->bi);
238 }
239
240 static int table_iter_next_in_block(struct table_iter *ti,
241 struct reftable_record *rec)
242 {
243 int res = block_iter_next(&ti->bi, rec);
244 if (res == 0 && reftable_record_type(rec) == BLOCK_TYPE_REF) {
245 rec->u.ref.update_index += ti->r->min_update_index;
246 }
247
248 return res;
249 }
250
251 static void table_iter_block_done(struct table_iter *ti)
252 {
253 if (!ti->bi.br) {
254 return;
255 }
256 reftable_block_done(&ti->bi.br->block);
257 FREE_AND_NULL(ti->bi.br);
258
259 ti->bi.last_key.len = 0;
260 ti->bi.next_off = 0;
261 }
262
/*
 * Read the block type and block size from the start of a block.
 *
 * `data` points at the bytes read from offset `off`; for `off == 0` the file
 * header is skipped first. The type byte is stored in `*typ`. Returns the
 * 24-bit size that follows the type byte, or 0 when the type byte is not a
 * recognised block type.
 */
static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off,
				  int version)
{
	uint8_t *start = data;

	if (off == 0)
		start += header_size(version);

	*typ = start[0];
	if (!reftable_is_block_type(*typ))
		return 0;

	return get_be24(start + 1);
}
278
279 int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br,
280 uint64_t next_off, uint8_t want_typ)
281 {
282 int32_t guess_block_size = r->block_size ? r->block_size :
283 DEFAULT_BLOCK_SIZE;
284 struct reftable_block block = { NULL };
285 uint8_t block_typ = 0;
286 int err = 0;
287 uint32_t header_off = next_off ? 0 : header_size(r->version);
288 int32_t block_size = 0;
289
290 if (next_off >= r->size)
291 return 1;
292
293 err = reader_get_block(r, &block, next_off, guess_block_size);
294 if (err < 0)
295 goto done;
296
297 block_size = extract_block_size(block.data, &block_typ, next_off,
298 r->version);
299 if (block_size < 0) {
300 err = block_size;
301 goto done;
302 }
303 if (want_typ != BLOCK_TYPE_ANY && block_typ != want_typ) {
304 err = 1;
305 goto done;
306 }
307
308 if (block_size > guess_block_size) {
309 reftable_block_done(&block);
310 err = reader_get_block(r, &block, next_off, block_size);
311 if (err < 0) {
312 goto done;
313 }
314 }
315
316 err = block_reader_init(br, &block, header_off, r->block_size,
317 hash_size(r->hash_id));
318 done:
319 reftable_block_done(&block);
320
321 return err;
322 }
323
324 static int table_iter_next_block(struct table_iter *dest,
325 struct table_iter *src)
326 {
327 uint64_t next_block_off = src->block_off + src->bi.br->full_block_size;
328 struct block_reader br = { 0 };
329 int err = 0;
330
331 dest->r = src->r;
332 dest->typ = src->typ;
333 dest->block_off = next_block_off;
334
335 err = reader_init_block_reader(src->r, &br, next_block_off, src->typ);
336 if (err > 0) {
337 dest->is_finished = 1;
338 return 1;
339 }
340 if (err != 0)
341 return err;
342 else {
343 struct block_reader *brp =
344 reftable_malloc(sizeof(struct block_reader));
345 *brp = br;
346
347 dest->is_finished = 0;
348 block_reader_start(brp, &dest->bi);
349 }
350 return 0;
351 }
352
353 static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
354 {
355 if (reftable_record_type(rec) != ti->typ)
356 return REFTABLE_API_ERROR;
357
358 while (1) {
359 struct table_iter next = TABLE_ITER_INIT;
360 int err = 0;
361 if (ti->is_finished) {
362 return 1;
363 }
364
365 err = table_iter_next_in_block(ti, rec);
366 if (err <= 0) {
367 return err;
368 }
369
370 err = table_iter_next_block(&next, ti);
371 if (err != 0) {
372 ti->is_finished = 1;
373 }
374 table_iter_block_done(ti);
375 if (err != 0) {
376 return err;
377 }
378 table_iter_copy_from(ti, &next);
379 block_iter_close(&next.bi);
380 }
381 }
382
/* Iterator vtable adapter: forwards to table_iter_next(). */
static int table_iter_next_void(void *ti, struct reftable_record *rec)
{
	struct table_iter *iter = ti;

	return table_iter_next(iter, rec);
}
387
388 static void table_iter_close(void *p)
389 {
390 struct table_iter *ti = p;
391 table_iter_block_done(ti);
392 block_iter_close(&ti->bi);
393 }
394
395 static struct reftable_iterator_vtable table_iter_vtable = {
396 .next = &table_iter_next_void,
397 .close = &table_iter_close,
398 };
399
400 static void iterator_from_table_iter(struct reftable_iterator *it,
401 struct table_iter *ti)
402 {
403 assert(!it->ops);
404 it->iter_arg = ti;
405 it->ops = &table_iter_vtable;
406 }
407
408 static int reader_table_iter_at(struct reftable_reader *r,
409 struct table_iter *ti, uint64_t off,
410 uint8_t typ)
411 {
412 struct block_reader br = { 0 };
413 struct block_reader *brp = NULL;
414
415 int err = reader_init_block_reader(r, &br, off, typ);
416 if (err != 0)
417 return err;
418
419 brp = reftable_malloc(sizeof(struct block_reader));
420 *brp = br;
421 ti->r = r;
422 ti->typ = block_reader_type(brp);
423 ti->block_off = off;
424 block_reader_start(brp, &ti->bi);
425 return 0;
426 }
427
428 static int reader_start(struct reftable_reader *r, struct table_iter *ti,
429 uint8_t typ, int index)
430 {
431 struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
432 uint64_t off = offs->offset;
433 if (index) {
434 off = offs->index_offset;
435 if (off == 0) {
436 return 1;
437 }
438 typ = BLOCK_TYPE_INDEX;
439 }
440
441 return reader_table_iter_at(r, ti, off, typ);
442 }
443
444 static int reader_seek_linear(struct table_iter *ti,
445 struct reftable_record *want)
446 {
447 struct reftable_record rec =
448 reftable_new_record(reftable_record_type(want));
449 struct strbuf want_key = STRBUF_INIT;
450 struct strbuf got_key = STRBUF_INIT;
451 struct table_iter next = TABLE_ITER_INIT;
452 int err = -1;
453
454 reftable_record_key(want, &want_key);
455
456 while (1) {
457 err = table_iter_next_block(&next, ti);
458 if (err < 0)
459 goto done;
460
461 if (err > 0) {
462 break;
463 }
464
465 err = block_reader_first_key(next.bi.br, &got_key);
466 if (err < 0)
467 goto done;
468
469 if (strbuf_cmp(&got_key, &want_key) > 0) {
470 table_iter_block_done(&next);
471 break;
472 }
473
474 table_iter_block_done(ti);
475 table_iter_copy_from(ti, &next);
476 }
477
478 err = block_iter_seek(&ti->bi, &want_key);
479 if (err < 0)
480 goto done;
481 err = 0;
482
483 done:
484 block_iter_close(&next.bi);
485 reftable_record_release(&rec);
486 strbuf_release(&want_key);
487 strbuf_release(&got_key);
488 return err;
489 }
490
491 static int reader_seek_indexed(struct reftable_reader *r,
492 struct reftable_iterator *it,
493 struct reftable_record *rec)
494 {
495 struct reftable_record want_index = {
496 .type = BLOCK_TYPE_INDEX, .u.idx = { .last_key = STRBUF_INIT }
497 };
498 struct reftable_record index_result = {
499 .type = BLOCK_TYPE_INDEX,
500 .u.idx = { .last_key = STRBUF_INIT },
501 };
502 struct table_iter index_iter = TABLE_ITER_INIT;
503 struct table_iter next = TABLE_ITER_INIT;
504 int err = 0;
505
506 reftable_record_key(rec, &want_index.u.idx.last_key);
507 err = reader_start(r, &index_iter, reftable_record_type(rec), 1);
508 if (err < 0)
509 goto done;
510
511 err = reader_seek_linear(&index_iter, &want_index);
512 while (1) {
513 err = table_iter_next(&index_iter, &index_result);
514 table_iter_block_done(&index_iter);
515 if (err != 0)
516 goto done;
517
518 err = reader_table_iter_at(r, &next, index_result.u.idx.offset,
519 0);
520 if (err != 0)
521 goto done;
522
523 err = block_iter_seek(&next.bi, &want_index.u.idx.last_key);
524 if (err < 0)
525 goto done;
526
527 if (next.typ == reftable_record_type(rec)) {
528 err = 0;
529 break;
530 }
531
532 if (next.typ != BLOCK_TYPE_INDEX) {
533 err = REFTABLE_FORMAT_ERROR;
534 break;
535 }
536
537 table_iter_copy_from(&index_iter, &next);
538 }
539
540 if (err == 0) {
541 struct table_iter empty = TABLE_ITER_INIT;
542 struct table_iter *malloced =
543 reftable_calloc(sizeof(struct table_iter));
544 *malloced = empty;
545 table_iter_copy_from(malloced, &next);
546 iterator_from_table_iter(it, malloced);
547 }
548 done:
549 block_iter_close(&next.bi);
550 table_iter_close(&index_iter);
551 reftable_record_release(&want_index);
552 reftable_record_release(&index_result);
553 return err;
554 }
555
556 static int reader_seek_internal(struct reftable_reader *r,
557 struct reftable_iterator *it,
558 struct reftable_record *rec)
559 {
560 struct reftable_reader_offsets *offs =
561 reader_offsets_for(r, reftable_record_type(rec));
562 uint64_t idx = offs->index_offset;
563 struct table_iter ti = TABLE_ITER_INIT;
564 int err = 0;
565 if (idx > 0)
566 return reader_seek_indexed(r, it, rec);
567
568 err = reader_start(r, &ti, reftable_record_type(rec), 0);
569 if (err < 0)
570 return err;
571 err = reader_seek_linear(&ti, rec);
572 if (err < 0)
573 return err;
574 else {
575 struct table_iter *p =
576 reftable_malloc(sizeof(struct table_iter));
577 *p = ti;
578 iterator_from_table_iter(it, p);
579 }
580
581 return 0;
582 }
583
584 static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it,
585 struct reftable_record *rec)
586 {
587 uint8_t typ = reftable_record_type(rec);
588
589 struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
590 if (!offs->is_present) {
591 iterator_set_empty(it);
592 return 0;
593 }
594
595 return reader_seek_internal(r, it, rec);
596 }
597
598 int reftable_reader_seek_ref(struct reftable_reader *r,
599 struct reftable_iterator *it, const char *name)
600 {
601 struct reftable_record rec = {
602 .type = BLOCK_TYPE_REF,
603 .u.ref = {
604 .refname = (char *)name,
605 },
606 };
607 return reader_seek(r, it, &rec);
608 }
609
610 int reftable_reader_seek_log_at(struct reftable_reader *r,
611 struct reftable_iterator *it, const char *name,
612 uint64_t update_index)
613 {
614 struct reftable_record rec = { .type = BLOCK_TYPE_LOG,
615 .u.log = {
616 .refname = (char *)name,
617 .update_index = update_index,
618 } };
619 return reader_seek(r, it, &rec);
620 }
621
/*
 * Seek `it` to the log records for `name`, using the largest possible
 * update index as the starting point.
 */
int reftable_reader_seek_log(struct reftable_reader *r,
			     struct reftable_iterator *it, const char *name)
{
	return reftable_reader_seek_log_at(r, it, name, ~((uint64_t)0));
}
628
629 void reader_close(struct reftable_reader *r)
630 {
631 block_source_close(&r->source);
632 FREE_AND_NULL(r->name);
633 }
634
635 int reftable_new_reader(struct reftable_reader **p,
636 struct reftable_block_source *src, char const *name)
637 {
638 struct reftable_reader *rd =
639 reftable_calloc(sizeof(struct reftable_reader));
640 int err = init_reader(rd, src, name);
641 if (err == 0) {
642 *p = rd;
643 } else {
644 block_source_close(src);
645 reftable_free(rd);
646 }
647 return err;
648 }
649
/* Close and free a reader; a NULL `r` is accepted and ignored. */
void reftable_reader_free(struct reftable_reader *r)
{
	if (r) {
		reader_close(r);
		reftable_free(r);
	}
}
657
658 static int reftable_reader_refs_for_indexed(struct reftable_reader *r,
659 struct reftable_iterator *it,
660 uint8_t *oid)
661 {
662 struct reftable_record want = {
663 .type = BLOCK_TYPE_OBJ,
664 .u.obj = {
665 .hash_prefix = oid,
666 .hash_prefix_len = r->object_id_len,
667 },
668 };
669 struct reftable_iterator oit = { NULL };
670 struct reftable_record got = {
671 .type = BLOCK_TYPE_OBJ,
672 .u.obj = { 0 },
673 };
674 int err = 0;
675 struct indexed_table_ref_iter *itr = NULL;
676
677 /* Look through the reverse index. */
678 err = reader_seek(r, &oit, &want);
679 if (err != 0)
680 goto done;
681
682 /* read out the reftable_obj_record */
683 err = iterator_next(&oit, &got);
684 if (err < 0)
685 goto done;
686
687 if (err > 0 || memcmp(want.u.obj.hash_prefix, got.u.obj.hash_prefix,
688 r->object_id_len)) {
689 /* didn't find it; return empty iterator */
690 iterator_set_empty(it);
691 err = 0;
692 goto done;
693 }
694
695 err = new_indexed_table_ref_iter(&itr, r, oid, hash_size(r->hash_id),
696 got.u.obj.offsets,
697 got.u.obj.offset_len);
698 if (err < 0)
699 goto done;
700 got.u.obj.offsets = NULL;
701 iterator_from_indexed_table_ref_iter(it, itr);
702
703 done:
704 reftable_iterator_destroy(&oit);
705 reftable_record_release(&got);
706 return err;
707 }
708
709 static int reftable_reader_refs_for_unindexed(struct reftable_reader *r,
710 struct reftable_iterator *it,
711 uint8_t *oid)
712 {
713 struct table_iter ti_empty = TABLE_ITER_INIT;
714 struct table_iter *ti = reftable_calloc(sizeof(struct table_iter));
715 struct filtering_ref_iterator *filter = NULL;
716 struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT;
717 int oid_len = hash_size(r->hash_id);
718 int err;
719
720 *ti = ti_empty;
721 err = reader_start(r, ti, BLOCK_TYPE_REF, 0);
722 if (err < 0) {
723 reftable_free(ti);
724 return err;
725 }
726
727 filter = reftable_malloc(sizeof(struct filtering_ref_iterator));
728 *filter = empty;
729
730 strbuf_add(&filter->oid, oid, oid_len);
731 reftable_table_from_reader(&filter->tab, r);
732 filter->double_check = 0;
733 iterator_from_table_iter(&filter->it, ti);
734
735 iterator_from_filtering_ref_iterator(it, filter);
736 return 0;
737 }
738
739 int reftable_reader_refs_for(struct reftable_reader *r,
740 struct reftable_iterator *it, uint8_t *oid)
741 {
742 if (r->obj_offsets.is_present)
743 return reftable_reader_refs_for_indexed(r, it, oid);
744 return reftable_reader_refs_for_unindexed(r, it, oid);
745 }
746
747 uint64_t reftable_reader_max_update_index(struct reftable_reader *r)
748 {
749 return r->max_update_index;
750 }
751
752 uint64_t reftable_reader_min_update_index(struct reftable_reader *r)
753 {
754 return r->min_update_index;
755 }
756
757 /* generic table interface. */
758
/* Table vtable adapter: forwards to reader_seek(). */
static int reftable_reader_seek_void(void *tab, struct reftable_iterator *it,
				     struct reftable_record *rec)
{
	struct reftable_reader *reader = tab;

	return reader_seek(reader, it, rec);
}
764
/* Table vtable adapter: forwards to reftable_reader_hash_id(). */
static uint32_t reftable_reader_hash_id_void(void *tab)
{
	struct reftable_reader *reader = tab;

	return reftable_reader_hash_id(reader);
}
769
/* Table vtable adapter: forwards to reftable_reader_min_update_index(). */
static uint64_t reftable_reader_min_update_index_void(void *tab)
{
	struct reftable_reader *reader = tab;

	return reftable_reader_min_update_index(reader);
}
774
/* Table vtable adapter: forwards to reftable_reader_max_update_index(). */
static uint64_t reftable_reader_max_update_index_void(void *tab)
{
	struct reftable_reader *reader = tab;

	return reftable_reader_max_update_index(reader);
}
779
780 static struct reftable_table_vtable reader_vtable = {
781 .seek_record = reftable_reader_seek_void,
782 .hash_id = reftable_reader_hash_id_void,
783 .min_update_index = reftable_reader_min_update_index_void,
784 .max_update_index = reftable_reader_max_update_index_void,
785 };
786
787 void reftable_table_from_reader(struct reftable_table *tab,
788 struct reftable_reader *reader)
789 {
790 assert(!tab->ops);
791 tab->ops = &reader_vtable;
792 tab->table_arg = reader;
793 }
794
795
796 int reftable_reader_print_file(const char *tablename)
797 {
798 struct reftable_block_source src = { NULL };
799 int err = reftable_block_source_from_file(&src, tablename);
800 struct reftable_reader *r = NULL;
801 struct reftable_table tab = { NULL };
802 if (err < 0)
803 goto done;
804
805 err = reftable_new_reader(&r, &src, tablename);
806 if (err < 0)
807 goto done;
808
809 reftable_table_from_reader(&tab, r);
810 err = reftable_table_print(&tab);
811 done:
812 reftable_reader_free(r);
813 return err;
814 }