]> git.ipfire.org Git - thirdparty/git.git/blob - reftable/reader.c
reftable: handle null refnames in reftable_ref_record_equal
[thirdparty/git.git] / reftable / reader.c
1 /*
2 Copyright 2020 Google LLC
3
4 Use of this source code is governed by a BSD-style
5 license that can be found in the LICENSE file or at
6 https://developers.google.com/open-source/licenses/bsd
7 */
8
9 #include "reader.h"
10
11 #include "system.h"
12 #include "block.h"
13 #include "constants.h"
14 #include "generic.h"
15 #include "iter.h"
16 #include "record.h"
17 #include "reftable-error.h"
18 #include "reftable-generic.h"
19 #include "tree.h"
20
21 uint64_t block_source_size(struct reftable_block_source *source)
22 {
23 return source->ops->size(source->arg);
24 }
25
26 int block_source_read_block(struct reftable_block_source *source,
27 struct reftable_block *dest, uint64_t off,
28 uint32_t size)
29 {
30 int result = source->ops->read_block(source->arg, dest, off, size);
31 dest->source = *source;
32 return result;
33 }
34
35 void block_source_close(struct reftable_block_source *source)
36 {
37 if (!source->ops) {
38 return;
39 }
40
41 source->ops->close(source->arg);
42 source->ops = NULL;
43 }
44
45 static struct reftable_reader_offsets *
46 reader_offsets_for(struct reftable_reader *r, uint8_t typ)
47 {
48 switch (typ) {
49 case BLOCK_TYPE_REF:
50 return &r->ref_offsets;
51 case BLOCK_TYPE_LOG:
52 return &r->log_offsets;
53 case BLOCK_TYPE_OBJ:
54 return &r->obj_offsets;
55 }
56 abort();
57 }
58
59 static int reader_get_block(struct reftable_reader *r,
60 struct reftable_block *dest, uint64_t off,
61 uint32_t sz)
62 {
63 if (off >= r->size)
64 return 0;
65
66 if (off + sz > r->size) {
67 sz = r->size - off;
68 }
69
70 return block_source_read_block(&r->source, dest, off, sz);
71 }
72
73 uint32_t reftable_reader_hash_id(struct reftable_reader *r)
74 {
75 return r->hash_id;
76 }
77
78 const char *reader_name(struct reftable_reader *r)
79 {
80 return r->name;
81 }
82
83 static int parse_footer(struct reftable_reader *r, uint8_t *footer,
84 uint8_t *header)
85 {
86 uint8_t *f = footer;
87 uint8_t first_block_typ;
88 int err = 0;
89 uint32_t computed_crc;
90 uint32_t file_crc;
91
92 if (memcmp(f, "REFT", 4)) {
93 err = REFTABLE_FORMAT_ERROR;
94 goto done;
95 }
96 f += 4;
97
98 if (memcmp(footer, header, header_size(r->version))) {
99 err = REFTABLE_FORMAT_ERROR;
100 goto done;
101 }
102
103 f++;
104 r->block_size = get_be24(f);
105
106 f += 3;
107 r->min_update_index = get_be64(f);
108 f += 8;
109 r->max_update_index = get_be64(f);
110 f += 8;
111
112 if (r->version == 1) {
113 r->hash_id = GIT_SHA1_FORMAT_ID;
114 } else {
115 r->hash_id = get_be32(f);
116 switch (r->hash_id) {
117 case GIT_SHA1_FORMAT_ID:
118 break;
119 case GIT_SHA256_FORMAT_ID:
120 break;
121 default:
122 err = REFTABLE_FORMAT_ERROR;
123 goto done;
124 }
125 f += 4;
126 }
127
128 r->ref_offsets.index_offset = get_be64(f);
129 f += 8;
130
131 r->obj_offsets.offset = get_be64(f);
132 f += 8;
133
134 r->object_id_len = r->obj_offsets.offset & ((1 << 5) - 1);
135 r->obj_offsets.offset >>= 5;
136
137 r->obj_offsets.index_offset = get_be64(f);
138 f += 8;
139 r->log_offsets.offset = get_be64(f);
140 f += 8;
141 r->log_offsets.index_offset = get_be64(f);
142 f += 8;
143
144 computed_crc = crc32(0, footer, f - footer);
145 file_crc = get_be32(f);
146 f += 4;
147 if (computed_crc != file_crc) {
148 err = REFTABLE_FORMAT_ERROR;
149 goto done;
150 }
151
152 first_block_typ = header[header_size(r->version)];
153 r->ref_offsets.is_present = (first_block_typ == BLOCK_TYPE_REF);
154 r->ref_offsets.offset = 0;
155 r->log_offsets.is_present = (first_block_typ == BLOCK_TYPE_LOG ||
156 r->log_offsets.offset > 0);
157 r->obj_offsets.is_present = r->obj_offsets.offset > 0;
158 err = 0;
159 done:
160 return err;
161 }
162
163 int init_reader(struct reftable_reader *r, struct reftable_block_source *source,
164 const char *name)
165 {
166 struct reftable_block footer = { NULL };
167 struct reftable_block header = { NULL };
168 int err = 0;
169 uint64_t file_size = block_source_size(source);
170
171 /* Need +1 to read type of first block. */
172 uint32_t read_size = header_size(2) + 1; /* read v2 because it's larger. */
173 memset(r, 0, sizeof(struct reftable_reader));
174
175 if (read_size > file_size) {
176 err = REFTABLE_FORMAT_ERROR;
177 goto done;
178 }
179
180 err = block_source_read_block(source, &header, 0, read_size);
181 if (err != read_size) {
182 err = REFTABLE_IO_ERROR;
183 goto done;
184 }
185
186 if (memcmp(header.data, "REFT", 4)) {
187 err = REFTABLE_FORMAT_ERROR;
188 goto done;
189 }
190 r->version = header.data[4];
191 if (r->version != 1 && r->version != 2) {
192 err = REFTABLE_FORMAT_ERROR;
193 goto done;
194 }
195
196 r->size = file_size - footer_size(r->version);
197 r->source = *source;
198 r->name = xstrdup(name);
199 r->hash_id = 0;
200
201 err = block_source_read_block(source, &footer, r->size,
202 footer_size(r->version));
203 if (err != footer_size(r->version)) {
204 err = REFTABLE_IO_ERROR;
205 goto done;
206 }
207
208 err = parse_footer(r, footer.data, header.data);
209 done:
210 reftable_block_done(&footer);
211 reftable_block_done(&header);
212 return err;
213 }
214
215 struct table_iter {
216 struct reftable_reader *r;
217 uint8_t typ;
218 uint64_t block_off;
219 struct block_iter bi;
220 int is_finished;
221 };
222 #define TABLE_ITER_INIT \
223 { \
224 .bi = {.last_key = STRBUF_INIT } \
225 }
226
227 static void table_iter_copy_from(struct table_iter *dest,
228 struct table_iter *src)
229 {
230 dest->r = src->r;
231 dest->typ = src->typ;
232 dest->block_off = src->block_off;
233 dest->is_finished = src->is_finished;
234 block_iter_copy_from(&dest->bi, &src->bi);
235 }
236
237 static int table_iter_next_in_block(struct table_iter *ti,
238 struct reftable_record *rec)
239 {
240 int res = block_iter_next(&ti->bi, rec);
241 if (res == 0 && reftable_record_type(rec) == BLOCK_TYPE_REF) {
242 ((struct reftable_ref_record *)rec->data)->update_index +=
243 ti->r->min_update_index;
244 }
245
246 return res;
247 }
248
249 static void table_iter_block_done(struct table_iter *ti)
250 {
251 if (!ti->bi.br) {
252 return;
253 }
254 reftable_block_done(&ti->bi.br->block);
255 FREE_AND_NULL(ti->bi.br);
256
257 ti->bi.last_key.len = 0;
258 ti->bi.next_off = 0;
259 }
260
/*
 * Inspect the start of a block found at file offset `off`.
 *
 * The block type byte is stored in `*typ`. For the block at offset 0 the
 * file header is skipped first. Returns the 24-bit size that follows the
 * type byte, or 0 when the type byte is not a known block type.
 */
static int32_t extract_block_size(uint8_t *data, uint8_t *typ, uint64_t off,
				  int version)
{
	if (off == 0)
		data += header_size(version);

	*typ = data[0];
	if (!reftable_is_block_type(*typ))
		return 0;

	return get_be24(data + 1);
}
276
277 int reader_init_block_reader(struct reftable_reader *r, struct block_reader *br,
278 uint64_t next_off, uint8_t want_typ)
279 {
280 int32_t guess_block_size = r->block_size ? r->block_size :
281 DEFAULT_BLOCK_SIZE;
282 struct reftable_block block = { NULL };
283 uint8_t block_typ = 0;
284 int err = 0;
285 uint32_t header_off = next_off ? 0 : header_size(r->version);
286 int32_t block_size = 0;
287
288 if (next_off >= r->size)
289 return 1;
290
291 err = reader_get_block(r, &block, next_off, guess_block_size);
292 if (err < 0)
293 goto done;
294
295 block_size = extract_block_size(block.data, &block_typ, next_off,
296 r->version);
297 if (block_size < 0) {
298 err = block_size;
299 goto done;
300 }
301 if (want_typ != BLOCK_TYPE_ANY && block_typ != want_typ) {
302 err = 1;
303 goto done;
304 }
305
306 if (block_size > guess_block_size) {
307 reftable_block_done(&block);
308 err = reader_get_block(r, &block, next_off, block_size);
309 if (err < 0) {
310 goto done;
311 }
312 }
313
314 err = block_reader_init(br, &block, header_off, r->block_size,
315 hash_size(r->hash_id));
316 done:
317 reftable_block_done(&block);
318
319 return err;
320 }
321
322 static int table_iter_next_block(struct table_iter *dest,
323 struct table_iter *src)
324 {
325 uint64_t next_block_off = src->block_off + src->bi.br->full_block_size;
326 struct block_reader br = { 0 };
327 int err = 0;
328
329 dest->r = src->r;
330 dest->typ = src->typ;
331 dest->block_off = next_block_off;
332
333 err = reader_init_block_reader(src->r, &br, next_block_off, src->typ);
334 if (err > 0) {
335 dest->is_finished = 1;
336 return 1;
337 }
338 if (err != 0)
339 return err;
340 else {
341 struct block_reader *brp =
342 reftable_malloc(sizeof(struct block_reader));
343 *brp = br;
344
345 dest->is_finished = 0;
346 block_reader_start(brp, &dest->bi);
347 }
348 return 0;
349 }
350
351 static int table_iter_next(struct table_iter *ti, struct reftable_record *rec)
352 {
353 if (reftable_record_type(rec) != ti->typ)
354 return REFTABLE_API_ERROR;
355
356 while (1) {
357 struct table_iter next = TABLE_ITER_INIT;
358 int err = 0;
359 if (ti->is_finished) {
360 return 1;
361 }
362
363 err = table_iter_next_in_block(ti, rec);
364 if (err <= 0) {
365 return err;
366 }
367
368 err = table_iter_next_block(&next, ti);
369 if (err != 0) {
370 ti->is_finished = 1;
371 }
372 table_iter_block_done(ti);
373 if (err != 0) {
374 return err;
375 }
376 table_iter_copy_from(ti, &next);
377 block_iter_close(&next.bi);
378 }
379 }
380
/* Iterator vtable adapter: forwards to table_iter_next(). */
static int table_iter_next_void(void *ti, struct reftable_record *rec)
{
	struct table_iter *iter = ti;

	return table_iter_next(iter, rec);
}
385
386 static void table_iter_close(void *p)
387 {
388 struct table_iter *ti = p;
389 table_iter_block_done(ti);
390 block_iter_close(&ti->bi);
391 }
392
393 static struct reftable_iterator_vtable table_iter_vtable = {
394 .next = &table_iter_next_void,
395 .close = &table_iter_close,
396 };
397
398 static void iterator_from_table_iter(struct reftable_iterator *it,
399 struct table_iter *ti)
400 {
401 assert(!it->ops);
402 it->iter_arg = ti;
403 it->ops = &table_iter_vtable;
404 }
405
406 static int reader_table_iter_at(struct reftable_reader *r,
407 struct table_iter *ti, uint64_t off,
408 uint8_t typ)
409 {
410 struct block_reader br = { 0 };
411 struct block_reader *brp = NULL;
412
413 int err = reader_init_block_reader(r, &br, off, typ);
414 if (err != 0)
415 return err;
416
417 brp = reftable_malloc(sizeof(struct block_reader));
418 *brp = br;
419 ti->r = r;
420 ti->typ = block_reader_type(brp);
421 ti->block_off = off;
422 block_reader_start(brp, &ti->bi);
423 return 0;
424 }
425
426 static int reader_start(struct reftable_reader *r, struct table_iter *ti,
427 uint8_t typ, int index)
428 {
429 struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
430 uint64_t off = offs->offset;
431 if (index) {
432 off = offs->index_offset;
433 if (off == 0) {
434 return 1;
435 }
436 typ = BLOCK_TYPE_INDEX;
437 }
438
439 return reader_table_iter_at(r, ti, off, typ);
440 }
441
442 static int reader_seek_linear(struct reftable_reader *r, struct table_iter *ti,
443 struct reftable_record *want)
444 {
445 struct reftable_record rec =
446 reftable_new_record(reftable_record_type(want));
447 struct strbuf want_key = STRBUF_INIT;
448 struct strbuf got_key = STRBUF_INIT;
449 struct table_iter next = TABLE_ITER_INIT;
450 int err = -1;
451
452 reftable_record_key(want, &want_key);
453
454 while (1) {
455 err = table_iter_next_block(&next, ti);
456 if (err < 0)
457 goto done;
458
459 if (err > 0) {
460 break;
461 }
462
463 err = block_reader_first_key(next.bi.br, &got_key);
464 if (err < 0)
465 goto done;
466
467 if (strbuf_cmp(&got_key, &want_key) > 0) {
468 table_iter_block_done(&next);
469 break;
470 }
471
472 table_iter_block_done(ti);
473 table_iter_copy_from(ti, &next);
474 }
475
476 err = block_iter_seek(&ti->bi, &want_key);
477 if (err < 0)
478 goto done;
479 err = 0;
480
481 done:
482 block_iter_close(&next.bi);
483 reftable_record_destroy(&rec);
484 strbuf_release(&want_key);
485 strbuf_release(&got_key);
486 return err;
487 }
488
489 static int reader_seek_indexed(struct reftable_reader *r,
490 struct reftable_iterator *it,
491 struct reftable_record *rec)
492 {
493 struct reftable_index_record want_index = { .last_key = STRBUF_INIT };
494 struct reftable_record want_index_rec = { NULL };
495 struct reftable_index_record index_result = { .last_key = STRBUF_INIT };
496 struct reftable_record index_result_rec = { NULL };
497 struct table_iter index_iter = TABLE_ITER_INIT;
498 struct table_iter next = TABLE_ITER_INIT;
499 int err = 0;
500
501 reftable_record_key(rec, &want_index.last_key);
502 reftable_record_from_index(&want_index_rec, &want_index);
503 reftable_record_from_index(&index_result_rec, &index_result);
504
505 err = reader_start(r, &index_iter, reftable_record_type(rec), 1);
506 if (err < 0)
507 goto done;
508
509 err = reader_seek_linear(r, &index_iter, &want_index_rec);
510 while (1) {
511 err = table_iter_next(&index_iter, &index_result_rec);
512 table_iter_block_done(&index_iter);
513 if (err != 0)
514 goto done;
515
516 err = reader_table_iter_at(r, &next, index_result.offset, 0);
517 if (err != 0)
518 goto done;
519
520 err = block_iter_seek(&next.bi, &want_index.last_key);
521 if (err < 0)
522 goto done;
523
524 if (next.typ == reftable_record_type(rec)) {
525 err = 0;
526 break;
527 }
528
529 if (next.typ != BLOCK_TYPE_INDEX) {
530 err = REFTABLE_FORMAT_ERROR;
531 break;
532 }
533
534 table_iter_copy_from(&index_iter, &next);
535 }
536
537 if (err == 0) {
538 struct table_iter empty = TABLE_ITER_INIT;
539 struct table_iter *malloced =
540 reftable_calloc(sizeof(struct table_iter));
541 *malloced = empty;
542 table_iter_copy_from(malloced, &next);
543 iterator_from_table_iter(it, malloced);
544 }
545 done:
546 block_iter_close(&next.bi);
547 table_iter_close(&index_iter);
548 reftable_record_release(&want_index_rec);
549 reftable_record_release(&index_result_rec);
550 return err;
551 }
552
553 static int reader_seek_internal(struct reftable_reader *r,
554 struct reftable_iterator *it,
555 struct reftable_record *rec)
556 {
557 struct reftable_reader_offsets *offs =
558 reader_offsets_for(r, reftable_record_type(rec));
559 uint64_t idx = offs->index_offset;
560 struct table_iter ti = TABLE_ITER_INIT;
561 int err = 0;
562 if (idx > 0)
563 return reader_seek_indexed(r, it, rec);
564
565 err = reader_start(r, &ti, reftable_record_type(rec), 0);
566 if (err < 0)
567 return err;
568 err = reader_seek_linear(r, &ti, rec);
569 if (err < 0)
570 return err;
571 else {
572 struct table_iter *p =
573 reftable_malloc(sizeof(struct table_iter));
574 *p = ti;
575 iterator_from_table_iter(it, p);
576 }
577
578 return 0;
579 }
580
581 static int reader_seek(struct reftable_reader *r, struct reftable_iterator *it,
582 struct reftable_record *rec)
583 {
584 uint8_t typ = reftable_record_type(rec);
585
586 struct reftable_reader_offsets *offs = reader_offsets_for(r, typ);
587 if (!offs->is_present) {
588 iterator_set_empty(it);
589 return 0;
590 }
591
592 return reader_seek_internal(r, it, rec);
593 }
594
595 int reftable_reader_seek_ref(struct reftable_reader *r,
596 struct reftable_iterator *it, const char *name)
597 {
598 struct reftable_ref_record ref = {
599 .refname = (char *)name,
600 };
601 struct reftable_record rec = { NULL };
602 reftable_record_from_ref(&rec, &ref);
603 return reader_seek(r, it, &rec);
604 }
605
606 int reftable_reader_seek_log_at(struct reftable_reader *r,
607 struct reftable_iterator *it, const char *name,
608 uint64_t update_index)
609 {
610 struct reftable_log_record log = {
611 .refname = (char *)name,
612 .update_index = update_index,
613 };
614 struct reftable_record rec = { NULL };
615 reftable_record_from_log(&rec, &log);
616 return reader_seek(r, it, &rec);
617 }
618
/*
 * Seek `it` to the log records for `name`, using the largest possible
 * update index as the seek position.
 */
int reftable_reader_seek_log(struct reftable_reader *r,
			     struct reftable_iterator *it, const char *name)
{
	return reftable_reader_seek_log_at(r, it, name, ~((uint64_t)0));
}
625
626 void reader_close(struct reftable_reader *r)
627 {
628 block_source_close(&r->source);
629 FREE_AND_NULL(r->name);
630 }
631
632 int reftable_new_reader(struct reftable_reader **p,
633 struct reftable_block_source *src, char const *name)
634 {
635 struct reftable_reader *rd =
636 reftable_calloc(sizeof(struct reftable_reader));
637 int err = init_reader(rd, src, name);
638 if (err == 0) {
639 *p = rd;
640 } else {
641 block_source_close(src);
642 reftable_free(rd);
643 }
644 return err;
645 }
646
/* Close and free a reader; passing NULL is a no-op. */
void reftable_reader_free(struct reftable_reader *r)
{
	if (r) {
		reader_close(r);
		reftable_free(r);
	}
}
654
655 static int reftable_reader_refs_for_indexed(struct reftable_reader *r,
656 struct reftable_iterator *it,
657 uint8_t *oid)
658 {
659 struct reftable_obj_record want = {
660 .hash_prefix = oid,
661 .hash_prefix_len = r->object_id_len,
662 };
663 struct reftable_record want_rec = { NULL };
664 struct reftable_iterator oit = { NULL };
665 struct reftable_obj_record got = { NULL };
666 struct reftable_record got_rec = { NULL };
667 int err = 0;
668 struct indexed_table_ref_iter *itr = NULL;
669
670 /* Look through the reverse index. */
671 reftable_record_from_obj(&want_rec, &want);
672 err = reader_seek(r, &oit, &want_rec);
673 if (err != 0)
674 goto done;
675
676 /* read out the reftable_obj_record */
677 reftable_record_from_obj(&got_rec, &got);
678 err = iterator_next(&oit, &got_rec);
679 if (err < 0)
680 goto done;
681
682 if (err > 0 ||
683 memcmp(want.hash_prefix, got.hash_prefix, r->object_id_len)) {
684 /* didn't find it; return empty iterator */
685 iterator_set_empty(it);
686 err = 0;
687 goto done;
688 }
689
690 err = new_indexed_table_ref_iter(&itr, r, oid, hash_size(r->hash_id),
691 got.offsets, got.offset_len);
692 if (err < 0)
693 goto done;
694 got.offsets = NULL;
695 iterator_from_indexed_table_ref_iter(it, itr);
696
697 done:
698 reftable_iterator_destroy(&oit);
699 reftable_record_release(&got_rec);
700 return err;
701 }
702
703 static int reftable_reader_refs_for_unindexed(struct reftable_reader *r,
704 struct reftable_iterator *it,
705 uint8_t *oid)
706 {
707 struct table_iter ti_empty = TABLE_ITER_INIT;
708 struct table_iter *ti = reftable_calloc(sizeof(struct table_iter));
709 struct filtering_ref_iterator *filter = NULL;
710 struct filtering_ref_iterator empty = FILTERING_REF_ITERATOR_INIT;
711 int oid_len = hash_size(r->hash_id);
712 int err;
713
714 *ti = ti_empty;
715 err = reader_start(r, ti, BLOCK_TYPE_REF, 0);
716 if (err < 0) {
717 reftable_free(ti);
718 return err;
719 }
720
721 filter = reftable_malloc(sizeof(struct filtering_ref_iterator));
722 *filter = empty;
723
724 strbuf_add(&filter->oid, oid, oid_len);
725 reftable_table_from_reader(&filter->tab, r);
726 filter->double_check = 0;
727 iterator_from_table_iter(&filter->it, ti);
728
729 iterator_from_filtering_ref_iterator(it, filter);
730 return 0;
731 }
732
733 int reftable_reader_refs_for(struct reftable_reader *r,
734 struct reftable_iterator *it, uint8_t *oid)
735 {
736 if (r->obj_offsets.is_present)
737 return reftable_reader_refs_for_indexed(r, it, oid);
738 return reftable_reader_refs_for_unindexed(r, it, oid);
739 }
740
741 uint64_t reftable_reader_max_update_index(struct reftable_reader *r)
742 {
743 return r->max_update_index;
744 }
745
746 uint64_t reftable_reader_min_update_index(struct reftable_reader *r)
747 {
748 return r->min_update_index;
749 }
750
751 /* generic table interface. */
752
/* Table vtable adapter: forwards to reader_seek(). */
static int reftable_reader_seek_void(void *tab, struct reftable_iterator *it,
				     struct reftable_record *rec)
{
	struct reftable_reader *r = tab;

	return reader_seek(r, it, rec);
}
758
/* Table vtable adapter: forwards to reftable_reader_hash_id(). */
static uint32_t reftable_reader_hash_id_void(void *tab)
{
	struct reftable_reader *r = tab;

	return reftable_reader_hash_id(r);
}
763
/* Table vtable adapter: forwards to reftable_reader_min_update_index(). */
static uint64_t reftable_reader_min_update_index_void(void *tab)
{
	struct reftable_reader *r = tab;

	return reftable_reader_min_update_index(r);
}
768
/* Table vtable adapter: forwards to reftable_reader_max_update_index(). */
static uint64_t reftable_reader_max_update_index_void(void *tab)
{
	struct reftable_reader *r = tab;

	return reftable_reader_max_update_index(r);
}
773
774 static struct reftable_table_vtable reader_vtable = {
775 .seek_record = reftable_reader_seek_void,
776 .hash_id = reftable_reader_hash_id_void,
777 .min_update_index = reftable_reader_min_update_index_void,
778 .max_update_index = reftable_reader_max_update_index_void,
779 };
780
781 void reftable_table_from_reader(struct reftable_table *tab,
782 struct reftable_reader *reader)
783 {
784 assert(!tab->ops);
785 tab->ops = &reader_vtable;
786 tab->table_arg = reader;
787 }
788
789
790 int reftable_reader_print_file(const char *tablename)
791 {
792 struct reftable_block_source src = { NULL };
793 int err = reftable_block_source_from_file(&src, tablename);
794 struct reftable_reader *r = NULL;
795 struct reftable_table tab = { NULL };
796 if (err < 0)
797 goto done;
798
799 err = reftable_new_reader(&r, &src, tablename);
800 if (err < 0)
801 goto done;
802
803 reftable_table_from_reader(&tab, r);
804 err = reftable_table_print(&tab);
805 done:
806 reftable_reader_free(r);
807 return err;
808 }