reftable/merged.c
/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "merged.h"

#include "constants.h"
#include "iter.h"
#include "pq.h"
#include "reader.h"
#include "record.h"
#include "reftable-merged.h"
#include "reftable-error.h"
#include "system.h"

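/*
 * A single sub-iterator paired with the record it most recently yielded.
 * The merged iterator keeps one of these per table reader.
 */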
struct merged_subiter {
	struct reftable_iterator iter;
	struct reftable_record rec;
};

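/*
 * Iterator that merges the output of all sub-iterators through a priority
 * queue ordered by record key. `advance_index` remembers which sub-iterator
 * produced the record that was last returned so that it can be advanced
 * lazily on the next call; -1 means there is no such sub-iterator.
 */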
struct merged_iter {
	struct merged_subiter *subiters;
	struct merged_iter_pqueue pq;
	size_t subiters_len;
	int suppress_deletions;
	ssize_t advance_index;
};

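/*
 * Set up one sub-iterator per reader of the merged table, all iterating
 * over records of the given block type.
 */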
static void merged_iter_init(struct merged_iter *mi,
			     struct reftable_merged_table *mt,
			     uint8_t typ)
{
	memset(mi, 0, sizeof(*mi));
	mi->advance_index = -1;
	mi->suppress_deletions = mt->suppress_deletions;

	REFTABLE_CALLOC_ARRAY(mi->subiters, mt->readers_len);
	for (size_t i = 0; i < mt->readers_len; i++) {
		reftable_record_init(&mi->subiters[i].rec, typ);
		reader_init_iter(mt->readers[i], &mi->subiters[i].iter, typ);
	}
	mi->subiters_len = mt->readers_len;
}

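/* Release the priority queue as well as all sub-iterators and their records. */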
static void merged_iter_close(void *p)
{
	struct merged_iter *mi = p;

	merged_iter_pqueue_release(&mi->pq);
	for (size_t i = 0; i < mi->subiters_len; i++) {
		reftable_iterator_destroy(&mi->subiters[i].iter);
		reftable_record_release(&mi->subiters[i].rec);
	}
	reftable_free(mi->subiters);
}

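/*
 * Advance the sub-iterator at `idx` and, if it yields another record, add
 * that record to the priority queue. Returns a positive value when the
 * sub-iterator is exhausted and a negative value on error.
 */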
static int merged_iter_advance_subiter(struct merged_iter *mi, size_t idx)
{
	struct pq_entry e = {
		.index = idx,
		.rec = &mi->subiters[idx].rec,
	};
	int err;

	err = iterator_next(&mi->subiters[idx].iter, &mi->subiters[idx].rec);
	if (err)
		return err;

	merged_iter_pqueue_add(&mi->pq, &e);
	return 0;
}

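/*
 * Seek all sub-iterators to `want` and prime the priority queue with the
 * first record at or past that key from each of them. Sub-iterators that
 * have no such record simply do not contribute an entry.
 */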
static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want)
{
	int err;

	mi->advance_index = -1;

	for (size_t i = 0; i < mi->subiters_len; i++) {
		err = iterator_seek(&mi->subiters[i].iter, want);
		if (err < 0)
			return err;
		if (err > 0)
			continue;

		err = merged_iter_advance_subiter(mi, i);
		if (err < 0)
			return err;
	}

	return 0;
}

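/*
 * Produce the next merged record: advance the sub-iterator that yielded the
 * previously returned record, pop the smallest entry off the priority queue
 * and discard queued entries carrying the same key, so that each key is
 * returned only once, with the entry from the newest table taking precedence.
 * Returns 1 once all sub-iterators are exhausted.
 */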
static int merged_iter_next_entry(struct merged_iter *mi,
				  struct reftable_record *rec)
{
	struct pq_entry entry = { 0 };
	int err = 0, empty;

	empty = merged_iter_pqueue_is_empty(mi->pq);

	if (mi->advance_index >= 0) {
		/*
		 * When there are no pqueue entries then we only have a single
		 * subiter left. There is no need to use the pqueue in that
		 * case anymore as we know that the subiter will return entries
		 * in the correct order already.
		 *
		 * While this may sound like a very specific edge case, it may
		 * happen more frequently than you think. Most repositories
		 * will end up having a single large base table that contains
		 * most of the refs. It's thus likely that we exhaust all
		 * subiters except for the one of that base table.
		 */
		if (empty)
			return iterator_next(&mi->subiters[mi->advance_index].iter,
					     rec);

		err = merged_iter_advance_subiter(mi, mi->advance_index);
		if (err < 0)
			return err;
		if (!err)
			empty = 0;
		mi->advance_index = -1;
	}

	if (empty)
		return 1;

	entry = merged_iter_pqueue_remove(&mi->pq);

	/*
	 * One can also use reftable as datacenter-local storage, where the ref
	 * database is maintained in a globally consistent database (e.g.
	 * CockroachDB or Spanner). In this scenario, replication delays together
	 * with compaction may cause newer tables to contain older entries. In
	 * such a deployment, the loop below must be changed to collect all
	 * entries for the same key and return the newest one.
	 */
	while (!merged_iter_pqueue_is_empty(mi->pq)) {
		struct pq_entry top = merged_iter_pqueue_top(mi->pq);
		int cmp;

		cmp = reftable_record_cmp(top.rec, entry.rec);
		if (cmp > 0)
			break;

		merged_iter_pqueue_remove(&mi->pq);
		err = merged_iter_advance_subiter(mi, top.index);
		if (err < 0)
			return err;
	}

	mi->advance_index = entry.index;
	SWAP(*rec, *entry.rec);
	return 0;
}

static int merged_iter_seek_void(void *it, struct reftable_record *want)
{
	return merged_iter_seek(it, want);
}

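/*
 * Return the next merged record, skipping deletion records when the merged
 * table was set up to suppress them.
 */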
static int merged_iter_next_void(void *p, struct reftable_record *rec)
{
	struct merged_iter *mi = p;
	while (1) {
		int err = merged_iter_next_entry(mi, rec);
		if (err)
			return err;
		if (mi->suppress_deletions && reftable_record_is_deletion(rec))
			continue;
		return 0;
	}
}

static struct reftable_iterator_vtable merged_iter_vtable = {
	.seek = &merged_iter_seek_void,
	.next = &merged_iter_next_void,
	.close = &merged_iter_close,
};

static void iterator_from_merged_iter(struct reftable_iterator *it,
				      struct merged_iter *mi)
{
	assert(!it->ops);
	it->iter_arg = mi;
	it->ops = &merged_iter_vtable;
}

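/*
 * Create a merged table on top of the given readers. All readers must use
 * the hash function identified by `hash_id`, otherwise REFTABLE_FORMAT_ERROR
 * is returned. The update-index range of the merged table spans the minimum
 * and maximum update indices across its readers. The readers array is
 * borrowed rather than copied, so it must stay valid for the lifetime of the
 * merged table.
 */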
int reftable_merged_table_new(struct reftable_merged_table **dest,
			      struct reftable_reader **readers, size_t n,
			      uint32_t hash_id)
{
	struct reftable_merged_table *m = NULL;
	uint64_t last_max = 0;
	uint64_t first_min = 0;

	for (size_t i = 0; i < n; i++) {
		uint64_t min = reftable_reader_min_update_index(readers[i]);
		uint64_t max = reftable_reader_max_update_index(readers[i]);

		if (reftable_reader_hash_id(readers[i]) != hash_id) {
			return REFTABLE_FORMAT_ERROR;
		}
		if (i == 0 || min < first_min) {
			first_min = min;
		}
		if (i == 0 || max > last_max) {
			last_max = max;
		}
	}

	REFTABLE_CALLOC_ARRAY(m, 1);
	m->readers = readers;
	m->readers_len = n;
	m->min = first_min;
	m->max = last_max;
	m->hash_id = hash_id;
	*dest = m;
	return 0;
}

void reftable_merged_table_free(struct reftable_merged_table *mt)
{
	if (!mt)
		return;
	reftable_free(mt);
}

uint64_t
reftable_merged_table_max_update_index(struct reftable_merged_table *mt)
{
	return mt->max;
}

uint64_t
reftable_merged_table_min_update_index(struct reftable_merged_table *mt)
{
	return mt->min;
}

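/*
 * Initialize `it` as a merged iterator over all readers of `mt` for the
 * given block type. The allocated merged_iter is owned by the iterator and
 * is torn down via its close callback when the iterator is destroyed.
 */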
void merged_table_init_iter(struct reftable_merged_table *mt,
			    struct reftable_iterator *it,
			    uint8_t typ)
{
	struct merged_iter *mi = reftable_malloc(sizeof(*mi));
	merged_iter_init(mi, mt, typ);
	iterator_from_merged_iter(it, mi);
}

void reftable_merged_table_init_ref_iterator(struct reftable_merged_table *mt,
					     struct reftable_iterator *it)
{
	merged_table_init_iter(mt, it, BLOCK_TYPE_REF);
}

void reftable_merged_table_init_log_iterator(struct reftable_merged_table *mt,
					     struct reftable_iterator *it)
{
	merged_table_init_iter(mt, it, BLOCK_TYPE_LOG);
}

uint32_t reftable_merged_table_hash_id(struct reftable_merged_table *mt)
{
	return mt->hash_id;
}
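
/*
 * Illustrative usage sketch (not part of the original file): how a caller
 * might stack already-opened readers into a merged table and list refs.
 * reftable_iterator_seek_ref(), reftable_iterator_next_ref() and
 * reftable_ref_record_release() are assumed to come from the public
 * reftable headers; error handling is elided.
 *
 *	struct reftable_merged_table *mt = NULL;
 *	struct reftable_iterator it = { 0 };
 *	struct reftable_ref_record ref = { 0 };
 *	uint32_t hash_id = reftable_reader_hash_id(readers[0]);
 *
 *	if (reftable_merged_table_new(&mt, readers, readers_len, hash_id) < 0)
 *		return;
 *	reftable_merged_table_init_ref_iterator(mt, &it);
 *	if (reftable_iterator_seek_ref(&it, "") == 0)
 *		while (reftable_iterator_next_ref(&it, &ref) == 0)
 *			printf("%s\n", ref.refname);
 *	reftable_ref_record_release(&ref);
 *	reftable_iterator_destroy(&it);
 *	reftable_merged_table_free(mt);
 */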