]>
Commit | Line | Data |
---|---|---|
e581fd72 HWN |
1 | /* |
2 | Copyright 2020 Google LLC | |
3 | ||
4 | Use of this source code is governed by a BSD-style | |
5 | license that can be found in the LICENSE file or at | |
6 | https://developers.google.com/open-source/licenses/bsd | |
7 | */ | |
8 | ||
9 | #include "block.h" | |
10 | ||
11 | #include "blocksource.h" | |
12 | #include "constants.h" | |
13 | #include "record.h" | |
14 | #include "reftable-error.h" | |
15 | #include "system.h" | |
16 | #include <zlib.h> | |
17 | ||
18 | int header_size(int version) | |
19 | { | |
20 | switch (version) { | |
21 | case 1: | |
22 | return 24; | |
23 | case 2: | |
24 | return 28; | |
25 | } | |
26 | abort(); | |
27 | } | |
28 | ||
29 | int footer_size(int version) | |
30 | { | |
31 | switch (version) { | |
32 | case 1: | |
33 | return 68; | |
34 | case 2: | |
35 | return 72; | |
36 | } | |
37 | abort(); | |
38 | } | |
39 | ||
40 | static int block_writer_register_restart(struct block_writer *w, int n, | |
41 | int is_restart, struct strbuf *key) | |
42 | { | |
43 | int rlen = w->restart_len; | |
44 | if (rlen >= MAX_RESTARTS) { | |
45 | is_restart = 0; | |
46 | } | |
47 | ||
48 | if (is_restart) { | |
49 | rlen++; | |
50 | } | |
51 | if (2 + 3 * rlen + n > w->block_size - w->next) | |
52 | return -1; | |
53 | if (is_restart) { | |
f6b58c1b | 54 | REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap); |
e581fd72 HWN |
55 | w->restarts[w->restart_len++] = w->next; |
56 | } | |
57 | ||
58 | w->next += n; | |
59 | ||
60 | strbuf_reset(&w->last_key); | |
61 | strbuf_addbuf(&w->last_key, key); | |
62 | w->entries++; | |
63 | return 0; | |
64 | } | |
65 | ||
66 | void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, | |
67 | uint32_t block_size, uint32_t header_off, int hash_size) | |
68 | { | |
69 | bw->buf = buf; | |
70 | bw->hash_size = hash_size; | |
71 | bw->block_size = block_size; | |
72 | bw->header_off = header_off; | |
73 | bw->buf[header_off] = typ; | |
74 | bw->next = header_off + 4; | |
75 | bw->restart_interval = 16; | |
76 | bw->entries = 0; | |
77 | bw->restart_len = 0; | |
78 | bw->last_key.len = 0; | |
79 | } | |
80 | ||
81 | uint8_t block_writer_type(struct block_writer *bw) | |
82 | { | |
83 | return bw->buf[bw->header_off]; | |
84 | } | |
85 | ||
45c2fcc2 HWN |
86 | /* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on |
87 | success. Returns REFTABLE_API_ERROR if attempting to write a record with | |
88 | empty key. */ | |
e581fd72 HWN |
89 | int block_writer_add(struct block_writer *w, struct reftable_record *rec) |
90 | { | |
91 | struct strbuf empty = STRBUF_INIT; | |
92 | struct strbuf last = | |
93 | w->entries % w->restart_interval == 0 ? empty : w->last_key; | |
94 | struct string_view out = { | |
95 | .buf = w->buf + w->next, | |
96 | .len = w->block_size - w->next, | |
97 | }; | |
98 | ||
99 | struct string_view start = out; | |
100 | ||
101 | int is_restart = 0; | |
102 | struct strbuf key = STRBUF_INIT; | |
103 | int n = 0; | |
45c2fcc2 | 104 | int err = -1; |
e581fd72 HWN |
105 | |
106 | reftable_record_key(rec, &key); | |
45c2fcc2 HWN |
107 | if (!key.len) { |
108 | err = REFTABLE_API_ERROR; | |
109 | goto done; | |
110 | } | |
111 | ||
e581fd72 HWN |
112 | n = reftable_encode_key(&is_restart, out, last, key, |
113 | reftable_record_val_type(rec)); | |
114 | if (n < 0) | |
115 | goto done; | |
116 | string_view_consume(&out, n); | |
117 | ||
118 | n = reftable_record_encode(rec, out, w->hash_size); | |
119 | if (n < 0) | |
120 | goto done; | |
121 | string_view_consume(&out, n); | |
122 | ||
45c2fcc2 HWN |
123 | err = block_writer_register_restart(w, start.len - out.len, is_restart, |
124 | &key); | |
e581fd72 HWN |
125 | done: |
126 | strbuf_release(&key); | |
45c2fcc2 | 127 | return err; |
e581fd72 HWN |
128 | } |
129 | ||
130 | int block_writer_finish(struct block_writer *w) | |
131 | { | |
132 | int i; | |
133 | for (i = 0; i < w->restart_len; i++) { | |
134 | put_be24(w->buf + w->next, w->restarts[i]); | |
135 | w->next += 3; | |
136 | } | |
137 | ||
138 | put_be16(w->buf + w->next, w->restart_len); | |
139 | w->next += 2; | |
140 | put_be24(w->buf + 1 + w->header_off, w->next); | |
141 | ||
142 | if (block_writer_type(w) == BLOCK_TYPE_LOG) { | |
143 | int block_header_skip = 4 + w->header_off; | |
144 | uLongf src_len = w->next - block_header_skip; | |
145 | uLongf dest_cap = src_len * 1.001 + 12; | |
b4ff12c8 PS |
146 | uint8_t *compressed; |
147 | ||
148 | REFTABLE_ALLOC_ARRAY(compressed, dest_cap); | |
e581fd72 | 149 | |
e581fd72 HWN |
150 | while (1) { |
151 | uLongf out_dest_len = dest_cap; | |
152 | int zresult = compress2(compressed, &out_dest_len, | |
153 | w->buf + block_header_skip, | |
154 | src_len, 9); | |
155 | if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) { | |
156 | dest_cap *= 2; | |
157 | compressed = | |
158 | reftable_realloc(compressed, dest_cap); | |
159 | if (compressed) | |
160 | continue; | |
161 | } | |
162 | ||
163 | if (Z_OK != zresult) { | |
164 | reftable_free(compressed); | |
165 | return REFTABLE_ZLIB_ERROR; | |
166 | } | |
167 | ||
168 | memcpy(w->buf + block_header_skip, compressed, | |
169 | out_dest_len); | |
170 | w->next = out_dest_len + block_header_skip; | |
171 | reftable_free(compressed); | |
172 | break; | |
173 | } | |
174 | } | |
175 | return w->next; | |
176 | } | |
177 | ||
e581fd72 HWN |
178 | int block_reader_init(struct block_reader *br, struct reftable_block *block, |
179 | uint32_t header_off, uint32_t table_block_size, | |
180 | int hash_size) | |
181 | { | |
182 | uint32_t full_block_size = table_block_size; | |
183 | uint8_t typ = block->data[header_off]; | |
184 | uint32_t sz = get_be24(block->data + header_off + 1); | |
24d4d38c | 185 | int err = 0; |
e581fd72 HWN |
186 | uint16_t restart_count = 0; |
187 | uint32_t restart_start = 0; | |
188 | uint8_t *restart_bytes = NULL; | |
189 | ||
b00bcb7c PS |
190 | reftable_block_done(&br->block); |
191 | ||
24d4d38c HWN |
192 | if (!reftable_is_block_type(typ)) { |
193 | err = REFTABLE_FORMAT_ERROR; | |
194 | goto done; | |
195 | } | |
e581fd72 HWN |
196 | |
197 | if (typ == BLOCK_TYPE_LOG) { | |
198 | int block_header_skip = 4 + header_off; | |
199 | uLongf dst_len = sz - block_header_skip; /* total size of dest | |
200 | buffer. */ | |
201 | uLongf src_len = block->len - block_header_skip; | |
b4ff12c8 PS |
202 | |
203 | /* Log blocks specify the *uncompressed* size in their header. */ | |
dd347bbc PS |
204 | REFTABLE_ALLOC_GROW(br->uncompressed_data, sz, |
205 | br->uncompressed_cap); | |
e581fd72 HWN |
206 | |
207 | /* Copy over the block header verbatim. It's not compressed. */ | |
dd347bbc | 208 | memcpy(br->uncompressed_data, block->data, block_header_skip); |
e581fd72 HWN |
209 | |
210 | /* Uncompress */ | |
211 | if (Z_OK != | |
dd347bbc | 212 | uncompress2(br->uncompressed_data + block_header_skip, &dst_len, |
e581fd72 | 213 | block->data + block_header_skip, &src_len)) { |
24d4d38c HWN |
214 | err = REFTABLE_ZLIB_ERROR; |
215 | goto done; | |
e581fd72 HWN |
216 | } |
217 | ||
24d4d38c HWN |
218 | if (dst_len + block_header_skip != sz) { |
219 | err = REFTABLE_FORMAT_ERROR; | |
220 | goto done; | |
221 | } | |
e581fd72 HWN |
222 | |
223 | /* We're done with the input data. */ | |
224 | reftable_block_done(block); | |
dd347bbc | 225 | block->data = br->uncompressed_data; |
e581fd72 | 226 | block->len = sz; |
e581fd72 HWN |
227 | full_block_size = src_len + block_header_skip; |
228 | } else if (full_block_size == 0) { | |
229 | full_block_size = sz; | |
230 | } else if (sz < full_block_size && sz < block->len && | |
231 | block->data[sz] != 0) { | |
232 | /* If the block is smaller than the full block size, it is | |
233 | padded (data followed by '\0') or the next block is | |
234 | unaligned. */ | |
235 | full_block_size = sz; | |
236 | } | |
237 | ||
238 | restart_count = get_be16(block->data + sz - 2); | |
239 | restart_start = sz - 2 - 3 * restart_count; | |
240 | restart_bytes = block->data + restart_start; | |
241 | ||
242 | /* transfer ownership. */ | |
243 | br->block = *block; | |
244 | block->data = NULL; | |
245 | block->len = 0; | |
246 | ||
247 | br->hash_size = hash_size; | |
248 | br->block_len = restart_start; | |
249 | br->full_block_size = full_block_size; | |
250 | br->header_off = header_off; | |
251 | br->restart_count = restart_count; | |
252 | br->restart_bytes = restart_bytes; | |
253 | ||
24d4d38c | 254 | done: |
24d4d38c | 255 | return err; |
e581fd72 HWN |
256 | } |
257 | ||
b371221a PS |
258 | void block_reader_release(struct block_reader *br) |
259 | { | |
dd347bbc | 260 | reftable_free(br->uncompressed_data); |
b371221a PS |
261 | reftable_block_done(&br->block); |
262 | } | |
263 | ||
bcdc586d | 264 | uint8_t block_reader_type(const struct block_reader *r) |
aac8c03c PS |
265 | { |
266 | return r->block.data[r->header_off]; | |
267 | } | |
268 | ||
bcdc586d | 269 | int block_reader_first_key(const struct block_reader *br, struct strbuf *key) |
aac8c03c PS |
270 | { |
271 | int off = br->header_off + 4, n; | |
272 | struct string_view in = { | |
273 | .buf = br->block.data + off, | |
274 | .len = br->block_len - off, | |
275 | }; | |
276 | uint8_t extra = 0; | |
277 | ||
278 | strbuf_reset(key); | |
279 | ||
280 | n = reftable_decode_key(key, &extra, in); | |
281 | if (n < 0) | |
282 | return n; | |
283 | if (!key->len) | |
284 | return REFTABLE_FORMAT_ERROR; | |
285 | ||
286 | return 0; | |
287 | } | |
288 | ||
bcdc586d | 289 | static uint32_t block_reader_restart_offset(const struct block_reader *br, int i) |
e581fd72 HWN |
290 | { |
291 | return get_be24(br->restart_bytes + 3 * i); | |
292 | } | |
293 | ||
bcdc586d | 294 | void block_iter_seek_start(struct block_iter *it, const struct block_reader *br) |
e581fd72 | 295 | { |
bcdc586d PS |
296 | it->block = br->block.data; |
297 | it->block_len = br->block_len; | |
298 | it->hash_size = br->hash_size; | |
e581fd72 HWN |
299 | strbuf_reset(&it->last_key); |
300 | it->next_off = br->header_off + 4; | |
301 | } | |
302 | ||
77307a61 | 303 | struct restart_needle_less_args { |
e581fd72 | 304 | int error; |
77307a61 | 305 | struct strbuf needle; |
bcdc586d | 306 | const struct block_reader *reader; |
e581fd72 HWN |
307 | }; |
308 | ||
77307a61 | 309 | static int restart_needle_less(size_t idx, void *_args) |
e581fd72 | 310 | { |
77307a61 PS |
311 | struct restart_needle_less_args *args = _args; |
312 | uint32_t off = block_reader_restart_offset(args->reader, idx); | |
e581fd72 | 313 | struct string_view in = { |
77307a61 PS |
314 | .buf = args->reader->block.data + off, |
315 | .len = args->reader->block_len - off, | |
e581fd72 | 316 | }; |
d51d8cc3 PS |
317 | uint64_t prefix_len, suffix_len; |
318 | uint8_t extra; | |
319 | int n; | |
77307a61 PS |
320 | |
321 | /* | |
d51d8cc3 PS |
322 | * Records at restart points are stored without prefix compression, so |
323 | * there is no need to fully decode the record key here. This removes | |
324 | * the need for allocating memory. | |
77307a61 | 325 | */ |
d51d8cc3 PS |
326 | n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra); |
327 | if (n < 0 || prefix_len) { | |
77307a61 | 328 | args->error = 1; |
e581fd72 HWN |
329 | return -1; |
330 | } | |
331 | ||
d51d8cc3 PS |
332 | string_view_consume(&in, n); |
333 | if (suffix_len > in.len) { | |
334 | args->error = 1; | |
335 | return -1; | |
336 | } | |
337 | ||
338 | n = memcmp(args->needle.buf, in.buf, | |
339 | args->needle.len < suffix_len ? args->needle.len : suffix_len); | |
340 | if (n) | |
341 | return n < 0; | |
342 | return args->needle.len < suffix_len; | |
e581fd72 HWN |
343 | } |
344 | ||
bcdc586d | 345 | void block_iter_copy_from(struct block_iter *dest, const struct block_iter *src) |
e581fd72 | 346 | { |
bcdc586d PS |
347 | dest->block = src->block; |
348 | dest->block_len = src->block_len; | |
349 | dest->hash_size = src->hash_size; | |
e581fd72 HWN |
350 | dest->next_off = src->next_off; |
351 | strbuf_reset(&dest->last_key); | |
352 | strbuf_addbuf(&dest->last_key, &src->last_key); | |
353 | } | |
354 | ||
355 | int block_iter_next(struct block_iter *it, struct reftable_record *rec) | |
356 | { | |
357 | struct string_view in = { | |
bcdc586d PS |
358 | .buf = (unsigned char *) it->block + it->next_off, |
359 | .len = it->block_len - it->next_off, | |
e581fd72 HWN |
360 | }; |
361 | struct string_view start = in; | |
e581fd72 HWN |
362 | uint8_t extra = 0; |
363 | int n = 0; | |
364 | ||
bcdc586d | 365 | if (it->next_off >= it->block_len) |
e581fd72 HWN |
366 | return 1; |
367 | ||
daf4f43d | 368 | n = reftable_decode_key(&it->last_key, &extra, in); |
e581fd72 HWN |
369 | if (n < 0) |
370 | return -1; | |
daf4f43d | 371 | if (!it->last_key.len) |
45c2fcc2 HWN |
372 | return REFTABLE_FORMAT_ERROR; |
373 | ||
e581fd72 | 374 | string_view_consume(&in, n); |
bcdc586d | 375 | n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size, |
7b8abc4d | 376 | &it->scratch); |
e581fd72 HWN |
377 | if (n < 0) |
378 | return -1; | |
379 | string_view_consume(&in, n); | |
380 | ||
e581fd72 | 381 | it->next_off += start.len - in.len; |
e581fd72 HWN |
382 | return 0; |
383 | } | |
384 | ||
bcdc586d PS |
385 | void block_iter_reset(struct block_iter *it) |
386 | { | |
387 | strbuf_reset(&it->last_key); | |
388 | it->next_off = 0; | |
389 | it->block = NULL; | |
390 | it->block_len = 0; | |
391 | it->hash_size = 0; | |
392 | } | |
393 | ||
e581fd72 HWN |
394 | void block_iter_close(struct block_iter *it) |
395 | { | |
396 | strbuf_release(&it->last_key); | |
7b8abc4d | 397 | strbuf_release(&it->scratch); |
e581fd72 HWN |
398 | } |
399 | ||
bcdc586d | 400 | int block_iter_seek_key(struct block_iter *it, const struct block_reader *br, |
42c7bdc3 | 401 | struct strbuf *want) |
e581fd72 | 402 | { |
77307a61 PS |
403 | struct restart_needle_less_args args = { |
404 | .needle = *want, | |
405 | .reader = br, | |
e581fd72 | 406 | }; |
a8305bc6 | 407 | struct block_iter next = BLOCK_ITER_INIT; |
3ddef475 | 408 | struct reftable_record rec; |
3e7b36d1 PS |
409 | int err = 0; |
410 | size_t i; | |
e581fd72 | 411 | |
77307a61 PS |
412 | /* |
413 | * Perform a binary search over the block's restart points, which | |
414 | * avoids doing a linear scan over the whole block. Like this, we | |
415 | * identify the section of the block that should contain our key. | |
416 | * | |
417 | * Note that we explicitly search for the first restart point _greater_ | |
418 | * than the sought-after record, not _greater or equal_ to it. In case | |
419 | * the sought-after record is located directly at the restart point we | |
420 | * would otherwise start doing the linear search at the preceding | |
421 | * restart point. While that works alright, we would end up scanning | |
422 | * too many record. | |
423 | */ | |
424 | i = binsearch(br->restart_count, &restart_needle_less, &args); | |
f9e88544 PS |
425 | if (args.error) { |
426 | err = REFTABLE_FORMAT_ERROR; | |
427 | goto done; | |
428 | } | |
77307a61 PS |
429 | |
430 | /* | |
431 | * Now there are multiple cases: | |
432 | * | |
433 | * - `i == 0`: The wanted record is smaller than the record found at | |
434 | * the first restart point. As the first restart point is the first | |
435 | * record in the block, our wanted record cannot be located in this | |
436 | * block at all. We still need to position the iterator so that the | |
437 | * next call to `block_iter_next()` will yield an end-of-iterator | |
438 | * signal. | |
439 | * | |
440 | * - `i == restart_count`: The wanted record was not found at any of | |
441 | * the restart points. As there is no restart point at the end of | |
442 | * the section the record may thus be contained in the last block. | |
443 | * | |
444 | * - `i > 0`: The wanted record must be contained in the section | |
445 | * before the found restart point. We thus do a linear search | |
446 | * starting from the preceding restart point. | |
447 | */ | |
3ddef475 PS |
448 | if (i > 0) |
449 | it->next_off = block_reader_restart_offset(br, i - 1); | |
450 | else | |
e581fd72 | 451 | it->next_off = br->header_off + 4; |
bcdc586d PS |
452 | it->block = br->block.data; |
453 | it->block_len = br->block_len; | |
454 | it->hash_size = br->hash_size; | |
3ddef475 PS |
455 | |
456 | reftable_record_init(&rec, block_reader_type(br)); | |
e581fd72 | 457 | |
77307a61 PS |
458 | /* |
459 | * We're looking for the last entry less than the wanted key so that | |
460 | * the next call to `block_reader_next()` would yield the wanted | |
461 | * record. We thus don't want to position our reader at the sought | |
462 | * after record, but one before. To do so, we have to go one entry too | |
463 | * far and then back up. | |
464 | */ | |
e581fd72 HWN |
465 | while (1) { |
466 | block_iter_copy_from(&next, it); | |
467 | err = block_iter_next(&next, &rec); | |
468 | if (err < 0) | |
469 | goto done; | |
77307a61 | 470 | if (err > 0) { |
e581fd72 HWN |
471 | err = 0; |
472 | goto done; | |
473 | } | |
474 | ||
77307a61 PS |
475 | /* |
476 | * Check whether the current key is greater or equal to the | |
477 | * sought-after key. In case it is greater we know that the | |
478 | * record does not exist in the block and can thus abort early. | |
479 | * In case it is equal to the sought-after key we have found | |
480 | * the desired record. | |
481 | */ | |
482 | reftable_record_key(&rec, &it->last_key); | |
483 | if (strbuf_cmp(&it->last_key, want) >= 0) | |
484 | goto done; | |
485 | ||
e581fd72 HWN |
486 | block_iter_copy_from(it, &next); |
487 | } | |
488 | ||
489 | done: | |
c0cadb05 | 490 | block_iter_close(&next); |
66c0daba | 491 | reftable_record_release(&rec); |
e581fd72 HWN |
492 | |
493 | return err; | |
494 | } | |
495 | ||
496 | void block_writer_release(struct block_writer *bw) | |
497 | { | |
498 | FREE_AND_NULL(bw->restarts); | |
499 | strbuf_release(&bw->last_key); | |
500 | /* the block is not owned. */ | |
501 | } | |
502 | ||
503 | void reftable_block_done(struct reftable_block *blockp) | |
504 | { | |
505 | struct reftable_block_source source = blockp->source; | |
506 | if (blockp && source.ops) | |
507 | source.ops->return_block(source.arg, blockp); | |
508 | blockp->data = NULL; | |
509 | blockp->len = 0; | |
510 | blockp->source.ops = NULL; | |
511 | blockp->source.arg = NULL; | |
512 | } |