/*
Copyright 2020 Google LLC

Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file or at
https://developers.google.com/open-source/licenses/bsd
*/

#include "block.h"

#include "blocksource.h"
#include "constants.h"
#include "record.h"
#include "reftable-error.h"
#include "system.h"
#include <zlib.h>

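/*
 * Sizes in bytes of the reftable header and footer for a given format
 * version. Version 2 extends the version-1 layout with a 4-byte hash ID,
 * which accounts for the extra four bytes in both.
 */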
int header_size(int version)
{
	switch (version) {
	case 1:
		return 24;
	case 2:
		return 28;
	}
	abort();
}

int footer_size(int version)
{
	switch (version) {
	case 1:
		return 68;
	case 2:
		return 72;
	}
	abort();
}

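/*
 * Reserve space for a record of `n` bytes that was just encoded at
 * `w->next`, optionally registering its offset as a restart point. A block
 * ends with the restart table, which (as a sketch) is laid out as:
 *
 *   3 bytes per restart offset, followed by a 2-byte restart count
 *
 * so the record only fits if `n` plus the grown restart table still fits
 * into the remaining block. Returns -1 if it does not, in which case the
 * encoded record is discarded by not advancing `w->next`.
 */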
static int block_writer_register_restart(struct block_writer *w, int n,
					 int is_restart, struct strbuf *key)
{
	int rlen = w->restart_len;
	if (rlen >= MAX_RESTARTS) {
		is_restart = 0;
	}

	if (is_restart) {
		rlen++;
	}
	if (2 + 3 * rlen + n > w->block_size - w->next)
		return -1;
	if (is_restart) {
		REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap);
		w->restarts[w->restart_len++] = w->next;
	}

	w->next += n;

	strbuf_reset(&w->last_key);
	strbuf_addbuf(&w->last_key, key);
	w->entries++;
	return 0;
}

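/*
 * Initialize a writer for a single block of the given type. The writer
 * merely borrows `buf`; the first four bytes at `header_off` hold the
 * block header (1-byte type plus 3-byte length). The zlib stream used for
 * log blocks is allocated lazily and reused across blocks.
 */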
void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
		       uint32_t block_size, uint32_t header_off, int hash_size)
{
	bw->buf = buf;
	bw->hash_size = hash_size;
	bw->block_size = block_size;
	bw->header_off = header_off;
	bw->buf[header_off] = typ;
	bw->next = header_off + 4;
	bw->restart_interval = 16;
	bw->entries = 0;
	bw->restart_len = 0;
	bw->last_key.len = 0;
	if (!bw->zstream) {
		REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
		deflateInit(bw->zstream, 9);
	}
}

uint8_t block_writer_type(struct block_writer *bw)
{
	return bw->buf[bw->header_off];
}

/*
 * Adds the reftable_record to the block. Returns 0 on success and -1 if the
 * record does not fit. Returns REFTABLE_API_ERROR when attempting to write a
 * record with an empty key.
 */
int block_writer_add(struct block_writer *w, struct reftable_record *rec)
{
	struct strbuf empty = STRBUF_INIT;
	struct strbuf last =
		w->entries % w->restart_interval == 0 ? empty : w->last_key;
	struct string_view out = {
		.buf = w->buf + w->next,
		.len = w->block_size - w->next,
	};

	struct string_view start = out;

	int is_restart = 0;
	struct strbuf key = STRBUF_INIT;
	int n = 0;
	int err = -1;

	reftable_record_key(rec, &key);
	if (!key.len) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	n = reftable_encode_key(&is_restart, out, last, key,
				reftable_record_val_type(rec));
	if (n < 0)
		goto done;
	string_view_consume(&out, n);

	n = reftable_record_encode(rec, out, w->hash_size);
	if (n < 0)
		goto done;
	string_view_consume(&out, n);

	err = block_writer_register_restart(w, start.len - out.len, is_restart,
					    &key);
done:
	strbuf_release(&key);
	return err;
}

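/*
 * Finalize the block: append the restart offsets and their count, patch
 * the block length into the header and, for log blocks, compress the
 * payload in place. Returns the number of bytes used by the block, or a
 * negative error code.
 */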
int block_writer_finish(struct block_writer *w)
{
	int i;
	for (i = 0; i < w->restart_len; i++) {
		put_be24(w->buf + w->next, w->restarts[i]);
		w->next += 3;
	}

	put_be16(w->buf + w->next, w->restart_len);
	w->next += 2;
	put_be24(w->buf + 1 + w->header_off, w->next);

	/*
	 * Log records are stored zlib-compressed. Note that the compression
	 * also spans over the restart points we have just written.
	 */
	if (block_writer_type(w) == BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip, compressed_len;
		int ret;

		ret = deflateReset(w->zstream);
		if (ret != Z_OK)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Precompute the upper bound of how many bytes the compressed
		 * data may require. Combined with `Z_FINISH`, `deflate()`
		 * is guaranteed to return `Z_STREAM_END`.
		 */
		compressed_len = deflateBound(w->zstream, src_len);
		REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap);

		w->zstream->next_out = w->compressed;
		w->zstream->avail_out = compressed_len;
		w->zstream->next_in = w->buf + block_header_skip;
		w->zstream->avail_in = src_len;

		/*
		 * We want to perform all compression in a single step, which
		 * is why we can pass Z_FINISH here. As we have precomputed the
		 * deflated buffer's size via `deflateBound()` this function is
		 * guaranteed to succeed according to the zlib documentation.
		 */
		ret = deflate(w->zstream, Z_FINISH);
		if (ret != Z_STREAM_END)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Overwrite the uncompressed data we have already written and
		 * adjust the `next` pointer to point right after the
		 * compressed data.
		 */
		memcpy(w->buf + block_header_skip, w->compressed,
		       w->zstream->total_out);
		w->next = w->zstream->total_out + block_header_skip;
	}

	return w->next;
}

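/*
 * Initialize a reader for the block starting at `header_off` inside
 * `block`, inflating log blocks into a scratch buffer and locating the
 * restart table at the end of the block. On success, ownership of the
 * block data is transferred to the reader.
 */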
int block_reader_init(struct block_reader *br, struct reftable_block *block,
		      uint32_t header_off, uint32_t table_block_size,
		      int hash_size)
{
	uint32_t full_block_size = table_block_size;
	uint8_t typ = block->data[header_off];
	uint32_t sz = get_be24(block->data + header_off + 1);
	int err = 0;
	uint16_t restart_count = 0;
	uint32_t restart_start = 0;
	uint8_t *restart_bytes = NULL;

	reftable_block_done(&br->block);

	if (!reftable_is_block_type(typ)) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	if (typ == BLOCK_TYPE_LOG) {
		uint32_t block_header_skip = 4 + header_off;
		uLong dst_len = sz - block_header_skip;
		uLong src_len = block->len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
				    br->uncompressed_cap);

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(br->uncompressed_data, block->data, block_header_skip);

		if (!br->zstream) {
			REFTABLE_CALLOC_ARRAY(br->zstream, 1);
			err = inflateInit(br->zstream);
		} else {
			err = inflateReset(br->zstream);
		}
		if (err != Z_OK) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		br->zstream->next_in = block->data + block_header_skip;
		br->zstream->avail_in = src_len;
		br->zstream->next_out = br->uncompressed_data + block_header_skip;
		br->zstream->avail_out = dst_len;

		/*
		 * We know both the input and the output size, and we know
		 * that the sizes can never exceed the maximum value of zlib's
		 * `uInt` because blocks can at most be 16MB large. We can thus
		 * use `Z_FINISH` here to instruct zlib to inflate the data in
		 * one go, which is more efficient than using `Z_NO_FLUSH`.
		 */
		err = inflate(br->zstream, Z_FINISH);
		if (err != Z_STREAM_END) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}
		err = 0;

		if (br->zstream->total_out + block_header_skip != sz) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		reftable_block_done(block);
		block->data = br->uncompressed_data;
		block->len = sz;
		full_block_size = src_len + block_header_skip - br->zstream->avail_in;
	} else if (full_block_size == 0) {
		full_block_size = sz;
	} else if (sz < full_block_size && sz < block->len &&
		   block->data[sz] != 0) {
		/*
		 * If the block is smaller than the full block size, it is
		 * padded (data followed by '\0') or the next block is
		 * unaligned.
		 */
		full_block_size = sz;
	}

	restart_count = get_be16(block->data + sz - 2);
	restart_start = sz - 2 - 3 * restart_count;
	restart_bytes = block->data + restart_start;

	/* Transfer ownership of the block data to the reader. */
	br->block = *block;
	block->data = NULL;
	block->len = 0;

	br->hash_size = hash_size;
	br->block_len = restart_start;
	br->full_block_size = full_block_size;
	br->header_off = header_off;
	br->restart_count = restart_count;
	br->restart_bytes = restart_bytes;

done:
	return err;
}

void block_reader_release(struct block_reader *br)
{
	inflateEnd(br->zstream);
	reftable_free(br->zstream);
	reftable_free(br->uncompressed_data);
	reftable_block_done(&br->block);
}

uint8_t block_reader_type(const struct block_reader *r)
{
	return r->block.data[r->header_off];
}

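/*
 * Decode the key of the block's first record into `key`. The first record
 * is always a restart point and is thus stored without prefix compression.
 */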
int block_reader_first_key(const struct block_reader *br, struct strbuf *key)
{
	int off = br->header_off + 4, n;
	struct string_view in = {
		.buf = br->block.data + off,
		.len = br->block_len - off,
	};
	uint8_t extra = 0;

	strbuf_reset(key);

	n = reftable_decode_key(key, &extra, in);
	if (n < 0)
		return n;
	if (!key->len)
		return REFTABLE_FORMAT_ERROR;

	return 0;
}

static uint32_t block_reader_restart_offset(const struct block_reader *br, size_t idx)
{
	return get_be24(br->restart_bytes + 3 * idx);
}

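/* Position the iterator at the first record of the block. */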
void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
{
	it->block = br->block.data;
	it->block_len = br->block_len;
	it->hash_size = br->hash_size;
	strbuf_reset(&it->last_key);
	it->next_off = br->header_off + 4;
}

struct restart_needle_less_args {
	int error;
	struct strbuf needle;
	const struct block_reader *reader;
};

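/*
 * Predicate for binsearch(): returns 1 if the needle sorts strictly before
 * the record at restart index `idx`. Decode failures are reported through
 * `args->error`, as the predicate itself has no way to signal errors.
 */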
static int restart_needle_less(size_t idx, void *_args)
{
	struct restart_needle_less_args *args = _args;
	uint32_t off = block_reader_restart_offset(args->reader, idx);
	struct string_view in = {
		.buf = args->reader->block.data + off,
		.len = args->reader->block_len - off,
	};
	uint64_t prefix_len, suffix_len;
	uint8_t extra;
	int n;

	/*
	 * Records at restart points are stored without prefix compression, so
	 * there is no need to fully decode the record key here. This removes
	 * the need for allocating memory.
	 */
	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
	if (n < 0 || prefix_len) {
		args->error = 1;
		return -1;
	}

	string_view_consume(&in, n);
	if (suffix_len > in.len) {
		args->error = 1;
		return -1;
	}

	n = memcmp(args->needle.buf, in.buf,
		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
	if (n)
		return n < 0;
	return args->needle.len < suffix_len;
}

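/*
 * Decode the record at the current position into `rec` and advance the
 * iterator. Returns 0 on success, 1 when the end of the block has been
 * reached, and a negative error code otherwise.
 */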
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
	struct string_view in = {
		.buf = (unsigned char *) it->block + it->next_off,
		.len = it->block_len - it->next_off,
	};
	struct string_view start = in;
	uint8_t extra = 0;
	int n = 0;

	if (it->next_off >= it->block_len)
		return 1;

	n = reftable_decode_key(&it->last_key, &extra, in);
	if (n < 0)
		return -1;
	if (!it->last_key.len)
		return REFTABLE_FORMAT_ERROR;

	string_view_consume(&in, n);
	n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
				   &it->scratch);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	it->next_off += start.len - in.len;
	return 0;
}

void block_iter_reset(struct block_iter *it)
{
	strbuf_reset(&it->last_key);
	it->next_off = 0;
	it->block = NULL;
	it->block_len = 0;
	it->hash_size = 0;
}

void block_iter_close(struct block_iter *it)
{
	strbuf_release(&it->last_key);
	strbuf_release(&it->scratch);
}

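/*
 * Position `it` such that the next call to `block_iter_next()` yields the
 * first record whose key is greater or equal to `want`, or the
 * end-of-iterator signal in case all records in the block sort before
 * `want`.
 */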
int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
			struct strbuf *want)
{
	struct restart_needle_less_args args = {
		.needle = *want,
		.reader = br,
	};
	struct reftable_record rec;
	int err = 0;
	size_t i;

	/*
	 * Perform a binary search over the block's restart points, which
	 * avoids doing a linear scan over the whole block. Like this, we
	 * identify the section of the block that should contain our key.
	 *
	 * Note that we explicitly search for the first restart point _greater_
	 * than the sought-after record, not _greater or equal_ to it. In case
	 * the sought-after record is located directly at the restart point we
	 * would otherwise start doing the linear search at the preceding
	 * restart point. While that works alright, we would end up scanning
	 * too many records.
	 */
	i = binsearch(br->restart_count, &restart_needle_less, &args);
	if (args.error) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	/*
	 * Now there are multiple cases:
	 *
	 * - `i == 0`: The wanted record is smaller than the record found at
	 *   the first restart point. As the first restart point is the first
	 *   record in the block, our wanted record cannot be located in this
	 *   block at all. We still position the iterator at the start of the
	 *   block so that the next call to `block_iter_next()` yields the
	 *   block's first record, which is the first record greater than the
	 *   wanted key.
	 *
	 * - `i == restart_count`: The wanted record was not found at any of
	 *   the restart points. As there is no restart point at the end of
	 *   the block, the record may thus be contained in its final section.
	 *
	 * - `i > 0`: The wanted record must be contained in the section
	 *   before the found restart point. We thus do a linear search
	 *   starting from the preceding restart point.
	 */
	if (i > 0)
		it->next_off = block_reader_restart_offset(br, i - 1);
	else
		it->next_off = br->header_off + 4;
	it->block = br->block.data;
	it->block_len = br->block_len;
	it->hash_size = br->hash_size;

	reftable_record_init(&rec, block_reader_type(br));

	/*
	 * We're looking for the last entry less than the wanted key so that
	 * the next call to `block_iter_next()` would yield the wanted
	 * record. We thus don't want to position our iterator at the
	 * sought-after record, but one before it. To do so, we have to go one
	 * entry too far and then back up.
	 */
	while (1) {
		size_t prev_off = it->next_off;

		err = block_iter_next(it, &rec);
		if (err < 0)
			goto done;
		if (err > 0) {
			it->next_off = prev_off;
			err = 0;
			goto done;
		}

		/*
		 * Check whether the current key is greater or equal to the
		 * sought-after key. In case it is greater we know that the
		 * record does not exist in the block and can thus abort early.
		 * In case it is equal to the sought-after key we have found
		 * the desired record.
		 *
		 * Note that we store the next record's key directly in
		 * `last_key` without restoring the key of the preceding record
		 * in case we need to go one record back. This is safe to do as
		 * `block_iter_next()` would return the record whose key is
		 * equal to `last_key` now, and naturally all keys share a
		 * prefix with themselves.
		 */
		reftable_record_key(&rec, &it->last_key);
		if (strbuf_cmp(&it->last_key, want) >= 0) {
			it->next_off = prev_off;
			goto done;
		}
	}

done:
	reftable_record_release(&rec);
	return err;
}

void block_writer_release(struct block_writer *bw)
{
	deflateEnd(bw->zstream);
	FREE_AND_NULL(bw->zstream);
	FREE_AND_NULL(bw->restarts);
	FREE_AND_NULL(bw->compressed);
	strbuf_release(&bw->last_key);
	/* the block data itself is not owned by the writer. */
}

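/*
 * Return the block's data to its source, if it has one, and reset the
 * block so that it can be reused.
 */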
void reftable_block_done(struct reftable_block *blockp)
{
	/*
	 * Note that `blockp` is dereferenced unconditionally below, so a
	 * NULL check on it here would be ineffective anyway.
	 */
	struct reftable_block_source source = blockp->source;
	if (source.ops)
		source.ops->return_block(source.arg, blockp);
	blockp->data = NULL;
	blockp->len = 0;
	blockp->source.ops = NULL;
	blockp->source.arg = NULL;
}