]> git.ipfire.org Git - thirdparty/git.git/blame - reftable/block.c
reftable/block: reuse uncompressed blocks
[thirdparty/git.git] / reftable / block.c
CommitLineData
e581fd72
HWN
1/*
2Copyright 2020 Google LLC
3
4Use of this source code is governed by a BSD-style
5license that can be found in the LICENSE file or at
6https://developers.google.com/open-source/licenses/bsd
7*/
8
9#include "block.h"
10
11#include "blocksource.h"
12#include "constants.h"
13#include "record.h"
14#include "reftable-error.h"
15#include "system.h"
16#include <zlib.h>
17
18int header_size(int version)
19{
20 switch (version) {
21 case 1:
22 return 24;
23 case 2:
24 return 28;
25 }
26 abort();
27}
28
29int footer_size(int version)
30{
31 switch (version) {
32 case 1:
33 return 68;
34 case 2:
35 return 72;
36 }
37 abort();
38}
39
40static int block_writer_register_restart(struct block_writer *w, int n,
41 int is_restart, struct strbuf *key)
42{
43 int rlen = w->restart_len;
44 if (rlen >= MAX_RESTARTS) {
45 is_restart = 0;
46 }
47
48 if (is_restart) {
49 rlen++;
50 }
51 if (2 + 3 * rlen + n > w->block_size - w->next)
52 return -1;
53 if (is_restart) {
f6b58c1b 54 REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap);
e581fd72
HWN
55 w->restarts[w->restart_len++] = w->next;
56 }
57
58 w->next += n;
59
60 strbuf_reset(&w->last_key);
61 strbuf_addbuf(&w->last_key, key);
62 w->entries++;
63 return 0;
64}
65
66void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
67 uint32_t block_size, uint32_t header_off, int hash_size)
68{
69 bw->buf = buf;
70 bw->hash_size = hash_size;
71 bw->block_size = block_size;
72 bw->header_off = header_off;
73 bw->buf[header_off] = typ;
74 bw->next = header_off + 4;
75 bw->restart_interval = 16;
76 bw->entries = 0;
77 bw->restart_len = 0;
78 bw->last_key.len = 0;
79}
80
81uint8_t block_writer_type(struct block_writer *bw)
82{
83 return bw->buf[bw->header_off];
84}
85
45c2fcc2
HWN
86/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
87 success. Returns REFTABLE_API_ERROR if attempting to write a record with
88 empty key. */
e581fd72
HWN
89int block_writer_add(struct block_writer *w, struct reftable_record *rec)
90{
91 struct strbuf empty = STRBUF_INIT;
92 struct strbuf last =
93 w->entries % w->restart_interval == 0 ? empty : w->last_key;
94 struct string_view out = {
95 .buf = w->buf + w->next,
96 .len = w->block_size - w->next,
97 };
98
99 struct string_view start = out;
100
101 int is_restart = 0;
102 struct strbuf key = STRBUF_INIT;
103 int n = 0;
45c2fcc2 104 int err = -1;
e581fd72
HWN
105
106 reftable_record_key(rec, &key);
45c2fcc2
HWN
107 if (!key.len) {
108 err = REFTABLE_API_ERROR;
109 goto done;
110 }
111
e581fd72
HWN
112 n = reftable_encode_key(&is_restart, out, last, key,
113 reftable_record_val_type(rec));
114 if (n < 0)
115 goto done;
116 string_view_consume(&out, n);
117
118 n = reftable_record_encode(rec, out, w->hash_size);
119 if (n < 0)
120 goto done;
121 string_view_consume(&out, n);
122
45c2fcc2
HWN
123 err = block_writer_register_restart(w, start.len - out.len, is_restart,
124 &key);
e581fd72
HWN
125done:
126 strbuf_release(&key);
45c2fcc2 127 return err;
e581fd72
HWN
128}
129
130int block_writer_finish(struct block_writer *w)
131{
132 int i;
133 for (i = 0; i < w->restart_len; i++) {
134 put_be24(w->buf + w->next, w->restarts[i]);
135 w->next += 3;
136 }
137
138 put_be16(w->buf + w->next, w->restart_len);
139 w->next += 2;
140 put_be24(w->buf + 1 + w->header_off, w->next);
141
142 if (block_writer_type(w) == BLOCK_TYPE_LOG) {
143 int block_header_skip = 4 + w->header_off;
144 uLongf src_len = w->next - block_header_skip;
145 uLongf dest_cap = src_len * 1.001 + 12;
b4ff12c8
PS
146 uint8_t *compressed;
147
148 REFTABLE_ALLOC_ARRAY(compressed, dest_cap);
e581fd72 149
e581fd72
HWN
150 while (1) {
151 uLongf out_dest_len = dest_cap;
152 int zresult = compress2(compressed, &out_dest_len,
153 w->buf + block_header_skip,
154 src_len, 9);
155 if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) {
156 dest_cap *= 2;
157 compressed =
158 reftable_realloc(compressed, dest_cap);
159 if (compressed)
160 continue;
161 }
162
163 if (Z_OK != zresult) {
164 reftable_free(compressed);
165 return REFTABLE_ZLIB_ERROR;
166 }
167
168 memcpy(w->buf + block_header_skip, compressed,
169 out_dest_len);
170 w->next = out_dest_len + block_header_skip;
171 reftable_free(compressed);
172 break;
173 }
174 }
175 return w->next;
176}
177
e581fd72
HWN
178int block_reader_init(struct block_reader *br, struct reftable_block *block,
179 uint32_t header_off, uint32_t table_block_size,
180 int hash_size)
181{
182 uint32_t full_block_size = table_block_size;
183 uint8_t typ = block->data[header_off];
184 uint32_t sz = get_be24(block->data + header_off + 1);
24d4d38c 185 int err = 0;
e581fd72
HWN
186 uint16_t restart_count = 0;
187 uint32_t restart_start = 0;
188 uint8_t *restart_bytes = NULL;
189
b00bcb7c
PS
190 reftable_block_done(&br->block);
191
24d4d38c
HWN
192 if (!reftable_is_block_type(typ)) {
193 err = REFTABLE_FORMAT_ERROR;
194 goto done;
195 }
e581fd72
HWN
196
197 if (typ == BLOCK_TYPE_LOG) {
198 int block_header_skip = 4 + header_off;
199 uLongf dst_len = sz - block_header_skip; /* total size of dest
200 buffer. */
201 uLongf src_len = block->len - block_header_skip;
b4ff12c8
PS
202
203 /* Log blocks specify the *uncompressed* size in their header. */
dd347bbc
PS
204 REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
205 br->uncompressed_cap);
e581fd72
HWN
206
207 /* Copy over the block header verbatim. It's not compressed. */
dd347bbc 208 memcpy(br->uncompressed_data, block->data, block_header_skip);
e581fd72
HWN
209
210 /* Uncompress */
211 if (Z_OK !=
dd347bbc 212 uncompress2(br->uncompressed_data + block_header_skip, &dst_len,
e581fd72 213 block->data + block_header_skip, &src_len)) {
24d4d38c
HWN
214 err = REFTABLE_ZLIB_ERROR;
215 goto done;
e581fd72
HWN
216 }
217
24d4d38c
HWN
218 if (dst_len + block_header_skip != sz) {
219 err = REFTABLE_FORMAT_ERROR;
220 goto done;
221 }
e581fd72
HWN
222
223 /* We're done with the input data. */
224 reftable_block_done(block);
dd347bbc 225 block->data = br->uncompressed_data;
e581fd72 226 block->len = sz;
e581fd72
HWN
227 full_block_size = src_len + block_header_skip;
228 } else if (full_block_size == 0) {
229 full_block_size = sz;
230 } else if (sz < full_block_size && sz < block->len &&
231 block->data[sz] != 0) {
232 /* If the block is smaller than the full block size, it is
233 padded (data followed by '\0') or the next block is
234 unaligned. */
235 full_block_size = sz;
236 }
237
238 restart_count = get_be16(block->data + sz - 2);
239 restart_start = sz - 2 - 3 * restart_count;
240 restart_bytes = block->data + restart_start;
241
242 /* transfer ownership. */
243 br->block = *block;
244 block->data = NULL;
245 block->len = 0;
246
247 br->hash_size = hash_size;
248 br->block_len = restart_start;
249 br->full_block_size = full_block_size;
250 br->header_off = header_off;
251 br->restart_count = restart_count;
252 br->restart_bytes = restart_bytes;
253
24d4d38c 254done:
24d4d38c 255 return err;
e581fd72
HWN
256}
257
b371221a
PS
258void block_reader_release(struct block_reader *br)
259{
dd347bbc 260 reftable_free(br->uncompressed_data);
b371221a
PS
261 reftable_block_done(&br->block);
262}
263
bcdc586d 264uint8_t block_reader_type(const struct block_reader *r)
aac8c03c
PS
265{
266 return r->block.data[r->header_off];
267}
268
bcdc586d 269int block_reader_first_key(const struct block_reader *br, struct strbuf *key)
aac8c03c
PS
270{
271 int off = br->header_off + 4, n;
272 struct string_view in = {
273 .buf = br->block.data + off,
274 .len = br->block_len - off,
275 };
276 uint8_t extra = 0;
277
278 strbuf_reset(key);
279
280 n = reftable_decode_key(key, &extra, in);
281 if (n < 0)
282 return n;
283 if (!key->len)
284 return REFTABLE_FORMAT_ERROR;
285
286 return 0;
287}
288
bcdc586d 289static uint32_t block_reader_restart_offset(const struct block_reader *br, int i)
e581fd72
HWN
290{
291 return get_be24(br->restart_bytes + 3 * i);
292}
293
bcdc586d 294void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
e581fd72 295{
bcdc586d
PS
296 it->block = br->block.data;
297 it->block_len = br->block_len;
298 it->hash_size = br->hash_size;
e581fd72
HWN
299 strbuf_reset(&it->last_key);
300 it->next_off = br->header_off + 4;
301}
302
77307a61 303struct restart_needle_less_args {
e581fd72 304 int error;
77307a61 305 struct strbuf needle;
bcdc586d 306 const struct block_reader *reader;
e581fd72
HWN
307};
308
77307a61 309static int restart_needle_less(size_t idx, void *_args)
e581fd72 310{
77307a61
PS
311 struct restart_needle_less_args *args = _args;
312 uint32_t off = block_reader_restart_offset(args->reader, idx);
e581fd72 313 struct string_view in = {
77307a61
PS
314 .buf = args->reader->block.data + off,
315 .len = args->reader->block_len - off,
e581fd72 316 };
d51d8cc3
PS
317 uint64_t prefix_len, suffix_len;
318 uint8_t extra;
319 int n;
77307a61
PS
320
321 /*
d51d8cc3
PS
322 * Records at restart points are stored without prefix compression, so
323 * there is no need to fully decode the record key here. This removes
324 * the need for allocating memory.
77307a61 325 */
d51d8cc3
PS
326 n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
327 if (n < 0 || prefix_len) {
77307a61 328 args->error = 1;
e581fd72
HWN
329 return -1;
330 }
331
d51d8cc3
PS
332 string_view_consume(&in, n);
333 if (suffix_len > in.len) {
334 args->error = 1;
335 return -1;
336 }
337
338 n = memcmp(args->needle.buf, in.buf,
339 args->needle.len < suffix_len ? args->needle.len : suffix_len);
340 if (n)
341 return n < 0;
342 return args->needle.len < suffix_len;
e581fd72
HWN
343}
344
bcdc586d 345void block_iter_copy_from(struct block_iter *dest, const struct block_iter *src)
e581fd72 346{
bcdc586d
PS
347 dest->block = src->block;
348 dest->block_len = src->block_len;
349 dest->hash_size = src->hash_size;
e581fd72
HWN
350 dest->next_off = src->next_off;
351 strbuf_reset(&dest->last_key);
352 strbuf_addbuf(&dest->last_key, &src->last_key);
353}
354
355int block_iter_next(struct block_iter *it, struct reftable_record *rec)
356{
357 struct string_view in = {
bcdc586d
PS
358 .buf = (unsigned char *) it->block + it->next_off,
359 .len = it->block_len - it->next_off,
e581fd72
HWN
360 };
361 struct string_view start = in;
e581fd72
HWN
362 uint8_t extra = 0;
363 int n = 0;
364
bcdc586d 365 if (it->next_off >= it->block_len)
e581fd72
HWN
366 return 1;
367
daf4f43d 368 n = reftable_decode_key(&it->last_key, &extra, in);
e581fd72
HWN
369 if (n < 0)
370 return -1;
daf4f43d 371 if (!it->last_key.len)
45c2fcc2
HWN
372 return REFTABLE_FORMAT_ERROR;
373
e581fd72 374 string_view_consume(&in, n);
bcdc586d 375 n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
7b8abc4d 376 &it->scratch);
e581fd72
HWN
377 if (n < 0)
378 return -1;
379 string_view_consume(&in, n);
380
e581fd72 381 it->next_off += start.len - in.len;
e581fd72
HWN
382 return 0;
383}
384
bcdc586d
PS
385void block_iter_reset(struct block_iter *it)
386{
387 strbuf_reset(&it->last_key);
388 it->next_off = 0;
389 it->block = NULL;
390 it->block_len = 0;
391 it->hash_size = 0;
392}
393
e581fd72
HWN
394void block_iter_close(struct block_iter *it)
395{
396 strbuf_release(&it->last_key);
7b8abc4d 397 strbuf_release(&it->scratch);
e581fd72
HWN
398}
399
bcdc586d 400int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
42c7bdc3 401 struct strbuf *want)
e581fd72 402{
77307a61
PS
403 struct restart_needle_less_args args = {
404 .needle = *want,
405 .reader = br,
e581fd72 406 };
a8305bc6 407 struct block_iter next = BLOCK_ITER_INIT;
3ddef475 408 struct reftable_record rec;
3e7b36d1
PS
409 int err = 0;
410 size_t i;
e581fd72 411
77307a61
PS
412 /*
413 * Perform a binary search over the block's restart points, which
414 * avoids doing a linear scan over the whole block. Like this, we
415 * identify the section of the block that should contain our key.
416 *
417 * Note that we explicitly search for the first restart point _greater_
418 * than the sought-after record, not _greater or equal_ to it. In case
419 * the sought-after record is located directly at the restart point we
420 * would otherwise start doing the linear search at the preceding
421 * restart point. While that works alright, we would end up scanning
422 * too many record.
423 */
424 i = binsearch(br->restart_count, &restart_needle_less, &args);
f9e88544
PS
425 if (args.error) {
426 err = REFTABLE_FORMAT_ERROR;
427 goto done;
428 }
77307a61
PS
429
430 /*
431 * Now there are multiple cases:
432 *
433 * - `i == 0`: The wanted record is smaller than the record found at
434 * the first restart point. As the first restart point is the first
435 * record in the block, our wanted record cannot be located in this
436 * block at all. We still need to position the iterator so that the
437 * next call to `block_iter_next()` will yield an end-of-iterator
438 * signal.
439 *
440 * - `i == restart_count`: The wanted record was not found at any of
441 * the restart points. As there is no restart point at the end of
442 * the section the record may thus be contained in the last block.
443 *
444 * - `i > 0`: The wanted record must be contained in the section
445 * before the found restart point. We thus do a linear search
446 * starting from the preceding restart point.
447 */
3ddef475
PS
448 if (i > 0)
449 it->next_off = block_reader_restart_offset(br, i - 1);
450 else
e581fd72 451 it->next_off = br->header_off + 4;
bcdc586d
PS
452 it->block = br->block.data;
453 it->block_len = br->block_len;
454 it->hash_size = br->hash_size;
3ddef475
PS
455
456 reftable_record_init(&rec, block_reader_type(br));
e581fd72 457
77307a61
PS
458 /*
459 * We're looking for the last entry less than the wanted key so that
460 * the next call to `block_reader_next()` would yield the wanted
461 * record. We thus don't want to position our reader at the sought
462 * after record, but one before. To do so, we have to go one entry too
463 * far and then back up.
464 */
e581fd72
HWN
465 while (1) {
466 block_iter_copy_from(&next, it);
467 err = block_iter_next(&next, &rec);
468 if (err < 0)
469 goto done;
77307a61 470 if (err > 0) {
e581fd72
HWN
471 err = 0;
472 goto done;
473 }
474
77307a61
PS
475 /*
476 * Check whether the current key is greater or equal to the
477 * sought-after key. In case it is greater we know that the
478 * record does not exist in the block and can thus abort early.
479 * In case it is equal to the sought-after key we have found
480 * the desired record.
481 */
482 reftable_record_key(&rec, &it->last_key);
483 if (strbuf_cmp(&it->last_key, want) >= 0)
484 goto done;
485
e581fd72
HWN
486 block_iter_copy_from(it, &next);
487 }
488
489done:
c0cadb05 490 block_iter_close(&next);
66c0daba 491 reftable_record_release(&rec);
e581fd72
HWN
492
493 return err;
494}
495
496void block_writer_release(struct block_writer *bw)
497{
498 FREE_AND_NULL(bw->restarts);
499 strbuf_release(&bw->last_key);
500 /* the block is not owned. */
501}
502
503void reftable_block_done(struct reftable_block *blockp)
504{
505 struct reftable_block_source source = blockp->source;
506 if (blockp && source.ops)
507 source.ops->return_block(source.arg, blockp);
508 blockp->data = NULL;
509 blockp->len = 0;
510 blockp->source.ops = NULL;
511 blockp->source.arg = NULL;
512}