]>
Commit | Line | Data |
---|---|---|
e581fd72 HWN |
1 | /* |
2 | Copyright 2020 Google LLC | |
3 | ||
4 | Use of this source code is governed by a BSD-style | |
5 | license that can be found in the LICENSE file or at | |
6 | https://developers.google.com/open-source/licenses/bsd | |
7 | */ | |
8 | ||
9 | #include "block.h" | |
10 | ||
11 | #include "blocksource.h" | |
12 | #include "constants.h" | |
13 | #include "record.h" | |
14 | #include "reftable-error.h" | |
15 | #include "system.h" | |
16 | #include <zlib.h> | |
17 | ||
18 | int header_size(int version) | |
19 | { | |
20 | switch (version) { | |
21 | case 1: | |
22 | return 24; | |
23 | case 2: | |
24 | return 28; | |
25 | } | |
26 | abort(); | |
27 | } | |
28 | ||
29 | int footer_size(int version) | |
30 | { | |
31 | switch (version) { | |
32 | case 1: | |
33 | return 68; | |
34 | case 2: | |
35 | return 72; | |
36 | } | |
37 | abort(); | |
38 | } | |
39 | ||
40 | static int block_writer_register_restart(struct block_writer *w, int n, | |
41 | int is_restart, struct strbuf *key) | |
42 | { | |
43 | int rlen = w->restart_len; | |
44 | if (rlen >= MAX_RESTARTS) { | |
45 | is_restart = 0; | |
46 | } | |
47 | ||
48 | if (is_restart) { | |
49 | rlen++; | |
50 | } | |
51 | if (2 + 3 * rlen + n > w->block_size - w->next) | |
52 | return -1; | |
53 | if (is_restart) { | |
54 | if (w->restart_len == w->restart_cap) { | |
55 | w->restart_cap = w->restart_cap * 2 + 1; | |
56 | w->restarts = reftable_realloc( | |
57 | w->restarts, sizeof(uint32_t) * w->restart_cap); | |
58 | } | |
59 | ||
60 | w->restarts[w->restart_len++] = w->next; | |
61 | } | |
62 | ||
63 | w->next += n; | |
64 | ||
65 | strbuf_reset(&w->last_key); | |
66 | strbuf_addbuf(&w->last_key, key); | |
67 | w->entries++; | |
68 | return 0; | |
69 | } | |
70 | ||
71 | void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf, | |
72 | uint32_t block_size, uint32_t header_off, int hash_size) | |
73 | { | |
74 | bw->buf = buf; | |
75 | bw->hash_size = hash_size; | |
76 | bw->block_size = block_size; | |
77 | bw->header_off = header_off; | |
78 | bw->buf[header_off] = typ; | |
79 | bw->next = header_off + 4; | |
80 | bw->restart_interval = 16; | |
81 | bw->entries = 0; | |
82 | bw->restart_len = 0; | |
83 | bw->last_key.len = 0; | |
84 | } | |
85 | ||
86 | uint8_t block_writer_type(struct block_writer *bw) | |
87 | { | |
88 | return bw->buf[bw->header_off]; | |
89 | } | |
90 | ||
45c2fcc2 HWN |
91 | /* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on |
92 | success. Returns REFTABLE_API_ERROR if attempting to write a record with | |
93 | empty key. */ | |
e581fd72 HWN |
94 | int block_writer_add(struct block_writer *w, struct reftable_record *rec) |
95 | { | |
96 | struct strbuf empty = STRBUF_INIT; | |
97 | struct strbuf last = | |
98 | w->entries % w->restart_interval == 0 ? empty : w->last_key; | |
99 | struct string_view out = { | |
100 | .buf = w->buf + w->next, | |
101 | .len = w->block_size - w->next, | |
102 | }; | |
103 | ||
104 | struct string_view start = out; | |
105 | ||
106 | int is_restart = 0; | |
107 | struct strbuf key = STRBUF_INIT; | |
108 | int n = 0; | |
45c2fcc2 | 109 | int err = -1; |
e581fd72 HWN |
110 | |
111 | reftable_record_key(rec, &key); | |
45c2fcc2 HWN |
112 | if (!key.len) { |
113 | err = REFTABLE_API_ERROR; | |
114 | goto done; | |
115 | } | |
116 | ||
e581fd72 HWN |
117 | n = reftable_encode_key(&is_restart, out, last, key, |
118 | reftable_record_val_type(rec)); | |
119 | if (n < 0) | |
120 | goto done; | |
121 | string_view_consume(&out, n); | |
122 | ||
123 | n = reftable_record_encode(rec, out, w->hash_size); | |
124 | if (n < 0) | |
125 | goto done; | |
126 | string_view_consume(&out, n); | |
127 | ||
45c2fcc2 HWN |
128 | err = block_writer_register_restart(w, start.len - out.len, is_restart, |
129 | &key); | |
e581fd72 HWN |
130 | done: |
131 | strbuf_release(&key); | |
45c2fcc2 | 132 | return err; |
e581fd72 HWN |
133 | } |
134 | ||
135 | int block_writer_finish(struct block_writer *w) | |
136 | { | |
137 | int i; | |
138 | for (i = 0; i < w->restart_len; i++) { | |
139 | put_be24(w->buf + w->next, w->restarts[i]); | |
140 | w->next += 3; | |
141 | } | |
142 | ||
143 | put_be16(w->buf + w->next, w->restart_len); | |
144 | w->next += 2; | |
145 | put_be24(w->buf + 1 + w->header_off, w->next); | |
146 | ||
147 | if (block_writer_type(w) == BLOCK_TYPE_LOG) { | |
148 | int block_header_skip = 4 + w->header_off; | |
149 | uLongf src_len = w->next - block_header_skip; | |
150 | uLongf dest_cap = src_len * 1.001 + 12; | |
151 | ||
152 | uint8_t *compressed = reftable_malloc(dest_cap); | |
153 | while (1) { | |
154 | uLongf out_dest_len = dest_cap; | |
155 | int zresult = compress2(compressed, &out_dest_len, | |
156 | w->buf + block_header_skip, | |
157 | src_len, 9); | |
158 | if (zresult == Z_BUF_ERROR && dest_cap < LONG_MAX) { | |
159 | dest_cap *= 2; | |
160 | compressed = | |
161 | reftable_realloc(compressed, dest_cap); | |
162 | if (compressed) | |
163 | continue; | |
164 | } | |
165 | ||
166 | if (Z_OK != zresult) { | |
167 | reftable_free(compressed); | |
168 | return REFTABLE_ZLIB_ERROR; | |
169 | } | |
170 | ||
171 | memcpy(w->buf + block_header_skip, compressed, | |
172 | out_dest_len); | |
173 | w->next = out_dest_len + block_header_skip; | |
174 | reftable_free(compressed); | |
175 | break; | |
176 | } | |
177 | } | |
178 | return w->next; | |
179 | } | |
180 | ||
181 | uint8_t block_reader_type(struct block_reader *r) | |
182 | { | |
183 | return r->block.data[r->header_off]; | |
184 | } | |
185 | ||
186 | int block_reader_init(struct block_reader *br, struct reftable_block *block, | |
187 | uint32_t header_off, uint32_t table_block_size, | |
188 | int hash_size) | |
189 | { | |
190 | uint32_t full_block_size = table_block_size; | |
191 | uint8_t typ = block->data[header_off]; | |
192 | uint32_t sz = get_be24(block->data + header_off + 1); | |
24d4d38c | 193 | int err = 0; |
e581fd72 HWN |
194 | uint16_t restart_count = 0; |
195 | uint32_t restart_start = 0; | |
196 | uint8_t *restart_bytes = NULL; | |
24d4d38c | 197 | uint8_t *uncompressed = NULL; |
e581fd72 | 198 | |
24d4d38c HWN |
199 | if (!reftable_is_block_type(typ)) { |
200 | err = REFTABLE_FORMAT_ERROR; | |
201 | goto done; | |
202 | } | |
e581fd72 HWN |
203 | |
204 | if (typ == BLOCK_TYPE_LOG) { | |
205 | int block_header_skip = 4 + header_off; | |
206 | uLongf dst_len = sz - block_header_skip; /* total size of dest | |
207 | buffer. */ | |
208 | uLongf src_len = block->len - block_header_skip; | |
209 | /* Log blocks specify the *uncompressed* size in their header. | |
210 | */ | |
24d4d38c | 211 | uncompressed = reftable_malloc(sz); |
e581fd72 HWN |
212 | |
213 | /* Copy over the block header verbatim. It's not compressed. */ | |
214 | memcpy(uncompressed, block->data, block_header_skip); | |
215 | ||
216 | /* Uncompress */ | |
217 | if (Z_OK != | |
218 | uncompress2(uncompressed + block_header_skip, &dst_len, | |
219 | block->data + block_header_skip, &src_len)) { | |
24d4d38c HWN |
220 | err = REFTABLE_ZLIB_ERROR; |
221 | goto done; | |
e581fd72 HWN |
222 | } |
223 | ||
24d4d38c HWN |
224 | if (dst_len + block_header_skip != sz) { |
225 | err = REFTABLE_FORMAT_ERROR; | |
226 | goto done; | |
227 | } | |
e581fd72 HWN |
228 | |
229 | /* We're done with the input data. */ | |
230 | reftable_block_done(block); | |
231 | block->data = uncompressed; | |
24d4d38c | 232 | uncompressed = NULL; |
e581fd72 HWN |
233 | block->len = sz; |
234 | block->source = malloc_block_source(); | |
235 | full_block_size = src_len + block_header_skip; | |
236 | } else if (full_block_size == 0) { | |
237 | full_block_size = sz; | |
238 | } else if (sz < full_block_size && sz < block->len && | |
239 | block->data[sz] != 0) { | |
240 | /* If the block is smaller than the full block size, it is | |
241 | padded (data followed by '\0') or the next block is | |
242 | unaligned. */ | |
243 | full_block_size = sz; | |
244 | } | |
245 | ||
246 | restart_count = get_be16(block->data + sz - 2); | |
247 | restart_start = sz - 2 - 3 * restart_count; | |
248 | restart_bytes = block->data + restart_start; | |
249 | ||
250 | /* transfer ownership. */ | |
251 | br->block = *block; | |
252 | block->data = NULL; | |
253 | block->len = 0; | |
254 | ||
255 | br->hash_size = hash_size; | |
256 | br->block_len = restart_start; | |
257 | br->full_block_size = full_block_size; | |
258 | br->header_off = header_off; | |
259 | br->restart_count = restart_count; | |
260 | br->restart_bytes = restart_bytes; | |
261 | ||
24d4d38c HWN |
262 | done: |
263 | reftable_free(uncompressed); | |
264 | return err; | |
e581fd72 HWN |
265 | } |
266 | ||
267 | static uint32_t block_reader_restart_offset(struct block_reader *br, int i) | |
268 | { | |
269 | return get_be24(br->restart_bytes + 3 * i); | |
270 | } | |
271 | ||
272 | void block_reader_start(struct block_reader *br, struct block_iter *it) | |
273 | { | |
274 | it->br = br; | |
275 | strbuf_reset(&it->last_key); | |
276 | it->next_off = br->header_off + 4; | |
277 | } | |
278 | ||
279 | struct restart_find_args { | |
280 | int error; | |
281 | struct strbuf key; | |
282 | struct block_reader *r; | |
283 | }; | |
284 | ||
285 | static int restart_key_less(size_t idx, void *args) | |
286 | { | |
287 | struct restart_find_args *a = args; | |
288 | uint32_t off = block_reader_restart_offset(a->r, idx); | |
289 | struct string_view in = { | |
290 | .buf = a->r->block.data + off, | |
291 | .len = a->r->block_len - off, | |
292 | }; | |
293 | ||
294 | /* the restart key is verbatim in the block, so this could avoid the | |
295 | alloc for decoding the key */ | |
296 | struct strbuf rkey = STRBUF_INIT; | |
297 | struct strbuf last_key = STRBUF_INIT; | |
298 | uint8_t unused_extra; | |
299 | int n = reftable_decode_key(&rkey, &unused_extra, last_key, in); | |
300 | int result; | |
301 | if (n < 0) { | |
302 | a->error = 1; | |
303 | return -1; | |
304 | } | |
305 | ||
306 | result = strbuf_cmp(&a->key, &rkey); | |
307 | strbuf_release(&rkey); | |
308 | return result; | |
309 | } | |
310 | ||
311 | void block_iter_copy_from(struct block_iter *dest, struct block_iter *src) | |
312 | { | |
313 | dest->br = src->br; | |
314 | dest->next_off = src->next_off; | |
315 | strbuf_reset(&dest->last_key); | |
316 | strbuf_addbuf(&dest->last_key, &src->last_key); | |
317 | } | |
318 | ||
319 | int block_iter_next(struct block_iter *it, struct reftable_record *rec) | |
320 | { | |
321 | struct string_view in = { | |
322 | .buf = it->br->block.data + it->next_off, | |
323 | .len = it->br->block_len - it->next_off, | |
324 | }; | |
325 | struct string_view start = in; | |
326 | struct strbuf key = STRBUF_INIT; | |
327 | uint8_t extra = 0; | |
328 | int n = 0; | |
329 | ||
330 | if (it->next_off >= it->br->block_len) | |
331 | return 1; | |
332 | ||
333 | n = reftable_decode_key(&key, &extra, it->last_key, in); | |
334 | if (n < 0) | |
335 | return -1; | |
336 | ||
45c2fcc2 HWN |
337 | if (!key.len) |
338 | return REFTABLE_FORMAT_ERROR; | |
339 | ||
e581fd72 HWN |
340 | string_view_consume(&in, n); |
341 | n = reftable_record_decode(rec, key, extra, in, it->br->hash_size); | |
342 | if (n < 0) | |
343 | return -1; | |
344 | string_view_consume(&in, n); | |
345 | ||
346 | strbuf_reset(&it->last_key); | |
347 | strbuf_addbuf(&it->last_key, &key); | |
348 | it->next_off += start.len - in.len; | |
349 | strbuf_release(&key); | |
350 | return 0; | |
351 | } | |
352 | ||
353 | int block_reader_first_key(struct block_reader *br, struct strbuf *key) | |
354 | { | |
355 | struct strbuf empty = STRBUF_INIT; | |
356 | int off = br->header_off + 4; | |
357 | struct string_view in = { | |
358 | .buf = br->block.data + off, | |
359 | .len = br->block_len - off, | |
360 | }; | |
361 | ||
362 | uint8_t extra = 0; | |
363 | int n = reftable_decode_key(key, &extra, empty, in); | |
364 | if (n < 0) | |
365 | return n; | |
45c2fcc2 HWN |
366 | if (!key->len) |
367 | return REFTABLE_FORMAT_ERROR; | |
e581fd72 HWN |
368 | |
369 | return 0; | |
370 | } | |
371 | ||
372 | int block_iter_seek(struct block_iter *it, struct strbuf *want) | |
373 | { | |
374 | return block_reader_seek(it->br, it, want); | |
375 | } | |
376 | ||
377 | void block_iter_close(struct block_iter *it) | |
378 | { | |
379 | strbuf_release(&it->last_key); | |
380 | } | |
381 | ||
382 | int block_reader_seek(struct block_reader *br, struct block_iter *it, | |
383 | struct strbuf *want) | |
384 | { | |
385 | struct restart_find_args args = { | |
386 | .key = *want, | |
387 | .r = br, | |
388 | }; | |
389 | struct reftable_record rec = reftable_new_record(block_reader_type(br)); | |
390 | struct strbuf key = STRBUF_INIT; | |
391 | int err = 0; | |
392 | struct block_iter next = { | |
393 | .last_key = STRBUF_INIT, | |
394 | }; | |
395 | ||
396 | int i = binsearch(br->restart_count, &restart_key_less, &args); | |
397 | if (args.error) { | |
398 | err = REFTABLE_FORMAT_ERROR; | |
399 | goto done; | |
400 | } | |
401 | ||
402 | it->br = br; | |
403 | if (i > 0) { | |
404 | i--; | |
405 | it->next_off = block_reader_restart_offset(br, i); | |
406 | } else { | |
407 | it->next_off = br->header_off + 4; | |
408 | } | |
409 | ||
410 | /* We're looking for the last entry less/equal than the wanted key, so | |
411 | we have to go one entry too far and then back up. | |
412 | */ | |
413 | while (1) { | |
414 | block_iter_copy_from(&next, it); | |
415 | err = block_iter_next(&next, &rec); | |
416 | if (err < 0) | |
417 | goto done; | |
418 | ||
419 | reftable_record_key(&rec, &key); | |
420 | if (err > 0 || strbuf_cmp(&key, want) >= 0) { | |
421 | err = 0; | |
422 | goto done; | |
423 | } | |
424 | ||
425 | block_iter_copy_from(it, &next); | |
426 | } | |
427 | ||
428 | done: | |
429 | strbuf_release(&key); | |
430 | strbuf_release(&next.last_key); | |
66c0daba | 431 | reftable_record_release(&rec); |
e581fd72 HWN |
432 | |
433 | return err; | |
434 | } | |
435 | ||
436 | void block_writer_release(struct block_writer *bw) | |
437 | { | |
438 | FREE_AND_NULL(bw->restarts); | |
439 | strbuf_release(&bw->last_key); | |
440 | /* the block is not owned. */ | |
441 | } | |
442 | ||
443 | void reftable_block_done(struct reftable_block *blockp) | |
444 | { | |
445 | struct reftable_block_source source = blockp->source; | |
446 | if (blockp && source.ops) | |
447 | source.ops->return_block(source.arg, blockp); | |
448 | blockp->data = NULL; | |
449 | blockp->len = 0; | |
450 | blockp->source.ops = NULL; | |
451 | blockp->source.arg = NULL; | |
452 | } |