git.ipfire.org Git - thirdparty/git.git/commitdiff
reftable/block: open-code call to `uncompress2()`
author: Patrick Steinhardt <ps@pks.im>
Mon, 8 Apr 2024 12:16:59 +0000 (14:16 +0200)
committer: Junio C Hamano <gitster@pobox.com>
Mon, 15 Apr 2024 17:36:09 +0000 (10:36 -0700)
The reftable format stores log blocks in a compressed format. Thus,
whenever we want to read such a block we first need to decompress it.
This is done by calling the convenience function `uncompress2()` of the
zlib library, which is a simple wrapper that manages the lifecycle of
the `zstream` structure for us.

While nice for one-off inflation of data, when iterating through reflogs
we will likely end up inflating many such log blocks. This requires us
to reallocate the state of the `zstream` every single time, which adds
up over time. It would thus be great to reuse the `zstream` instead of
discarding it after every inflation.

Open-code the call to `uncompress2()` such that we can start reusing the
`zstream` in the subsequent commit. Note that our open-coded variant is
different from `uncompress2()` in two ways:

  - We do not loop around `inflate()` until we have processed all input.
    As our input is limited by the maximum block size, which is 16MB, we
    stay well below the size that a single call to `inflate()` can
    process.

  - We use `Z_FINISH` instead of `Z_NO_FLUSH`. Quoting the `inflate()`
    documentation: "inflate() should normally be called until it returns
    Z_STREAM_END or an error. However if all decompression is to be
    performed in a single step (a single call of inflate), the parameter
    flush should be set to Z_FINISH."

    Furthermore, "Z_FINISH also informs inflate to not maintain a
    sliding window if the stream completes, which reduces inflate's
    memory footprint."

Other than that this commit is expected to be functionally equivalent
and does not yet reuse the `zstream`.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
reftable/block.c

index 9460273290f28348ff1477108a50d8ed924dd24c..435922b569ccca9ef7d4a17931b970110796638b 100644 (file)
@@ -195,10 +195,10 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
        }
 
        if (typ == BLOCK_TYPE_LOG) {
-               int block_header_skip = 4 + header_off;
-               uLongf dst_len = sz - block_header_skip; /* total size of dest
-                                                           buffer. */
-               uLongf src_len = block->len - block_header_skip;
+               uint32_t block_header_skip = 4 + header_off;
+               uLong dst_len = sz - block_header_skip;
+               uLong src_len = block->len - block_header_skip;
+               z_stream stream = {0};
 
                /* Log blocks specify the *uncompressed* size in their header. */
                REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
@@ -207,15 +207,33 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
                /* Copy over the block header verbatim. It's not compressed. */
                memcpy(br->uncompressed_data, block->data, block_header_skip);
 
-               /* Uncompress */
-               if (Z_OK !=
-                   uncompress2(br->uncompressed_data + block_header_skip, &dst_len,
-                               block->data + block_header_skip, &src_len)) {
+               err = inflateInit(&stream);
+               if (err != Z_OK) {
                        err = REFTABLE_ZLIB_ERROR;
                        goto done;
                }
 
-               if (dst_len + block_header_skip != sz) {
+               stream.next_in = block->data + block_header_skip;
+               stream.avail_in = src_len;
+               stream.next_out = br->uncompressed_data + block_header_skip;
+               stream.avail_out = dst_len;
+
+               /*
+                * We know both input as well as output size, and we know that
+                * the sizes should never be bigger than `uInt_MAX` because
+                * blocks can at most be 16MB large. We can thus use `Z_FINISH`
+                * here to instruct zlib to inflate the data in one go, which
+                * is more efficient than using `Z_NO_FLUSH`.
+                */
+               err = inflate(&stream, Z_FINISH);
+               inflateEnd(&stream);
+               if (err != Z_STREAM_END) {
+                       err = REFTABLE_ZLIB_ERROR;
+                       goto done;
+               }
+               err = 0;
+
+               if (stream.total_out + block_header_skip != sz) {
                        err = REFTABLE_FORMAT_ERROR;
                        goto done;
                }
@@ -224,7 +242,7 @@ int block_reader_init(struct block_reader *br, struct reftable_block *block,
                reftable_block_done(block);
                block->data = br->uncompressed_data;
                block->len = sz;
-               full_block_size = src_len + block_header_skip;
+               full_block_size = src_len + block_header_skip - stream.avail_in;
        } else if (full_block_size == 0) {
                full_block_size = sz;
        } else if (sz < full_block_size && sz < block->len &&