git.ipfire.org Git - thirdparty/git.git/commitdiff
reftable/writer: fix index corruption when writing multiple indices
author Patrick Steinhardt <ps@pks.im>
Wed, 3 Jan 2024 06:22:21 +0000 (07:22 +0100)
committer Junio C Hamano <gitster@pobox.com>
Wed, 3 Jan 2024 17:54:20 +0000 (09:54 -0800)
Each reftable may contain multiple types of blocks for refs, objects and
reflog records, where each of these may have an index that makes it more
efficient to find the records. It was observed that the index for log
records can become corrupted under certain circumstances, where the
first entry of the index points into the object index instead of to the
log records.

As it turns out, this corruption can occur whenever we write a log index
as well as at least one additional index. Writing records and their index
is basically a two-step process:

  1. We write all blocks for the corresponding record. Each block that
     gets written is added to a list of blocks to index.

  2. Once all blocks were written we finish the section. If at least two
     blocks have been added to the list of blocks to index then we will
     now write the index for those blocks and flush it, as well.

When we have a very large number of blocks then we may decide to write a
multi-level index, which is why we also keep track of the list of the
index blocks in the same way as we previously kept track of the blocks
to index.

Now when we have finished writing all index blocks we clear the index
and flush the last block to disk. This is done in the wrong order though
because flushing the block to disk will re-add it to the list of blocks
to be indexed. The result is that the next section we are about to write
will have an entry in the list of blocks to index that points to the
last block of the preceding section's index, which will corrupt the log
index.

Fix this corruption by clearing the index after having written the last
block.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
reftable/readwrite_test.c
reftable/writer.c

index 278663f22d79ab602552a6e24ea1d252cbb2d4ea..9c16e0504ed0f868db6ce6d00fa5ac7061a34a99 100644 (file)
@@ -798,6 +798,85 @@ static void test_write_key_order(void)
        strbuf_release(&buf);
 }
 
+/*
+ * Regression test for index corruption when a table carries more than one
+ * index. With a block size of only 100 bytes, writing 100 ref records and
+ * 100 log records is expected to produce an index for the ref, obj and log
+ * sections alike. The test then opens the table again and seeks into the
+ * log section, which goes through the log index.
+ */
+static void test_write_multiple_indices(void)
+{
+       struct reftable_write_options opts = {
+               .block_size = 100,
+       };
+       struct strbuf writer_buf = STRBUF_INIT, buf = STRBUF_INIT;
+       struct reftable_block_source source = { 0 };
+       struct reftable_iterator it = { 0 };
+       const struct reftable_stats *stats;
+       struct reftable_writer *writer;
+       struct reftable_reader *reader;
+       int err, i;
+
+       writer = reftable_new_writer(&strbuf_add_void, &writer_buf, &opts);
+       /* Every record written below uses update_index 1. */
+       reftable_writer_set_limits(writer, 1, 1);
+       for (i = 0; i < 100; i++) {
+               unsigned char hash[GIT_SHA1_RAWSZ] = {i};
+               struct reftable_ref_record ref = {
+                       .update_index = 1,
+                       .value_type = REFTABLE_REF_VAL1,
+                       .value.val1 = hash,
+               };
+
+               /* "buf" is reused for each name; the record only borrows it. */
+               strbuf_reset(&buf);
+               strbuf_addf(&buf, "refs/heads/%04d", i);
+               ref.refname = buf.buf;
+
+               err = reftable_writer_add_ref(writer, &ref);
+               EXPECT_ERR(err);
+       }
+
+       for (i = 0; i < 100; i++) {
+               unsigned char hash[GIT_SHA1_RAWSZ] = {i};
+               struct reftable_log_record log = {
+                       .update_index = 1,
+                       .value_type = REFTABLE_LOG_UPDATE,
+                       .value.update = {
+                               .old_hash = hash,
+                               .new_hash = hash,
+                       },
+               };
+
+               strbuf_reset(&buf);
+               strbuf_addf(&buf, "refs/heads/%04d", i);
+               log.refname = buf.buf;
+
+               err = reftable_writer_add_log(writer, &log);
+               EXPECT_ERR(err);
+       }
+
+       /* Fail right here if the table could not be finalized. */
+       err = reftable_writer_close(writer);
+       EXPECT_ERR(err);
+
+       /*
+        * The written data should be sufficiently large to result in indices
+        * for each of the block types.
+        */
+       stats = reftable_writer_stats(writer);
+       EXPECT(stats->ref_stats.index_offset > 0);
+       EXPECT(stats->obj_stats.index_offset > 0);
+       EXPECT(stats->log_stats.index_offset > 0);
+
+       block_source_from_strbuf(&source, &writer_buf);
+       err = reftable_new_reader(&reader, &source, "filename");
+       EXPECT_ERR(err);
+
+       /*
+        * Seeking the log uses the log index now. In case there is any
+        * confusion regarding indices we would notice here.
+        */
+       err = reftable_reader_seek_log(reader, &it, "");
+       EXPECT_ERR(err);
+
+       reftable_iterator_destroy(&it);
+       reftable_writer_free(writer);
+       reftable_reader_free(reader);
+       strbuf_release(&writer_buf);
+       strbuf_release(&buf);
+}
+
+
 static void test_corrupt_table_empty(void)
 {
        struct strbuf buf = STRBUF_INIT;
@@ -847,5 +926,6 @@ int readwrite_test_main(int argc, const char *argv[])
        RUN_TEST(test_log_overflow);
        RUN_TEST(test_write_object_id_length);
        RUN_TEST(test_write_object_id_min_length);
+       RUN_TEST(test_write_multiple_indices);
        return 0;
 }
index 2e322a5683d081eea60fc85dfc4d792ce89b7a93..ee4590e20f84dd442fde4f7506e27e568f9ad509 100644 (file)
@@ -432,12 +432,12 @@ static int writer_finish_section(struct reftable_writer *w)
                reftable_free(idx);
        }
 
-       writer_clear_index(w);
-
        err = writer_flush_block(w);
        if (err < 0)
                return err;
 
+       writer_clear_index(w);
+
        bstats = writer_reftable_block_stats(w, typ);
        bstats->index_blocks = w->stats.idx_stats.blocks - before_blocks;
        bstats->index_offset = index_start;