]> git.ipfire.org Git - thirdparty/git.git/commitdiff
midx-write.c: introduce `midx_pack_perm()` helper
authorTaylor Blau <me@ttaylorr.com>
Sat, 6 Dec 2025 20:31:31 +0000 (15:31 -0500)
committerJunio C Hamano <gitster@pobox.com>
Sat, 6 Dec 2025 22:38:08 +0000 (07:38 +0900)
The `ctx->pack_perm` array can be considered as a permutation between
the original `pack_int_id` of some given pack to its position in the
`ctx->info` array containing all packs.

Today we can always index into this array with any known `pack_int_id`,
since there is never a `pack_int_id` which is greater than or equal to
the value `ctx->nr`.

That is not necessarily the case with MIDX compaction. For example,
suppose we have a MIDX chain with three layers, each containing three
packs. The base of the MIDX chain will have packs with IDs 0, 1, and 2,
the next layer 3, 4, and 5, and so on. If we are compacting the topmost
two layers, we'll have input `pack_int_id` values between [3, 8], but
`ctx->nr` will only be 6.

In that example, if we want to know where the pack whose original
`pack_int_id` value was, say, 7, we would compute `ctx->pack_perm[7]`,
leading to an uninitialized read, since there are only 6 entries
allocated in that array.

To address this, there are a couple of options:

 - We could allocate enough entries in `ctx->pack_perm` to accommodate
   the largest `orig_pack_int_id` value.

 - Or, we could internally shift the input values by the number of packs
   in the base layer of the lower end of the MIDX compaction range.

This patch prepare us to take the latter approach, since it does not
allocate more memory than strictly necessary. (In our above example, the
base of the lower end of the compaction range is the first MIDX layer
(having three packs), so we would end up indexing `ctx->pack_perm[7-3]`,
which is a valid read.)

Note that this patch does not actually implement that approach yet, but
merely performs a behavior-preserving refactoring which will make the
change easier to carry out in the future.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
midx-write.c

index 55342fcb6dd7162fa46c563e829d63f20aed6be1..4a1a16431a66b4384eb3e3be5dd6a425da58894e 100644 (file)
@@ -114,6 +114,12 @@ struct write_midx_context {
        struct odb_source *source;
 };
 
+static uint32_t midx_pack_perm(struct write_midx_context *ctx,
+                              uint32_t orig_pack_int_id)
+{
+       return ctx->pack_perm[orig_pack_int_id];
+}
+
 static int should_include_pack(const struct write_midx_context *ctx,
                               const char *file_name)
 {
@@ -509,12 +515,12 @@ static int write_midx_object_offsets(struct hashfile *f,
        for (i = 0; i < ctx->entries_nr; i++) {
                struct pack_midx_entry *obj = list++;
 
-               if (ctx->pack_perm[obj->pack_int_id] == PACK_EXPIRED)
+               if (midx_pack_perm(ctx, obj->pack_int_id) == PACK_EXPIRED)
                        BUG("object %s is in an expired pack with int-id %d",
                            oid_to_hex(&obj->oid),
                            obj->pack_int_id);
 
-               hashwrite_be32(f, ctx->pack_perm[obj->pack_int_id]);
+               hashwrite_be32(f, midx_pack_perm(ctx, obj->pack_int_id));
 
                if (ctx->large_offsets_needed && obj->offset >> 31)
                        hashwrite_be32(f, MIDX_LARGE_OFFSET_NEEDED | nr_large_offset++);
@@ -615,7 +621,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx)
        for (i = 0; i < ctx->entries_nr; i++) {
                struct pack_midx_entry *e = &ctx->entries[i];
                data[i].nr = i;
-               data[i].pack = ctx->pack_perm[e->pack_int_id];
+               data[i].pack = midx_pack_perm(ctx, e->pack_int_id);
                if (!e->preferred)
                        data[i].pack |= (1U << 31);
                data[i].offset = e->offset;
@@ -625,7 +631,7 @@ static uint32_t *midx_pack_order(struct write_midx_context *ctx)
 
        for (i = 0; i < ctx->entries_nr; i++) {
                struct pack_midx_entry *e = &ctx->entries[data[i].nr];
-               struct pack_info *pack = &ctx->info[ctx->pack_perm[e->pack_int_id]];
+               struct pack_info *pack = &ctx->info[midx_pack_perm(ctx, e->pack_int_id)];
                if (pack->bitmap_pos == BITMAP_POS_UNKNOWN)
                        pack->bitmap_pos = i + base_objects;
                pack->bitmap_nr++;
@@ -686,7 +692,7 @@ static void prepare_midx_packing_data(struct packing_data *pdata,
                struct object_entry *to = packlist_alloc(pdata, &from->oid);
 
                oe_set_in_pack(pdata, to,
-                              ctx->info[ctx->pack_perm[from->pack_int_id]].p);
+                              ctx->info[midx_pack_perm(ctx, from->pack_int_id)].p);
        }
 
        trace2_region_leave("midx", "prepare_midx_packing_data", ctx->repo);
@@ -1285,7 +1291,7 @@ static int write_midx_internal(struct write_midx_opts *opts)
                                                      sizeof(*ctx.info),
                                                      idx_or_pack_name_cmp);
                if (preferred) {
-                       uint32_t perm = ctx.pack_perm[preferred->orig_pack_int_id];
+                       uint32_t perm = midx_pack_perm(&ctx, preferred->orig_pack_int_id);
                        if (perm == PACK_EXPIRED)
                                warning(_("preferred pack '%s' is expired"),
                                        opts->preferred_pack_name);