]> git.ipfire.org Git - thirdparty/git.git/commitdiff
builtin/repack: handle promisor packs with geometric repacking
authorPatrick Steinhardt <ps@pks.im>
Mon, 5 Jan 2026 13:16:45 +0000 (14:16 +0100)
committerJunio C Hamano <gitster@pobox.com>
Wed, 14 Jan 2026 14:29:24 +0000 (06:29 -0800)
When performing a fetch with an object filter, we mark the resulting
packfile as a promisor pack. An object part of such a pack may miss any
of its referenced objects, and Git knows to handle this case by fetching
any such missing objects from the promisor remote.

The "promisor" property needs to be retained going forward. So every
time we pack a promisor object, the resulting pack must be marked as a
promisor pack. git-repack(1) does this already: when a repository has a
promisor remote, it knows to pass "--exclude-promisor-objects" to the
git-pack-objects(1) child process. Promisor packs are written separately
when doing an all-into-one repack via `repack_promisor_objects()`.

But we don't support promisor objects when doing a geometric repack yet.
Promisor packs do not get any special treatment there, as we simply
merge promisor and non-promisor packs. The resulting pack is not even
marked as a promisor pack, which essentially corrupts the repository.

This corruption couldn't happen in the real world though: we pass both
"--exclude-promisor-objects" and "--stdin-packs" to git-pack-objects(1)
if a repository has a promisor remote, but as those options are mutually
exclusive we always end up dying. And while we made those flags
compatible with one another in a preceding commit, we still end up dying
in case git-pack-objects(1) is asked to repack a promisor pack.

There's multiple ways to fix this:

  - We can exclude promisor packs from the geometric progression
    altogether. This would have the consequence that we never repack
    promisor packs at all. But in a partial clone it is quite likely
    that the user generates a bunch of promisor packs over time, as
    every backfill fetch would create another one. So this doesn't
    really feel like a sensible option.

  - We can adapt git-pack-objects(1) to support repacking promisor packs
    and include them in the normal geometric progression. But this would
    mean that the set of promisor objects expands over time as the packs
    are merged with normal packs.

  - We can use a separate geometric progression to repack promisor
    packs.

The first two options both have significant downsides, so they aren't
really feasible. But the third option fixes both of these downsides: we
make sure that promisor packs get merged, and at the same time we never
expand the set of promisor objects beyond the set of objects that are
already marked as promisor objects.

Implement this strategy so that geometric repacking works in partial
clones.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/repack.c
repack-geometry.c
repack-promisor.c
repack.h
t/t7703-repack-geometric.sh

index d9012141f699c9174cf51d79542130ec541a8950..f6bb04bef7264eb5277d70898f6ab60cbac68dff 100644 (file)
@@ -332,6 +332,9 @@ int cmd_repack(int argc,
                    !(pack_everything & PACK_CRUFT))
                        strvec_push(&cmd.args, "--pack-loose-unreachable");
        } else if (geometry.split_factor) {
+               pack_geometry_repack_promisors(repo, &po_args, &geometry,
+                                              &names, packtmp);
+
                if (midx_must_contain_cruft)
                        strvec_push(&cmd.args, "--stdin-packs");
                else
index 0daf545a815c0f223fc7f8a9b7e24cc62a99d06a..7cebd0cb45f0eae36742e108faf3906aead9499a 100644 (file)
@@ -66,15 +66,25 @@ void pack_geometry_init(struct pack_geometry *geometry,
                if (p->is_cruft)
                        continue;
 
-               ALLOC_GROW(geometry->pack,
-                          geometry->pack_nr + 1,
-                          geometry->pack_alloc);
-
-               geometry->pack[geometry->pack_nr] = p;
-               geometry->pack_nr++;
+               if (p->pack_promisor) {
+                       ALLOC_GROW(geometry->promisor_pack,
+                                  geometry->promisor_pack_nr + 1,
+                                  geometry->promisor_pack_alloc);
+
+                       geometry->promisor_pack[geometry->promisor_pack_nr] = p;
+                       geometry->promisor_pack_nr++;
+               } else {
+                       ALLOC_GROW(geometry->pack,
+                                  geometry->pack_nr + 1,
+                                  geometry->pack_alloc);
+
+                       geometry->pack[geometry->pack_nr] = p;
+                       geometry->pack_nr++;
+               }
        }
 
        QSORT(geometry->pack, geometry->pack_nr, pack_geometry_cmp);
+       QSORT(geometry->promisor_pack, geometry->promisor_pack_nr, pack_geometry_cmp);
        strbuf_release(&buf);
 }
 
@@ -160,6 +170,9 @@ void pack_geometry_split(struct pack_geometry *geometry)
 {
        geometry->split = compute_pack_geometry_split(geometry->pack, geometry->pack_nr,
                                                      geometry->split_factor);
+       geometry->promisor_split = compute_pack_geometry_split(geometry->promisor_pack,
+                                                              geometry->promisor_pack_nr,
+                                                              geometry->split_factor);
 }
 
 struct packed_git *pack_geometry_preferred_pack(struct pack_geometry *geometry)
@@ -234,6 +247,8 @@ void pack_geometry_remove_redundant(struct pack_geometry *geometry,
 {
        remove_redundant_packs(geometry->pack, geometry->split,
                               names, existing, packdir);
+       remove_redundant_packs(geometry->promisor_pack, geometry->promisor_split,
+                              names, existing, packdir);
 }
 
 void pack_geometry_release(struct pack_geometry *geometry)
@@ -242,4 +257,5 @@ void pack_geometry_release(struct pack_geometry *geometry)
                return;
 
        free(geometry->pack);
+       free(geometry->promisor_pack);
 }
index 125038d92e25659743a4ea61d4798500af165e3f..73af57bce313768c57461ec35cf1829adafc518d 100644 (file)
@@ -109,3 +109,31 @@ void repack_promisor_objects(struct repository *repo,
 
        finish_repacking_promisor_objects(repo, &cmd, names, packtmp);
 }
+
+void pack_geometry_repack_promisors(struct repository *repo,
+                                   const struct pack_objects_args *args,
+                                   const struct pack_geometry *geometry,
+                                   struct string_list *names,
+                                   const char *packtmp)
+{
+       struct child_process cmd = CHILD_PROCESS_INIT;
+       FILE *in;
+
+       if (!geometry->promisor_split)
+               return;
+
+       prepare_pack_objects(&cmd, args, packtmp);
+       strvec_push(&cmd.args, "--stdin-packs");
+       cmd.in = -1;
+       if (start_command(&cmd))
+               die(_("could not start pack-objects to repack promisor packs"));
+
+       in = xfdopen(cmd.in, "w");
+       for (size_t i = 0; i < geometry->promisor_split; i++)
+               fprintf(in, "%s\n", pack_basename(geometry->promisor_pack[i]));
+       for (size_t i = geometry->promisor_split; i < geometry->promisor_pack_nr; i++)
+               fprintf(in, "^%s\n", pack_basename(geometry->promisor_pack[i]));
+       fclose(in);
+
+       finish_repacking_promisor_objects(repo, &cmd, names, packtmp);
+}
index 3a688a12eeb7aa97e813822177a3203587df6536..bc9f2e1a5de984028a7fcf1fa30873ab436eecde 100644 (file)
--- a/repack.h
+++ b/repack.h
@@ -103,9 +103,19 @@ struct pack_geometry {
        uint32_t pack_nr, pack_alloc;
        uint32_t split;
 
+       struct packed_git **promisor_pack;
+       uint32_t promisor_pack_nr, promisor_pack_alloc;
+       uint32_t promisor_split;
+
        int split_factor;
 };
 
+void pack_geometry_repack_promisors(struct repository *repo,
+                                   const struct pack_objects_args *args,
+                                   const struct pack_geometry *geometry,
+                                   struct string_list *names,
+                                   const char *packtmp);
+
 void pack_geometry_init(struct pack_geometry *geometry,
                        struct existing_packs *existing,
                        const struct pack_objects_args *args);
index 98806cdb6fe9b78775bdf5dc83cf22dd71555c08..04d5d8fc33e9e50e120ccc1d2fa4b13e738c7727 100755 (executable)
@@ -480,4 +480,65 @@ test_expect_success '--geometric -l disables writing bitmaps with non-local pack
        test_path_is_file member/.git/objects/pack/multi-pack-index-*.bitmap
 '
 
+write_packfile () {
+       NR="$1"
+       PREFIX="$2"
+
+       printf "blob\ndata <<EOB\n$PREFIX %s\nEOB\n" $(test_seq $NR) |
+               git fast-import &&
+       git pack-objects --pack-loose-unreachable .git/objects/pack/pack &&
+       git prune-packed
+}
+
+write_promisor_packfile () {
+       PACKFILE=$(write_packfile "$@") &&
+       touch .git/objects/pack/pack-$PACKFILE.promisor &&
+       echo "$PACKFILE"
+}
+
+test_expect_success 'geometric repack works with promisor packs' '
+       test_when_finished "rm -fr repo" &&
+       git init repo &&
+       (
+               cd repo &&
+               git config set maintenance.auto false &&
+               git remote add promisor garbage &&
+               git config set remote.promisor.promisor true &&
+
+               # Packs A and B need to be merged.
+               NORMAL_A=$(write_packfile 2 normal-a) &&
+               NORMAL_B=$(write_packfile 2 normal-b) &&
+               NORMAL_C=$(write_packfile 14 normal-c) &&
+
+               # Packs A, B and C need to be merged.
+               PROMISOR_A=$(write_promisor_packfile 1 promisor-a) &&
+               PROMISOR_B=$(write_promisor_packfile 3 promisor-b) &&
+               PROMISOR_C=$(write_promisor_packfile 3 promisor-c) &&
+               PROMISOR_D=$(write_promisor_packfile 20 promisor-d) &&
+               PROMISOR_E=$(write_promisor_packfile 40 promisor-e) &&
+
+               git cat-file --batch-all-objects --batch-check="%(objectname)" >objects-expect &&
+
+               ls .git/objects/pack/*.pack >packs-before &&
+               test_line_count = 8 packs-before &&
+               git repack --geometric=2 -d &&
+               ls .git/objects/pack/*.pack >packs-after &&
+               test_line_count = 5 packs-after &&
+               test_grep ! "$NORMAL_A" packs-after &&
+               test_grep ! "$NORMAL_B" packs-after &&
+               test_grep "$NORMAL_C" packs-after &&
+               test_grep ! "$PROMISOR_A" packs-after &&
+               test_grep ! "$PROMISOR_B" packs-after &&
+               test_grep ! "$PROMISOR_C" packs-after &&
+               test_grep "$PROMISOR_D" packs-after &&
+               test_grep "$PROMISOR_E" packs-after &&
+
+               ls .git/objects/pack/*.promisor >promisors &&
+               test_line_count = 3 promisors &&
+
+               git cat-file --batch-all-objects --batch-check="%(objectname)" >objects-actual &&
+               test_cmp objects-expect objects-actual
+       )
+'
+
 test_done