git.ipfire.org Git - thirdparty/git.git/commitdiff
builtin/repack.c: make largest pack preferred
author Taylor Blau <me@ttaylorr.com>
Wed, 29 Sep 2021 01:55:20 +0000 (21:55 -0400)
committer Junio C Hamano <gitster@pobox.com>
Wed, 29 Sep 2021 04:20:56 +0000 (21:20 -0700)
When repacking into a geometric series and writing a multi-pack bitmap,
it is beneficial to have the largest resulting pack be the preferred
object source in the bitmap's MIDX, since selecting the large packs can
lead to fewer broken delta chains and better compression.

Teach 'git repack' to identify this pack and pass it to the MIDX write
machinery in order to mark it as preferred.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-repack.txt
builtin/repack.c
pack-bitmap.c
pack-bitmap.h
t/helper/test-read-midx.c
t/t7703-repack-geometric.sh

index 0f2d235ca559929f092eb3969d5a48cfcb13630d..7183fb498f4ccec69e6bd75dcef44f54974a159b 100644 (file)
@@ -190,6 +190,10 @@ this "roll-up", without respect to their reachability. This is subject
 to change in the future. This option (implying a drastically different
 repack mode) is not guaranteed to work with all other combinations of
 option to `git repack`.
++
+When writing a multi-pack bitmap, `git repack` selects the largest resulting
+pack as the preferred pack for object selection by the MIDX (see
+linkgit:git-multi-pack-index[1]).
 
 -m::
 --write-midx::
index dbbb14b3b4b1ca30fb0c53ce666ea4aa728830ac..f940e2bafe42b40e654a788ff765fc57d3f3a421 100644 (file)
@@ -423,6 +423,25 @@ static void split_pack_geometry(struct pack_geometry *geometry, int factor)
        geometry->split = split;
 }
 
+static struct packed_git *get_largest_active_pack(struct pack_geometry *geometry)
+{
+       if (!geometry) {
+               /*
+                * No geometry means either an all-into-one repack (in which
+                * case there is only one pack left and it is the largest) or an
+                * incremental one.
+                *
+                * If repacking incrementally, then we could check the size of
+                * all packs to determine which should be preferred, but leave
+                * this for later.
+                */
+               return NULL;
+       }
+       if (geometry->split == geometry->pack_nr)
+               return NULL;
+       return geometry->pack[geometry->pack_nr - 1];
+}
+
 static void clear_pack_geometry(struct pack_geometry *geometry)
 {
        if (!geometry)
@@ -468,10 +487,12 @@ static void midx_included_packs(struct string_list *include,
 }
 
 static int write_midx_included_packs(struct string_list *include,
+                                    struct pack_geometry *geometry,
                                     int show_progress, int write_bitmaps)
 {
        struct child_process cmd = CHILD_PROCESS_INIT;
        struct string_list_item *item;
+       struct packed_git *largest = get_largest_active_pack(geometry);
        FILE *in;
        int ret;
 
@@ -492,6 +513,10 @@ static int write_midx_included_packs(struct string_list *include,
        if (write_bitmaps)
                strvec_push(&cmd.args, "--bitmap");
 
+       if (largest)
+               strvec_pushf(&cmd.args, "--preferred-pack=%s",
+                            pack_basename(largest));
+
        ret = start_command(&cmd);
        if (ret)
                return ret;
@@ -783,7 +808,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
                midx_included_packs(&include, &existing_nonkept_packs,
                                    &existing_kept_packs, &names, geometry);
 
-               ret = write_midx_included_packs(&include,
+               ret = write_midx_included_packs(&include, geometry,
                                                show_progress, write_bitmaps > 0);
 
                string_list_clear(&include, 0);
index 8504110a4daa9013304bc545bc529d6f8c3a8446..67be9be9a629f173c3fd38dd01ba9f9a01388dae 100644 (file)
@@ -1418,7 +1418,7 @@ static int try_partial_reuse(struct packed_git *pack,
        return 0;
 }
 
-static uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git)
+uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git)
 {
        struct multi_pack_index *m = bitmap_git->midx;
        if (!m)
index 469090bad2cf170670edc7e6a9ea66a51cc25482..7d407c5a4c4437e83fa8a8507b07563b2d3f1731 100644 (file)
@@ -55,6 +55,7 @@ int test_bitmap_commits(struct repository *r);
 struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
                                         struct list_objects_filter_options *filter,
                                         int filter_provided_objects);
+uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git);
 int reuse_partial_packfile_from_bitmap(struct bitmap_index *,
                                       struct packed_git **packfile,
                                       uint32_t *entries,
index cb0d27049a07e1dcd886bbe6aaede26176a54ee2..00385591297567154f817c5a3f1fdeb0dd5c4b9a 100644 (file)
@@ -3,6 +3,7 @@
 #include "midx.h"
 #include "repository.h"
 #include "object-store.h"
+#include "pack-bitmap.h"
 
 static int read_midx_file(const char *object_dir, int show_objects)
 {
@@ -72,14 +73,36 @@ static int read_midx_checksum(const char *object_dir)
        return 0;
 }
 
+static int read_midx_preferred_pack(const char *object_dir)
+{
+       struct multi_pack_index *midx = NULL;
+       struct bitmap_index *bitmap = NULL;
+
+       setup_git_directory();
+
+       midx = load_multi_pack_index(object_dir, 1);
+       if (!midx)
+               return 1;
+
+       bitmap = prepare_bitmap_git(the_repository);
+       if (!(bitmap && bitmap_is_midx(bitmap)))
+               return 1;
+
+
+       printf("%s\n", midx->pack_names[midx_preferred_pack(bitmap)]);
+       return 0;
+}
+
 int cmd__read_midx(int argc, const char **argv)
 {
        if (!(argc == 2 || argc == 3))
-               usage("read-midx [--show-objects|--checksum] <object-dir>");
+               usage("read-midx [--show-objects|--checksum|--preferred-pack] <object-dir>");
 
        if (!strcmp(argv[1], "--show-objects"))
                return read_midx_file(argv[2], 1);
        else if (!strcmp(argv[1], "--checksum"))
                return read_midx_checksum(argv[2]);
+       else if (!strcmp(argv[1], "--preferred-pack"))
+               return read_midx_preferred_pack(argv[2]);
        return read_midx_file(argv[1], 0);
 }
index 67049f763762ee70945b81b6cfcdb923372ebb6a..bdbbcbf1eca88fef1c2dcd6632d880d3848c842d 100755 (executable)
@@ -180,4 +180,26 @@ test_expect_success '--geometric ignores kept packs' '
        )
 '
 
+test_expect_success '--geometric chooses largest MIDX preferred pack' '
+       git init geometric &&
+       test_when_finished "rm -fr geometric" &&
+       (
+               cd geometric &&
+
+               # These packs already form a geometric progression.
+               test_commit_bulk --start=1 1 && # 3 objects
+               test_commit_bulk --start=2 2 && # 6 objects
+               ls $objdir/pack/pack-*.idx >before &&
+               test_commit_bulk --start=4 4 && # 12 objects
+               ls $objdir/pack/pack-*.idx >after &&
+
+               git repack --geometric 2 -dbm &&
+
+               comm -3 before after | xargs -n 1 basename >expect &&
+               test-tool read-midx --preferred-pack $objdir >actual &&
+
+               test_cmp expect actual
+       )
+'
+
 test_done