]> git.ipfire.org Git - thirdparty/git.git/commitdiff
pack-objects: refactor path-walk delta phase
authorDerrick Stolee <stolee@gmail.com>
Fri, 16 May 2025 18:12:00 +0000 (18:12 +0000)
committerJunio C Hamano <gitster@pobox.com>
Fri, 16 May 2025 19:15:40 +0000 (12:15 -0700)
Previously, the --path-walk option to 'git pack-objects' would compute
deltas inline with the path-walk logic. This would make the progress
indicator look like it is taking a long time to enumerate objects, and
then very quickly computed deltas.

Instead of computing deltas on each region of objects organized by tree,
store a list of regions corresponding to these groups. These can later
be pulled from the list for delta compression before doing the "global"
delta search.

This presents a new progress indicator that can be used in tests to
verify that this stage is happening.

The current implementation is not integrated with threads, but we are
setting it up to arrive in the next change.

Since we do not attempt to sort objects by size until after exploring
all trees, we can remove the previous change to t5530 due to a different
error message appearing first.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
builtin/pack-objects.c
pack-objects.h
t/t5300-pack-object.sh
t/t5530-upload-pack-error.sh

index bdd20c074a9b50ac642ad7a3cf7762e3ce4ea561..c7bf3fbc02676bbdcfc8e729f523376efea46f29 100644 (file)
@@ -3237,6 +3237,51 @@ static int should_attempt_deltas(struct object_entry *entry)
        return 1;
 }
 
+static void find_deltas_for_region(struct object_entry *list,
+                                  struct packing_region *region,
+                                  unsigned int *processed)
+{
+       struct object_entry **delta_list;
+       unsigned int delta_list_nr = 0;
+
+       ALLOC_ARRAY(delta_list, region->nr);
+       for (size_t i = 0; i < region->nr; i++) {
+               struct object_entry *entry = list + region->start + i;
+               if (should_attempt_deltas(entry))
+                       delta_list[delta_list_nr++] = entry;
+       }
+
+       QSORT(delta_list, delta_list_nr, type_size_sort);
+       find_deltas(delta_list, &delta_list_nr, window, depth, processed);
+       free(delta_list);
+}
+
+static void find_deltas_by_region(struct object_entry *list,
+                                 struct packing_region *regions,
+                                 size_t start, size_t nr)
+{
+       unsigned int processed = 0;
+       size_t progress_nr;
+
+       if (!nr)
+               return;
+
+       progress_nr = regions[nr - 1].start + regions[nr - 1].nr;
+
+       if (progress)
+               progress_state = start_progress(the_repository,
+                                               _("Compressing objects by path"),
+                                               progress_nr);
+
+       while (nr--)
+               find_deltas_for_region(list,
+                                      &regions[start++],
+                                      &processed);
+
+       display_progress(progress_state, progress_nr);
+       stop_progress(&progress_state);
+}
+
 static void prepare_pack(int window, int depth)
 {
        struct object_entry **delta_list;
@@ -3261,6 +3306,10 @@ static void prepare_pack(int window, int depth)
        if (!to_pack.nr_objects || !window || !depth)
                return;
 
+       if (path_walk)
+               find_deltas_by_region(to_pack.objects, to_pack.regions,
+                                     0, to_pack.nr_regions);
+
        ALLOC_ARRAY(delta_list, to_pack.nr_objects);
        nr_deltas = n = 0;
 
@@ -4214,10 +4263,8 @@ static int add_objects_by_path(const char *path,
                               enum object_type type,
                               void *data)
 {
-       struct object_entry **delta_list = NULL;
        size_t oe_start = to_pack.nr_objects;
        size_t oe_end;
-       unsigned int sub_list_nr;
        unsigned int *processed = data;
 
        /*
@@ -4250,33 +4297,17 @@ static int add_objects_by_path(const char *path,
        if (oe_end == oe_start || !window)
                return 0;
 
-       sub_list_nr = 0;
-       if (oe_end > oe_start)
-               ALLOC_ARRAY(delta_list, oe_end - oe_start);
+       ALLOC_GROW(to_pack.regions,
+                  to_pack.nr_regions + 1,
+                  to_pack.nr_regions_alloc);
 
-       for (size_t i = 0; i < oe_end - oe_start; i++) {
-               struct object_entry *entry = to_pack.objects + oe_start + i;
+       to_pack.regions[to_pack.nr_regions].start = oe_start;
+       to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start;
+       to_pack.nr_regions++;
 
-               if (!should_attempt_deltas(entry))
-                       continue;
+       *processed += oids->nr;
+       display_progress(progress_state, *processed);
 
-               delta_list[sub_list_nr++] = entry;
-       }
-
-       /*
-        * Find delta bases among this list of objects that all match the same
-        * path. This causes the delta compression to be interleaved in the
-        * object walk, which can lead to confusing progress indicators. This is
-        * also incompatible with threaded delta calculations. In the future,
-        * consider creating a list of regions in the full to_pack.objects array
-        * that could be picked up by the threaded delta computation.
-        */
-       if (sub_list_nr && window) {
-               QSORT(delta_list, sub_list_nr, type_size_sort);
-               find_deltas(delta_list, &sub_list_nr, window, depth, processed);
-       }
-
-       free(delta_list);
        return 0;
 }
 
index d73e3843c92e9c52a29ff90c66a477c9288c6eff..51e1ff6b95bf23b4bdade6111259a000f2948ed6 100644 (file)
@@ -119,11 +119,23 @@ struct object_entry {
        unsigned ext_base:1; /* delta_idx points outside packlist */
 };
 
+/**
+ * A packing region is a section of the packing_data.objects array
+ * as given by a starting index and a number of elements.
+ */
+struct packing_region {
+       size_t start;
+       size_t nr;
+};
+
 struct packing_data {
        struct repository *repo;
        struct object_entry *objects;
        uint32_t nr_objects, nr_alloc;
 
+       struct packing_region *regions;
+       size_t nr_regions, nr_regions_alloc;
+
        int32_t *index;
        uint32_t index_size;
 
index 16420d1286398a5a90e185eb7b4a2e41ff388f55..c8df6afd7844682a0b59cdf2d8cc1fc9d3b46ef3 100755 (executable)
@@ -725,7 +725,9 @@ test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompat
 
 test_expect_success '--path-walk pack everything' '
        git -C server rev-parse HEAD >in &&
-       git -C server pack-objects --stdout --revs --path-walk <in >out.pack &&
+       GIT_PROGRESS_DELAY=0 git -C server pack-objects \
+               --stdout --revs --path-walk --progress <in >out.pack 2>err &&
+       grep "Compressing objects by path" err &&
        git -C server index-pack --stdin <out.pack
 '
 
@@ -734,7 +736,9 @@ test_expect_success '--path-walk thin pack' '
        $(git -C server rev-parse HEAD)
        ^$(git -C server rev-parse HEAD~2)
        EOF
-       git -C server pack-objects --thin --stdout --revs --path-walk <in >out.pack &&
+       GIT_PROGRESS_DELAY=0 git -C server pack-objects \
+               --thin --stdout --revs --path-walk --progress <in >out.pack 2>err &&
+       grep "Compressing objects by path" err &&
        git -C server index-pack --fix-thin --stdin <out.pack
 '
 
index 8eb6fea839a63f514c1583e755d7d87e6435271b..558eedf25a4c9b9f2bac8dca0d69b1b7eaebf381 100755 (executable)
@@ -34,12 +34,6 @@ test_expect_success 'upload-pack fails due to error in pack-objects packing' '
        hexsz=$(test_oid hexsz) &&
        printf "%04xwant %s\n00000009done\n0000" \
                $(($hexsz + 10)) $head >input &&
-
-       # The current implementation of path-walk causes a different
-       # error message. This will be changed by a future refactoring.
-       GIT_TEST_PACK_PATH_WALK=0 &&
-       export GIT_TEST_PACK_PATH_WALK &&
-
        test_must_fail git upload-pack . <input >/dev/null 2>output.err &&
        test_grep "unable to read" output.err &&
        test_grep "pack-objects died" output.err