From: Derrick Stolee Date: Fri, 16 May 2025 18:12:00 +0000 (+0000) Subject: pack-objects: refactor path-walk delta phase X-Git-Tag: v2.51.0-rc0~141^2~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=206a1bb203613b6550f8acfdf300f822bfa3c985;p=thirdparty%2Fgit.git pack-objects: refactor path-walk delta phase Previously, the --path-walk option to 'git pack-objects' would compute deltas inline with the path-walk logic. This would make the progress indicator look like it is taking a long time to enumerate objects, and then very quickly computed deltas. Instead of computing deltas on each region of objects organized by tree, store a list of regions corresponding to these groups. These can later be pulled from the list for delta compression before doing the "global" delta search. This presents a new progress indicator that can be used in tests to verify that this stage is happening. The current implementation is not integrated with threads, but we are setting it up to arrive in the next change. Since we do not attempt to sort objects by size until after exploring all trees, we can remove the previous change to t5530 due to a different error message appearing first. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano --- diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index bdd20c074a..c7bf3fbc02 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -3237,6 +3237,51 @@ static int should_attempt_deltas(struct object_entry *entry) return 1; } +static void find_deltas_for_region(struct object_entry *list, + struct packing_region *region, + unsigned int *processed) +{ + struct object_entry **delta_list; + unsigned int delta_list_nr = 0; + + ALLOC_ARRAY(delta_list, region->nr); + for (size_t i = 0; i < region->nr; i++) { + struct object_entry *entry = list + region->start + i; + if (should_attempt_deltas(entry)) + delta_list[delta_list_nr++] = entry; + } + + QSORT(delta_list, delta_list_nr, type_size_sort); + find_deltas(delta_list, &delta_list_nr, window, depth, processed); + free(delta_list); +} + +static void find_deltas_by_region(struct object_entry *list, + struct packing_region *regions, + size_t start, size_t nr) +{ + unsigned int processed = 0; + size_t progress_nr; + + if (!nr) + return; + + progress_nr = regions[nr - 1].start + regions[nr - 1].nr; + + if (progress) + progress_state = start_progress(the_repository, + _("Compressing objects by path"), + progress_nr); + + while (nr--) + find_deltas_for_region(list, + ®ions[start++], + &processed); + + display_progress(progress_state, progress_nr); + stop_progress(&progress_state); +} + static void prepare_pack(int window, int depth) { struct object_entry **delta_list; @@ -3261,6 +3306,10 @@ static void prepare_pack(int window, int depth) if (!to_pack.nr_objects || !window || !depth) return; + if (path_walk) + find_deltas_by_region(to_pack.objects, to_pack.regions, + 0, to_pack.nr_regions); + ALLOC_ARRAY(delta_list, to_pack.nr_objects); nr_deltas = n = 0; @@ -4214,10 +4263,8 @@ static int add_objects_by_path(const char *path, enum object_type type, void *data) { - struct object_entry **delta_list = NULL; size_t oe_start = to_pack.nr_objects; size_t oe_end; - unsigned int sub_list_nr; unsigned int *processed = data; /* @@ -4250,33 +4297,17 @@ static int add_objects_by_path(const char *path, if (oe_end == oe_start || !window) return 0; - sub_list_nr = 0; - if (oe_end > oe_start) - ALLOC_ARRAY(delta_list, oe_end - oe_start); + ALLOC_GROW(to_pack.regions, + to_pack.nr_regions + 1, + to_pack.nr_regions_alloc); - for (size_t i = 0; i < oe_end - oe_start; i++) { - struct object_entry *entry = to_pack.objects + oe_start + i; + to_pack.regions[to_pack.nr_regions].start = oe_start; + to_pack.regions[to_pack.nr_regions].nr = oe_end - oe_start; + to_pack.nr_regions++; - if (!should_attempt_deltas(entry)) - continue; + *processed += oids->nr; + display_progress(progress_state, *processed); - delta_list[sub_list_nr++] = entry; - } - - /* - * Find delta bases among this list of objects that all match the same - * path. This causes the delta compression to be interleaved in the - * object walk, which can lead to confusing progress indicators. This is - * also incompatible with threaded delta calculations. In the future, - * consider creating a list of regions in the full to_pack.objects array - * that could be picked up by the threaded delta computation. - */ - if (sub_list_nr && window) { - QSORT(delta_list, sub_list_nr, type_size_sort); - find_deltas(delta_list, &sub_list_nr, window, depth, processed); - } - - free(delta_list); return 0; } diff --git a/pack-objects.h b/pack-objects.h index d73e3843c9..51e1ff6b95 100644 --- a/pack-objects.h +++ b/pack-objects.h @@ -119,11 +119,23 @@ struct object_entry { unsigned ext_base:1; /* delta_idx points outside packlist */ }; +/** + * A packing region is a section of the packing_data.objects array + * as given by a starting index and a number of elements. + */ +struct packing_region { + size_t start; + size_t nr; +}; + struct packing_data { struct repository *repo; struct object_entry *objects; uint32_t nr_objects, nr_alloc; + struct packing_region *regions; + size_t nr_regions, nr_regions_alloc; + int32_t *index; uint32_t index_size; diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index 16420d1286..c8df6afd78 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -725,7 +725,9 @@ test_expect_success '--name-hash-version=2 and --write-bitmap-index are incompat test_expect_success '--path-walk pack everything' ' git -C server rev-parse HEAD >in && - git -C server pack-objects --stdout --revs --path-walk out.pack && + GIT_PROGRESS_DELAY=0 git -C server pack-objects \ + --stdout --revs --path-walk --progress out.pack 2>err && + grep "Compressing objects by path" err && git -C server index-pack --stdin out.pack && + GIT_PROGRESS_DELAY=0 git -C server pack-objects \ + --thin --stdout --revs --path-walk --progress out.pack 2>err && + grep "Compressing objects by path" err && git -C server index-pack --fix-thin --stdin input && - - # The current implementation of path-walk causes a different - # error message. This will be changed by a future refactoring. - GIT_TEST_PACK_PATH_WALK=0 && - export GIT_TEST_PACK_PATH_WALK && - test_must_fail git upload-pack . /dev/null 2>output.err && test_grep "unable to read" output.err && test_grep "pack-objects died" output.err