git.ipfire.org Git - thirdparty/git.git/commitdiff
builtin/commit-graph.c: introduce '--max-new-filters=<n>'
author Taylor Blau <me@ttaylorr.com>
Fri, 18 Sep 2020 13:27:27 +0000 (09:27 -0400)
committer Junio C Hamano <gitster@pobox.com>
Fri, 18 Sep 2020 17:35:39 +0000 (10:35 -0700)
Introduce a command-line flag to specify the maximum number of new Bloom
filters that a 'git commit-graph write' is willing to compute from
scratch.

Prior to this patch, a commit-graph write with '--changed-paths' would
compute Bloom filters for all selected commits which haven't already
been computed (i.e., by a previous commit-graph write with '--split'
such that a roll-up or replacement is performed).

This behavior can cause prohibitively-long commit-graph writes for a
variety of reasons:

  * There may be lots of filters whose diffs take a long time to
    generate (for example, they have close to the maximum number of
    changes, diffing itself takes a long time, etc).

  * Old-style commit-graphs (which encode filters with too many entries
    as not having been computed at all) cause us to waste time
    recomputing filters that appear to have not been computed only to
    discover that they are too-large.

This can make the upper-bound of the time it takes for 'git commit-graph
write --changed-paths' to be rather unpredictable.

To make this command behave more predictably, introduce
'--max-new-filters=<n>' to allow computing at most '<n>' Bloom filters
from scratch. This lets "computing" already-known filters proceed
quickly, while bounding the number of slow tasks that Git is willing to
do.

Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Documentation/git-commit-graph.txt
bloom.c
builtin/commit-graph.c
commit-graph.c
commit-graph.h
t/t4216-log-bloom.sh

index 17405c73a98e8a80bf0b45b2a730aee2693f8614..8c758557828a385d070ea11a5c994f0f1fecaa9c 100644 (file)
@@ -67,6 +67,12 @@ this option is given, future commit-graph writes will automatically assume
 that this option was intended. Use `--no-changed-paths` to stop storing this
 data.
 +
+With the `--max-new-filters=<n>` option, generate at most `n` new Bloom
+filters (if `--changed-paths` is specified). If `n` is `-1`, no limit is
+enforced. Only commits present in the new layer count against this
+limit. To retroactively compute Bloom filters over earlier layers, it is
+advised to use `--split=replace`.
++
 With the `--split[=<strategy>]` option, write the commit-graph as a
 chain of multiple commit-graph files stored in
 `<dir>/info/commit-graphs`. Commit-graph layers are merged based on the
diff --git a/bloom.c b/bloom.c
index d234551ce0928f956be732541dac2888da76d3b0..68c73200a54aa4fdad582d6031c59313c2c90d73 100644 (file)
--- a/bloom.c
+++ b/bloom.c
@@ -204,12 +204,11 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
 
        if (!filter->data) {
                load_commit_graph_info(r, c);
-               if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH &&
-                       load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
-                               return filter;
+               if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH)
+                       load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
        }
 
-       if (filter->data)
+       if (filter->data && filter->len)
                return filter;
        if (!compute_if_not_present)
                return NULL;
index f3243bd9825bfe0effa65e3ea0e26269b4be727a..5df9b2ef800d2b9683b32ca95a8d4b0cbe843411 100644 (file)
@@ -13,7 +13,8 @@ static char const * const builtin_commit_graph_usage[] = {
        N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
        N_("git commit-graph write [--object-dir <objdir>] [--append] "
           "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-          "[--changed-paths] [--[no-]progress] <split options>"),
+          "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+          "<split options>"),
        NULL
 };
 
@@ -25,7 +26,8 @@ static const char * const builtin_commit_graph_verify_usage[] = {
 static const char * const builtin_commit_graph_write_usage[] = {
        N_("git commit-graph write [--object-dir <objdir>] [--append] "
           "[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
-          "[--changed-paths] [--[no-]progress] <split options>"),
+          "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+          "<split options>"),
        NULL
 };
 
@@ -162,6 +164,23 @@ static int read_one_commit(struct oidset *commits, struct progress *progress,
        return 0;
 }
 
+/*
+ * parse-options callback for `--[no-]max-new-filters`.
+ *
+ * Stores the parsed limit in the int that opt->value points to. With
+ * `unset` (the `--no-` form) the value is reset to -1. Otherwise `arg`
+ * must be a complete base-10 integer that fits in an int.
+ *
+ * Returns 0 on success, or the result of error() when `arg` is empty,
+ * has trailing non-numeric characters, or does not fit in an int; the
+ * destination is left untouched in that case.
+ */
+static int write_option_max_new_filters(const struct option *opt,
+                                       const char *arg,
+                                       int unset)
+{
+       int *to = opt->value;
+       char *end;
+       long value;
+
+       if (unset) {
+               *to = -1;
+               return 0;
+       }
+
+       value = strtol(arg, &end, 10);
+       /*
+        * end == arg: no digits were consumed (e.g. "--max-new-filters=");
+        * *end: trailing garbage after the number;
+        * (int)value != value: the number does not survive narrowing to int.
+        */
+       if (end == arg || *end || (int)value != value)
+               return error(_("%s expects a numerical value"),
+                            optname(opt, opt->flags));
+
+       *to = (int)value;
+       return 0;
+}
+
 static int graph_write(int argc, const char **argv)
 {
        struct string_list pack_indexes = STRING_LIST_INIT_NODUP;
@@ -197,6 +216,9 @@ static int graph_write(int argc, const char **argv)
                        N_("maximum ratio between two levels of a split commit-graph")),
                OPT_EXPIRY_DATE(0, "expire-time", &write_opts.expire_time,
                        N_("only expire files older than a given date-time")),
+               OPT_CALLBACK_F(0, "max-new-filters", &write_opts.max_new_filters,
+                       NULL, N_("maximum number of changed-path Bloom filters to compute"),
+                       0, write_option_max_new_filters),
                OPT_END(),
        };
 
@@ -205,6 +227,7 @@ static int graph_write(int argc, const char **argv)
        write_opts.size_multiple = 2;
        write_opts.max_commits = 0;
        write_opts.expire_time = 0;
+       write_opts.max_new_filters = -1;
 
        trace2_cmd_mode("write");
 
index 1a53a03f5d472f34cd5a1bf43deeb61c4c394eb3..1aedc1c4df45299445cf03f1f45383a86051c57d 100644 (file)
@@ -1408,6 +1408,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
        int i;
        struct progress *progress = NULL;
        struct commit **sorted_commits;
+       int max_new_filters;
 
        init_bloom_filters();
 
@@ -1424,13 +1425,16 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
        else
                QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
 
+       max_new_filters = ctx->opts && ctx->opts->max_new_filters >= 0 ?
+               ctx->opts->max_new_filters : ctx->commits.nr;
+
        for (i = 0; i < ctx->commits.nr; i++) {
                enum bloom_filter_computed computed = 0;
                struct commit *c = sorted_commits[i];
                struct bloom_filter *filter = get_or_compute_bloom_filter(
                        ctx->r,
                        c,
-                       1,
+                       ctx->count_bloom_filter_computed < max_new_filters,
                        ctx->bloom_settings,
                        &computed);
                if (computed & BLOOM_COMPUTED) {
@@ -1441,7 +1445,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
                                ctx->count_bloom_filter_trunc_large++;
                } else if (computed & BLOOM_NOT_COMPUTED)
                        ctx->count_bloom_filter_not_computed++;
-               ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
+               ctx->total_bloom_filter_data_size += filter
+                       ? sizeof(unsigned char) * filter->len : 0;
                display_progress(progress, i + 1);
        }
 
index b7914b0a7aa881eeb638a7a09111b1b11a99e238..a22bd86701e74ba0f98b002ca18e14a101539a67 100644 (file)
@@ -110,6 +110,7 @@ struct commit_graph_opts {
        int max_commits;
        timestamp_t expire_time;
        enum commit_graph_split_flags split_flags;
+       int max_new_filters;
 };
 
 /*
index ec9845c9bee530fd87b4310a1c2e1ec1ac406f41..48ab1b16a6dec761e9e428d7e27bd004bb0e7405 100755 (executable)
@@ -305,4 +305,74 @@ test_expect_success 'correctly report commits with no changed paths' '
        )
 '
 
+test_expect_success 'Bloom generation is limited by --max-new-filters' '
+       (
+               cd limits &&
+               test_commit c2 filter &&
+               test_commit c3 filter &&
+               test_commit c4 no-filter &&
+
+               rm -f trace.event &&
+               GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+                       git commit-graph write --reachable --split=replace \
+                               --changed-paths --max-new-filters=2 &&
+
+               test_filter_computed 2 trace.event &&
+               test_filter_not_computed 3 trace.event &&
+               test_filter_trunc_empty 0 trace.event &&
+               test_filter_trunc_large 0 trace.event
+       )
+'
+
+test_expect_success 'Bloom generation backfills previously-skipped filters' '
+       (
+               cd limits &&
+
+               rm -f trace.event &&
+               GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+                       git commit-graph write --reachable --changed-paths \
+                               --split=replace --max-new-filters=1 &&
+               test_filter_computed 1 trace.event &&
+               test_filter_not_computed 4 trace.event &&
+               test_filter_trunc_empty 0 trace.event &&
+               test_filter_trunc_large 0 trace.event
+       )
+'
+
+test_expect_success 'Bloom generation backfills empty commits' '
+       git init empty &&
+       test_when_finished "rm -fr empty" &&
+       (
+               cd empty &&
+               # A loop only reports the status of its last iteration, so
+               # each loop below leaves the subshell on the first failure.
+               for i in $(test_seq 1 6)
+               do
+                       git commit --allow-empty -m "$i" || exit 1
+               done &&
+
+               # Generate Bloom filters for empty commits 1-6, two at a time.
+               for i in $(test_seq 1 3)
+               do
+                       rm -f trace.event &&
+                       GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+                               git commit-graph write --reachable \
+                                       --changed-paths --max-new-filters=2 &&
+                       test_filter_computed 2 trace.event &&
+                       test_filter_not_computed 4 trace.event &&
+                       test_filter_trunc_empty 2 trace.event &&
+                       test_filter_trunc_large 0 trace.event || exit 1
+               done &&
+
+               # Finally, make sure that once all commits have filters, that
+               # none are subsequently recomputed.
+               rm -f trace.event &&
+               GIT_TRACE2_EVENT="$(pwd)/trace.event" \
+                       git commit-graph write --reachable \
+                               --changed-paths --max-new-filters=2 &&
+               test_filter_computed 0 trace.event &&
+               test_filter_not_computed 6 trace.event &&
+               test_filter_trunc_empty 0 trace.event &&
+               test_filter_trunc_large 0 trace.event
+       )
+'
+
 test_done