]> git.ipfire.org Git - thirdparty/git.git/commitdiff
diff: halt tree-diff early after max_changes
authorDerrick Stolee <dstolee@microsoft.com>
Mon, 30 Mar 2020 00:31:27 +0000 (00:31 +0000)
committerJunio C Hamano <gitster@pobox.com>
Mon, 30 Mar 2020 16:59:53 +0000 (09:59 -0700)
When computing the changed-paths bloom filters for the commit-graph,
we limit the size of the filter by restricting the number of paths
in the diff. Instead of computing a large diff and then ignoring the
result, it is better to halt the diff computation early.

Create a new "max_changes" option in struct diff_options. If non-zero,
then halt the diff computation after discovering strictly more changed
paths. This includes paths corresponding to trees that change.

Use this max_changes option in the bloom filter calculations. This
reduces the time taken to compute the filters for the Linux kernel
repo from 2m50s to 2m35s. On a large internal repository with ~500
commits that perform tree-wide changes, the time reduced from
6m15s to 3m48s.

Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Garima Singh <garima.singh@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
bloom.c
diff.h
tree-diff.c

diff --git a/bloom.c b/bloom.c
index 881a9841ede14bd2b05d04722d418f227a3a10bd..a16eee92331183cb18232037c7d659646008b4ca 100644 (file)
--- a/bloom.c
+++ b/bloom.c
@@ -133,6 +133,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
        struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
        int i;
        struct diff_options diffopt;
+       int max_changes = 512;
 
        if (bloom_filters.slab_size == 0)
                return NULL;
@@ -141,6 +142,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
 
        repo_diff_setup(r, &diffopt);
        diffopt.flags.recursive = 1;
+       diffopt.max_changes = max_changes;
        diff_setup_done(&diffopt);
 
        if (c->parents)
@@ -149,7 +151,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
                diff_tree_oid(NULL, &c->object.oid, "", &diffopt);
        diffcore_std(&diffopt);
 
-       if (diff_queued_diff.nr <= 512) {
+       if (diff_queued_diff.nr <= max_changes) {
                struct hashmap pathmap;
                struct pathmap_hash_entry *e;
                struct hashmap_iter iter;
diff --git a/diff.h b/diff.h
index 6febe7e3656ae4da3923d9485bb9dee10c1d5acd..9443dc1b0039026ba6c32efd3e445fa61a0860f5 100644 (file)
--- a/diff.h
+++ b/diff.h
@@ -285,6 +285,11 @@ struct diff_options {
        /* Number of hexdigits to abbreviate raw format output to. */
        int abbrev;
 
+       /* If non-zero, then stop computing after this many changes. */
+       int max_changes;
+       /* For internal use only. */
+       int num_changes;
+
        int ita_invisible_in_index;
 /* white-space error highlighting */
 #define WSEH_NEW (1<<12)
index 33ded7f8b3e71069f7a75bf7524443f3d81b7532..f3d303c6e541acd4ab715c0a5a329abdc24e7819 100644 (file)
@@ -434,6 +434,9 @@ static struct combine_diff_path *ll_diff_tree_paths(
                if (diff_can_quit_early(opt))
                        break;
 
+               if (opt->max_changes && opt->num_changes > opt->max_changes)
+                       break;
+
                if (opt->pathspec.nr) {
                        skip_uninteresting(&t, base, opt);
                        for (i = 0; i < nparent; i++)
@@ -518,6 +521,7 @@ static struct combine_diff_path *ll_diff_tree_paths(
 
                        /* t↓ */
                        update_tree_entry(&t);
+                       opt->num_changes++;
                }
 
                /* t > p[imin] */
@@ -535,6 +539,7 @@ static struct combine_diff_path *ll_diff_tree_paths(
                skip_emit_tp:
                        /* ∀ pi=p[imin]  pi↓ */
                        update_tp_entries(tp, nparent);
+                       opt->num_changes++;
                }
        }
 
@@ -552,6 +557,7 @@ struct combine_diff_path *diff_tree_paths(
        const struct object_id **parents_oid, int nparent,
        struct strbuf *base, struct diff_options *opt)
 {
+       opt->num_changes = 0;
        p = ll_diff_tree_paths(p, oid, parents_oid, nparent, base, opt);
 
        /*