]> git.ipfire.org Git - thirdparty/git.git/commitdiff
revision.c: consult Bloom filters for root commits
authorTaylor Blau <me@ttaylorr.com>
Tue, 25 Jun 2024 17:39:13 +0000 (13:39 -0400)
committerJunio C Hamano <gitster@pobox.com>
Tue, 25 Jun 2024 20:52:05 +0000 (13:52 -0700)
The commit-graph stores changed-path Bloom filters which represent the
set of paths included in a tree-level diff between a commit's root tree
and that of its parent.

When a commit has no parents, the tree-diff is computed against that
commit's root tree and the empty tree. In other words, every path in
that commit's tree is stored in the Bloom filter (since they all appear
in the diff).

Consult these filters during pathspec-limited traversals in the function
`rev_same_tree_as_empty()`. Doing so yields a performance improvement
where we can avoid enumerating the full set of paths in a parentless
commit's root tree when we know that the path(s) of interest were not
listed in that commit's changed-path Bloom filter.

Suggested-by: SZEDER Gábor <szeder.dev@gmail.com>
Original-patch-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
revision.c
t/t4216-log-bloom.sh

index 2424c9bd674e534909df89e25c21b5eb119fda05..0e6f7d02b64f1e18b31adc4973c207f8c4731ce0 100644 (file)
@@ -833,17 +833,28 @@ static int rev_compare_tree(struct rev_info *revs,
        return tree_difference;
 }
 
-static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
+static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit,
+                                 int nth_parent)
 {
        struct tree *t1 = repo_get_commit_tree(the_repository, commit);
+       int bloom_ret = -1;
 
        if (!t1)
                return 0;
 
+       if (!nth_parent && revs->bloom_keys_nr) {
+               bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
+               if (!bloom_ret)
+                       return 1;
+       }
+
        tree_difference = REV_TREE_SAME;
        revs->pruning.flags.has_changes = 0;
        diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
 
+       if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
+               count_bloom_filter_false_positive++;
+
        return tree_difference == REV_TREE_SAME;
 }
 
@@ -881,7 +892,7 @@ static int compact_treesame(struct rev_info *revs, struct commit *commit, unsign
                if (nth_parent != 0)
                        die("compact_treesame %u", nth_parent);
                old_same = !!(commit->object.flags & TREESAME);
-               if (rev_same_tree_as_empty(revs, commit))
+               if (rev_same_tree_as_empty(revs, commit, nth_parent))
                        commit->object.flags |= TREESAME;
                else
                        commit->object.flags &= ~TREESAME;
@@ -977,7 +988,14 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
                return;
 
        if (!commit->parents) {
-               if (rev_same_tree_as_empty(revs, commit))
+               /*
+                * Pretend as if we are comparing ourselves to the
+                * (non-existent) first parent of this commit object. Even
+                * though no such parent exists, its changed-path Bloom filter
+                * (if one exists) is relative to the empty tree, using Bloom
+                * filters is allowed here.
+                */
+               if (rev_same_tree_as_empty(revs, commit, 0))
                        commit->object.flags |= TREESAME;
                return;
        }
@@ -1058,7 +1076,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
 
                case REV_TREE_NEW:
                        if (revs->remove_empty_trees &&
-                           rev_same_tree_as_empty(revs, p)) {
+                           rev_same_tree_as_empty(revs, p, nth_parent)) {
                                /* We are adding all the specified
                                 * paths from this parent, so the
                                 * history beyond this parent is not
index b7baf49d6221958b353671612c209a939b08813b..cc6ebc8140e2e0b91126dff1704f582b7d7ee465 100755 (executable)
@@ -88,7 +88,11 @@ test_bloom_filters_not_used () {
                # if the Bloom filter system is initialized, ensure that no
                # filters were used
                data="statistics:{"
-               data="$data\"filter_not_present\":0,"
+               # unusable filters (e.g., those computed with a
+               # different value of commitGraph.changedPathsVersion)
+               # are counted in the filter_not_present bucket, so any
+               # value is OK there.
+               data="$data\"filter_not_present\":[0-9][0-9]*,"
                data="$data\"maybe\":0,"
                data="$data\"definitely_not\":0,"
                data="$data\"false_positive\":0}"
@@ -175,7 +179,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
 
 test_bloom_filters_used_when_some_filters_are_missing () {
        log_args=$1
-       bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":9"
+       bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":6,\"definitely_not\":10"
        setup "$log_args" &&
        grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
        test_cmp log_wo_bloom log_w_bloom