]> git.ipfire.org Git - thirdparty/git.git/commitdiff
commit-graph: introduce 'get_bloom_filter_settings()'
authorTaylor Blau <me@ttaylorr.com>
Wed, 9 Sep 2020 15:22:44 +0000 (11:22 -0400)
committerJunio C Hamano <gitster@pobox.com>
Wed, 9 Sep 2020 19:51:48 +0000 (12:51 -0700)
Many places in the code often need a pointer to the commit-graph's
'struct bloom_filter_settings', in which case they often take the value
from the top-most commit-graph.

In the non-split case, this works as expected. In the split case,
however, things get a little tricky. Not all layers in a chain of
incremental commit-graphs are required to themselves have Bloom data,
and so whether or not some part of the code uses Bloom filters depends
entirely on whether or not the top-most level of the commit-graph chain
has Bloom filters.

This has been the behavior since Bloom filters were introduced, and has
been codified into the tests since a759bfa9ee (t4216: add end to end
tests for git log with Bloom filters, 2020-04-06). In fact, t4216.130
requires that Bloom filters are not used in exactly the case described
earlier.

There is no reason that this needs to be the case, since it is perfectly
valid for commits in an earlier layer to have Bloom filters when commits
in a newer layer do not.

Since Bloom settings are guaranteed in practice to be the same for any
layer in a chain that has Bloom data, it is sufficient to traverse the
'->base_graph' pointer until either (1) a non-null 'struct
bloom_filter_settings *' is found, or (2) until we are at the root of
the commit-graph chain.

Introduce a 'get_bloom_filter_settings()' function that does just this,
and use it instead of purely dereferencing the top-most graph's
'->bloom_filter_settings' pointer.

While we're at it, add an additional test in t5324 to guard against code
in the commit-graph writing machinery that doesn't correctly handle a
NULL 'struct bloom_filter *'.

Co-authored-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
blame.c
bloom.c
commit-graph.c
commit-graph.h
revision.c
t/t4216-log-bloom.sh
t/t5324-split-commit-graph.sh

diff --git a/blame.c b/blame.c
index 82fa16d6585b90e3635d87f6adc1e4856524f7d6..3e5f8787bc446af4d13c528a260254e50b5a8a8f 100644 (file)
--- a/blame.c
+++ b/blame.c
@@ -2891,16 +2891,18 @@ void setup_blame_bloom_data(struct blame_scoreboard *sb,
                            const char *path)
 {
        struct blame_bloom_data *bd;
+       struct bloom_filter_settings *bs;
 
        if (!sb->repo->objects->commit_graph)
                return;
 
-       if (!sb->repo->objects->commit_graph->bloom_filter_settings)
+       bs = get_bloom_filter_settings(sb->repo);
+       if (!bs)
                return;
 
        bd = xmalloc(sizeof(struct blame_bloom_data));
 
-       bd->settings = sb->repo->objects->commit_graph->bloom_filter_settings;
+       bd->settings = bs;
 
        bd->alloc = 4;
        bd->nr = 0;
diff --git a/bloom.c b/bloom.c
index 1a573226e70e571b49a7af29d49740768b1465af..cd9380ac623d4a01ec04ebd2110747e756c70df4 100644 (file)
--- a/bloom.c
+++ b/bloom.c
@@ -38,7 +38,7 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
        while (graph_pos < g->num_commits_in_base)
                g = g->base_graph;
 
-       /* The commit graph commit 'c' lives in doesn't carry bloom filters. */
+       /* The commit graph commit 'c' lives in doesn't carry Bloom filters. */
        if (!g->chunk_bloom_indexes)
                return 0;
 
@@ -195,8 +195,8 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
        if (!filter->data) {
                load_commit_graph_info(r, c);
                if (commit_graph_position(c) != COMMIT_NOT_FROM_GRAPH &&
-                       r->objects->commit_graph->chunk_bloom_indexes)
-                       load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
+                       load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
+                               return filter;
        }
 
        if (filter->data)
index e51c91dd5b0af4e4946091e5ca7f06707b3a0c5b..d4b06811bec068f0656df7c5e279a222203e0fb8 100644 (file)
@@ -660,6 +660,17 @@ int generation_numbers_enabled(struct repository *r)
        return !!first_generation;
 }
 
+struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r)
+{
+       struct commit_graph *g = r->objects->commit_graph;
+       while (g) {
+               if (g->bloom_filter_settings)
+                       return g->bloom_filter_settings;
+               g = g->base_graph;
+       }
+       return NULL;
+}
+
 static void close_commit_graph_one(struct commit_graph *g)
 {
        if (!g)
index 09a97030dcd8a2ea570da097d6f27a5d0cbaf934..0677dd1031a9d4bf9b8534b03392d9fc38140725 100644 (file)
@@ -87,6 +87,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size);
  */
 int generation_numbers_enabled(struct repository *r);
 
+struct bloom_filter_settings *get_bloom_filter_settings(struct repository *r);
+
 enum commit_graph_write_flags {
        COMMIT_GRAPH_WRITE_APPEND     = (1 << 0),
        COMMIT_GRAPH_WRITE_PROGRESS   = (1 << 1),
index 6de29cdf7a1cbb6138ba0e9bbdd1216085674e76..c45ed1076edd59f508787f8c2dac76d40f43c566 100644 (file)
@@ -681,10 +681,7 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
 
        repo_parse_commit(revs->repo, revs->commits->item);
 
-       if (!revs->repo->objects->commit_graph)
-               return;
-
-       revs->bloom_filter_settings = revs->repo->objects->commit_graph->bloom_filter_settings;
+       revs->bloom_filter_settings = get_bloom_filter_settings(revs->repo);
        if (!revs->bloom_filter_settings)
                return;
 
index c21cc160f3bb9dac91ce222e4fda5216927c1859..c9f9bdf1ba199b100cb505ac95abeae6f9c49a03 100755 (executable)
@@ -60,7 +60,7 @@ setup () {
 
 test_bloom_filters_used () {
        log_args=$1
-       bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"maybe\""
+       bloom_trace_prefix="statistics:{\"filter_not_present\":${2:-0},\"maybe\""
        setup "$log_args" &&
        grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
        test_cmp log_wo_bloom log_w_bloom &&
@@ -134,8 +134,11 @@ test_expect_success 'setup - add commit-graph to the chain without Bloom filters
        test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain
 '
 
-test_expect_success 'Do not use Bloom filters if the latest graph does not have Bloom filters.' '
-       test_bloom_filters_not_used "-- A/B"
+test_expect_success 'use Bloom filters even if the latest graph does not have Bloom filters' '
+       # Ensure that the number of empty filters is equal to the number of
+       # filters in the latest graph layer to prove that they are loaded (and
+       # ignored).
+       test_bloom_filters_used "-- A/B" 3
 '
 
 test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
index 9b850ea9070f8081b1709b2646a73535e77b15d2..5bdfd53ef954606d439574e2dd456763001dc82e 100755 (executable)
@@ -425,4 +425,17 @@ done <<\EOF
 0600 -r--------
 EOF
 
+test_expect_success '--split=replace with partial Bloom data' '
+       rm -rf $graphdir $infodir/commit-graph &&
+       git reset --hard commits/3 &&
+       git rev-list -1 HEAD~2 >a &&
+       git rev-list -1 HEAD~1 >b &&
+       git commit-graph write --split=no-merge --stdin-commits --changed-paths <a &&
+       git commit-graph write --split=no-merge --stdin-commits <b &&
+       git commit-graph write --split=replace --stdin-commits --changed-paths <c &&
+       ls $graphdir/graph-*.graph >graph-files &&
+       test_line_count = 1 graph-files &&
+       verify_chain_files_exist $graphdir
+'
+
 test_done