]> git.ipfire.org Git - thirdparty/git.git/commitdiff
bloom: fix logic in get_bloom_filter()
authorDerrick Stolee <dstolee@microsoft.com>
Wed, 1 Jul 2020 13:27:23 +0000 (13:27 +0000)
committerJunio C Hamano <gitster@pobox.com>
Wed, 1 Jul 2020 21:17:43 +0000 (14:17 -0700)
The get_bloom_filter() method is a bit complicated in some parts where
it does not need to be. In particular, it needs to return a NULL filter
only when compute_if_not_present is zero AND the filter data cannot be
loaded from a commit-graph file. This currently happens by accident
because the commit-graph does not load changed-path Bloom filters from
an existing commit-graph when writing a new one. This will change in a
later patch.

Also clean up some style issues while we are here.

One side-effect of returning a NULL filter is that the filters that are
reported as "too large" will now be reported as NULL insead of length
zero. This case was not properly covered before, so add a test. Further,
remote the counting of the zero-length filters from revision.c and the
trace2 logs.

Helped-by: René Scharfe <l.s.r@web.de>
Helped-by: SZEDER Gábor <szeder.dev@gmail.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
bloom.c
commit-graph.c
revision.c
t/t4216-log-bloom.sh

diff --git a/bloom.c b/bloom.c
index c38d1cff0c6c62f9de0c4edbafcc6bbdc3de424e..2af5389795cc6e3d68b50538259611b53bd7fd3a 100644 (file)
--- a/bloom.c
+++ b/bloom.c
@@ -186,7 +186,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
        struct diff_options diffopt;
        int max_changes = 512;
 
-       if (bloom_filters.slab_size == 0)
+       if (!bloom_filters.slab_size)
                return NULL;
 
        filter = bloom_filter_slab_at(&bloom_filters, c);
@@ -194,16 +194,14 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
        if (!filter->data) {
                load_commit_graph_info(r, c);
                if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
-                       r->objects->commit_graph->chunk_bloom_indexes) {
-                       if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
-                               return filter;
-                       else
-                               return NULL;
-               }
+                   r->objects->commit_graph->chunk_bloom_indexes)
+                       load_bloom_filter_from_graph(r->objects->commit_graph, filter, c);
        }
 
-       if (filter->data || !compute_if_not_present)
+       if (filter->data)
                return filter;
+       if (!compute_if_not_present)
+               return NULL;
 
        repo_diff_setup(r, &diffopt);
        diffopt.flags.recursive = 1;
index 6a28d4a5a6df24d2508d9ca9182ab628346295f6..50ce039a53ee6c4200a3019af21dd3c287df5324 100644 (file)
@@ -1098,7 +1098,8 @@ static void write_graph_chunk_bloom_indexes(struct hashfile *f,
 
        while (list < last) {
                struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
-               cur_pos += filter->len;
+               size_t len = filter ? filter->len : 0;
+               cur_pos += len;
                display_progress(progress, ++i);
                hashwrite_be32(f, cur_pos);
                list++;
@@ -1126,8 +1127,11 @@ static void write_graph_chunk_bloom_data(struct hashfile *f,
 
        while (list < last) {
                struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
+               size_t len = filter ? filter->len : 0;
                display_progress(progress, ++i);
-               hashwrite(f, filter->data, filter->len * sizeof(unsigned char));
+
+               if (len)
+                       hashwrite(f, filter->data, len * sizeof(unsigned char));
                list++;
        }
 
index c644c660917a55fc5962535bc4e9f160a058becf..7339750af12a8f49d505d968292dc039dab3cb0f 100644 (file)
@@ -633,7 +633,6 @@ static unsigned int count_bloom_filter_maybe;
 static unsigned int count_bloom_filter_definitely_not;
 static unsigned int count_bloom_filter_false_positive;
 static unsigned int count_bloom_filter_not_present;
-static unsigned int count_bloom_filter_length_zero;
 
 static void trace2_bloom_filter_statistics_atexit(void)
 {
@@ -641,7 +640,6 @@ static void trace2_bloom_filter_statistics_atexit(void)
 
        jw_object_begin(&jw, 0);
        jw_object_intmax(&jw, "filter_not_present", count_bloom_filter_not_present);
-       jw_object_intmax(&jw, "zero_length_filter", count_bloom_filter_length_zero);
        jw_object_intmax(&jw, "maybe", count_bloom_filter_maybe);
        jw_object_intmax(&jw, "definitely_not", count_bloom_filter_definitely_not);
        jw_object_intmax(&jw, "false_positive", count_bloom_filter_false_positive);
@@ -735,11 +733,6 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
                return -1;
        }
 
-       if (!filter->len) {
-               count_bloom_filter_length_zero++;
-               return -1;
-       }
-
        result = bloom_filter_contains(filter,
                                       revs->bloom_key,
                                       revs->bloom_filter_settings);
index c7011f33e2c1c0d9ad80a6e2e2185f901dbe862d..2761208e7494df41a858a792cd7a6924cb7f01f2 100755 (executable)
@@ -60,7 +60,7 @@ setup () {
 
 test_bloom_filters_used () {
        log_args=$1
-       bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"zero_length_filter\":0,\"maybe\""
+       bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"maybe\""
        setup "$log_args" &&
        grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
        test_cmp log_wo_bloom log_w_bloom &&
@@ -142,7 +142,7 @@ test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
 
 test_bloom_filters_used_when_some_filters_are_missing () {
        log_args=$1
-       bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":8,\"definitely_not\":6"
+       bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"maybe\":8,\"definitely_not\":6"
        setup "$log_args" &&
        grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
        test_cmp log_wo_bloom log_w_bloom
@@ -152,4 +152,24 @@ test_expect_success 'Use Bloom filters if they exist in the latest but not all c
        test_bloom_filters_used_when_some_filters_are_missing "-- A/B"
 '
 
+test_expect_success 'correctly report changes over limit' '
+       git init 513changes &&
+       (
+               cd 513changes &&
+               for i in $(test_seq 1 513)
+               do
+                       echo $i >file$i.txt || return 1
+               done &&
+               git add . &&
+               git commit -m "files" &&
+               git commit-graph write --reachable --changed-paths &&
+               for i in $(test_seq 1 513)
+               do
+                       git -c core.commitGraph=false log -- file$i.txt >expect &&
+                       git log -- file$i.txt >actual &&
+                       test_cmp expect actual || return 1
+               done
+       )
+'
+
 test_done
\ No newline at end of file