]> git.ipfire.org Git - thirdparty/git.git/commitdiff
bloom: split 'get_bloom_filter()' in two
authorTaylor Blau <me@ttaylorr.com>
Wed, 16 Sep 2020 18:07:32 +0000 (14:07 -0400)
committerJunio C Hamano <gitster@pobox.com>
Thu, 17 Sep 2020 16:31:25 +0000 (09:31 -0700)
'get_bloom_filter' takes a flag to control whether it will compute a
Bloom filter if the requested one is missing. In the next patch, we'll
add yet another parameter to this method, which would force all but one
caller to specify an extra 'NULL' parameter at the end.

Instead of doing this, split 'get_bloom_filter' into two functions:
'get_bloom_filter' and 'get_or_compute_bloom_filter'. The former only
looks up a Bloom filter (and does not compute one if it's missing,
thus dropping the 'compute_if_not_present' flag). The latter does
compute missing Bloom filters, with an additional parameter to store
whether or not it needed to do so.

This simplifies many call-sites, since the majority of existing callers
to 'get_bloom_filter' do not want missing Bloom filters to be computed
(so they can drop the parameter entirely and use the simpler version of
the function).

While we're at it, instrument the new 'get_or_compute_bloom_filter()'
with counters in the 'write_commit_graph_context' struct which store
the number of filters that we did and didn't compute, as well as filters
that were truncated.

It would be nice to drop the 'compute_if_not_present' flag entirely,
since all remaining callers of 'get_or_compute_bloom_filter' pass it as
'1', but this will change in a future patch and hence cannot be removed.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
blame.c
bloom.c
bloom.h
commit-graph.c
line-log.c
revision.c
t/helper/test-bloom.c

diff --git a/blame.c b/blame.c
index 3e5f8787bc446af4d13c528a260254e50b5a8a8f..756285fca73a41609e6f1e4947c9bc13fcb55eae 100644 (file)
--- a/blame.c
+++ b/blame.c
@@ -1275,7 +1275,7 @@ static int maybe_changed_path(struct repository *r,
        if (commit_graph_generation(origin->commit) == GENERATION_NUMBER_INFINITY)
                return 1;
 
-       filter = get_bloom_filter(r, origin->commit, 0);
+       filter = get_bloom_filter(r, origin->commit);
 
        if (!filter)
                return 1;
diff --git a/bloom.c b/bloom.c
index cd9380ac623d4a01ec04ebd2110747e756c70df4..393c61b4bcb2e4d0906594f2b6e321b6bd3d4bfd 100644 (file)
--- a/bloom.c
+++ b/bloom.c
@@ -177,9 +177,10 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data,
        return strcmp(e1->path, e2->path);
 }
 
-struct bloom_filter *get_bloom_filter(struct repository *r,
-                                     struct commit *c,
-                                     int compute_if_not_present)
+struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
+                                                struct commit *c,
+                                                int compute_if_not_present,
+                                                enum bloom_filter_computed *computed)
 {
        struct bloom_filter *filter;
        struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@@ -187,6 +188,9 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
        struct diff_options diffopt;
        int max_changes = 512;
 
+       if (computed)
+               *computed = BLOOM_NOT_COMPUTED;
+
        if (!bloom_filters.slab_size)
                return NULL;
 
@@ -271,8 +275,14 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
                        diff_free_filepair(diff_queued_diff.queue[i]);
                filter->data = NULL;
                filter->len = 0;
+
+               if (computed)
+                       *computed |= BLOOM_TRUNC_LARGE;
        }
 
+       if (computed)
+               *computed |= BLOOM_COMPUTED;
+
        free(diff_queued_diff.queue);
        DIFF_QUEUE_CLEAR(&diff_queued_diff);
 
diff --git a/bloom.h b/bloom.h
index 0b9b59a6fea667fcc510dddb7d237e2383111c36..e2e035ad1412c61758674580998849b8e6c88457 100644 (file)
--- a/bloom.h
+++ b/bloom.h
@@ -89,9 +89,19 @@ void add_key_to_filter(const struct bloom_key *key,
 
 void init_bloom_filters(void);
 
-struct bloom_filter *get_bloom_filter(struct repository *r,
-                                     struct commit *c,
-                                     int compute_if_not_present);
+enum bloom_filter_computed {
+       BLOOM_NOT_COMPUTED = (1 << 0),
+       BLOOM_COMPUTED     = (1 << 1),
+       BLOOM_TRUNC_LARGE  = (1 << 2),
+};
+
+struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
+                                                struct commit *c,
+                                                int compute_if_not_present,
+                                                enum bloom_filter_computed *computed);
+
+#define get_bloom_filter(r, c) get_or_compute_bloom_filter( \
+       (r), (c), 0, NULL)
 
 int bloom_filter_contains(const struct bloom_filter *filter,
                          const struct bloom_key *key,
index 44dceb80459994b19d9d882d912a3364e1a91781..67a2812e79c88855bb413f0fca9be4ab12a71fd0 100644 (file)
@@ -965,6 +965,10 @@ struct write_commit_graph_context {
        const struct split_commit_graph_opts *split_opts;
        size_t total_bloom_filter_data_size;
        const struct bloom_filter_settings *bloom_settings;
+
+       int count_bloom_filter_computed;
+       int count_bloom_filter_not_computed;
+       int count_bloom_filter_trunc_large;
 };
 
 static int write_graph_chunk_fanout(struct hashfile *f,
@@ -1176,7 +1180,7 @@ static int write_graph_chunk_bloom_indexes(struct hashfile *f,
        uint32_t cur_pos = 0;
 
        while (list < last) {
-               struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
+               struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
                size_t len = filter ? filter->len : 0;
                cur_pos += len;
                display_progress(ctx->progress, ++ctx->progress_cnt);
@@ -1216,7 +1220,7 @@ static int write_graph_chunk_bloom_data(struct hashfile *f,
        hashwrite_be32(f, ctx->bloom_settings->bits_per_entry);
 
        while (list < last) {
-               struct bloom_filter *filter = get_bloom_filter(ctx->r, *list, 0);
+               struct bloom_filter *filter = get_bloom_filter(ctx->r, *list);
                size_t len = filter ? filter->len : 0;
 
                display_progress(ctx->progress, ++ctx->progress_cnt);
@@ -1386,6 +1390,16 @@ static void compute_generation_numbers(struct write_commit_graph_context *ctx)
        stop_progress(&ctx->progress);
 }
 
+static void trace2_bloom_filter_write_statistics(struct write_commit_graph_context *ctx)
+{
+       trace2_data_intmax("commit-graph", ctx->r, "filter-computed",
+                          ctx->count_bloom_filter_computed);
+       trace2_data_intmax("commit-graph", ctx->r, "filter-not-computed",
+                          ctx->count_bloom_filter_not_computed);
+       trace2_data_intmax("commit-graph", ctx->r, "filter-trunc-large",
+                          ctx->count_bloom_filter_trunc_large);
+}
+
 static void compute_bloom_filters(struct write_commit_graph_context *ctx)
 {
        int i;
@@ -1408,12 +1422,26 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
                QSORT(sorted_commits, ctx->commits.nr, commit_gen_cmp);
 
        for (i = 0; i < ctx->commits.nr; i++) {
+               enum bloom_filter_computed computed = 0;
                struct commit *c = sorted_commits[i];
-               struct bloom_filter *filter = get_bloom_filter(ctx->r, c, 1);
+               struct bloom_filter *filter = get_or_compute_bloom_filter(
+                       ctx->r,
+                       c,
+                       1,
+                       &computed);
+               if (computed & BLOOM_COMPUTED) {
+                       ctx->count_bloom_filter_computed++;
+                       if (computed & BLOOM_TRUNC_LARGE)
+                               ctx->count_bloom_filter_trunc_large++;
+               } else if (computed & BLOOM_NOT_COMPUTED)
+                       ctx->count_bloom_filter_not_computed++;
                ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
                display_progress(progress, i + 1);
        }
 
+       if (trace2_is_enabled())
+               trace2_bloom_filter_write_statistics(ctx);
+
        free(sorted_commits);
        stop_progress(&progress);
 }
index c53692834d858cb602431f6a15c30203615e3b0a..9e58fd185ac92465b4001177b668b1e3fec96fec 100644 (file)
@@ -1159,7 +1159,7 @@ static int bloom_filter_check(struct rev_info *rev,
                return 1;
 
        if (!rev->bloom_filter_settings ||
-           !(filter = get_bloom_filter(rev->repo, commit, 0)))
+           !(filter = get_bloom_filter(rev->repo, commit)))
                return 1;
 
        if (!range)
index c45ed1076edd59f508787f8c2dac76d40f43c566..b7ec71275564c616ae5ffe02050866fc6201b5fb 100644 (file)
@@ -753,7 +753,7 @@ static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
        if (commit_graph_generation(commit) == GENERATION_NUMBER_INFINITY)
                return -1;
 
-       filter = get_bloom_filter(revs->repo, commit, 0);
+       filter = get_bloom_filter(revs->repo, commit);
 
        if (!filter) {
                count_bloom_filter_not_present++;
index f0aa80b98eb3240c59db99e2b5fbc186fd87c717..531af439c21e77dfbeac274b10ed9b2a45e6e682 100644 (file)
@@ -39,7 +39,8 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
        struct bloom_filter *filter;
        setup_git_directory();
        c = lookup_commit(the_repository, commit_oid);
-       filter = get_bloom_filter(the_repository, c, 1);
+       filter = get_or_compute_bloom_filter(the_repository, c, 1,
+                                            NULL);
        print_bloom_filter(filter);
 }