git.ipfire.org Git - thirdparty/git.git/commitdiff
bloom: annotate filters with hash version
author Taylor Blau <me@ttaylorr.com>
Tue, 25 Jun 2024 17:39:54 +0000 (13:39 -0400)
committer Junio C Hamano <gitster@pobox.com>
Tue, 25 Jun 2024 20:52:06 +0000 (13:52 -0700)
In subsequent commits, we will want to load existing Bloom filters out
of a commit-graph, even when the hash version they were computed with
does not match the value of `commitGraph.changedPathVersion`.

In order to differentiate between the two, add a "version" field to each
Bloom filter.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
bloom.c
bloom.h

diff --git a/bloom.c b/bloom.c
index 401999ed3c26238bb98835707523dbba2d2afde6..e64e53bc4caf48cb56505935214ca87885e1e47a 100644 (file)
--- a/bloom.c
+++ b/bloom.c
@@ -88,6 +88,7 @@ int load_bloom_filter_from_graph(struct commit_graph *g,
        filter->data = (unsigned char *)(g->chunk_bloom_data +
                                        sizeof(unsigned char) * start_index +
                                        BLOOMDATA_CHUNK_HEADER_SIZE);
+       filter->version = g->bloom_filter_settings->hash_version;
 
        return 1;
 }
@@ -210,11 +211,13 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
        return strcmp(e1->path, e2->path);
 }
 
-static void init_truncated_large_filter(struct bloom_filter *filter)
+static void init_truncated_large_filter(struct bloom_filter *filter,
+                                       int version)
 {
        filter->data = xmalloc(1);
        filter->data[0] = 0xFF;
        filter->len = 1;
+       filter->version = version;
 }
 
 struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
@@ -299,13 +302,15 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
                }
 
                if (hashmap_get_size(&pathmap) > settings->max_changed_paths) {
-                       init_truncated_large_filter(filter);
+                       init_truncated_large_filter(filter,
+                                                   settings->hash_version);
                        if (computed)
                                *computed |= BLOOM_TRUNC_LARGE;
                        goto cleanup;
                }
 
                filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
+               filter->version = settings->hash_version;
                if (!filter->len) {
                        if (computed)
                                *computed |= BLOOM_TRUNC_EMPTY;
@@ -325,7 +330,7 @@ struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
        } else {
                for (i = 0; i < diff_queued_diff.nr; i++)
                        diff_free_filepair(diff_queued_diff.queue[i]);
-               init_truncated_large_filter(filter);
+               init_truncated_large_filter(filter, settings->hash_version);
 
                if (computed)
                        *computed |= BLOOM_TRUNC_LARGE;
diff --git a/bloom.h b/bloom.h
index 1e4f612d2c203b5e507f871c523675477fa4e41b..c9dd7d402291c849bdb84b505498bb4cc43b3cc5 100644 (file)
--- a/bloom.h
+++ b/bloom.h
@@ -53,6 +53,7 @@ struct bloom_filter_settings {
 struct bloom_filter {
        unsigned char *data;
        size_t len;
+       int version;
 };
 
 /*