]> git.ipfire.org Git - thirdparty/git.git/blobdiff - bloom.c
fetch-pack: support more than one pack lockfile
[thirdparty/git.git] / bloom.c
diff --git a/bloom.c b/bloom.c
index 888b67f1ea6baedd589564b74719da29b40076d8..9b86aa3f59ab5a7fdb3fb25474f1ab1fa28d65db 100644 (file)
--- a/bloom.c
+++ b/bloom.c
@@ -1,5 +1,20 @@
 #include "git-compat-util.h"
 #include "bloom.h"
+#include "diff.h"
+#include "diffcore.h"
+#include "revision.h"
+#include "hashmap.h"
+#include "commit-graph.h"
+#include "commit.h"
+
+define_commit_slab(bloom_filter_slab, struct bloom_filter);
+
+static struct bloom_filter_slab bloom_filters;
+
+struct pathmap_hash_entry {
+    struct hashmap_entry entry;
+    const char path[FLEX_ARRAY];
+};
 
 static uint32_t rotate_left(uint32_t value, int32_t count)
 {
@@ -13,6 +28,36 @@ static inline unsigned char get_bitmask(uint32_t pos)
        return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
 }
 
+static int load_bloom_filter_from_graph(struct commit_graph *g,
+                                       struct bloom_filter *filter,
+                                       struct commit *c)
+{
+       uint32_t lex_pos, start_index, end_index;
+
+       while (c->graph_pos < g->num_commits_in_base)
+               g = g->base_graph;
+
+       /* The commit graph commit 'c' lives in doesn't carry bloom filters. */
+       if (!g->chunk_bloom_indexes)
+               return 0;
+
+       lex_pos = c->graph_pos - g->num_commits_in_base;
+
+       end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos);
+
+       if (lex_pos > 0)
+               start_index = get_be32(g->chunk_bloom_indexes + 4 * (lex_pos - 1));
+       else
+               start_index = 0;
+
+       filter->len = end_index - start_index;
+       filter->data = (unsigned char *)(g->chunk_bloom_data +
+                                       sizeof(unsigned char) * start_index +
+                                       BLOOMDATA_CHUNK_HEADER_SIZE);
+
+       return 1;
+}
+
 /*
  * Calculate the murmur3 32-bit hash value for the given data
  * using the given seed.
@@ -78,9 +123,9 @@ uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len)
 }
 
 void fill_bloom_key(const char *data,
-                                       size_t len,
-                                       struct bloom_key *key,
-                                       const struct bloom_filter_settings *settings)
+                   size_t len,
+                   struct bloom_key *key,
+                   const struct bloom_filter_settings *settings)
 {
        int i;
        const uint32_t seed0 = 0x293ae76f;
@@ -94,8 +139,8 @@ void fill_bloom_key(const char *data,
 }
 
 void add_key_to_filter(const struct bloom_key *key,
-                                          struct bloom_filter *filter,
-                                          const struct bloom_filter_settings *settings)
+                      struct bloom_filter *filter,
+                      const struct bloom_filter_settings *settings)
 {
        int i;
        uint64_t mod = filter->len * BITS_PER_WORD;
@@ -107,3 +152,145 @@ void add_key_to_filter(const struct bloom_key *key,
                filter->data[block_pos] |= get_bitmask(hash_mod);
        }
 }
+
+void init_bloom_filters(void)
+{
+       init_bloom_filter_slab(&bloom_filters);
+}
+
+static int pathmap_cmp(const void *hashmap_cmp_fn_data,
+                      const struct hashmap_entry *eptr,
+                      const struct hashmap_entry *entry_or_key,
+                      const void *keydata)
+{
+       const struct pathmap_hash_entry *e1, *e2;
+
+       e1 = container_of(eptr, const struct pathmap_hash_entry, entry);
+       e2 = container_of(entry_or_key, const struct pathmap_hash_entry, entry);
+
+       return strcmp(e1->path, e2->path);
+}
+
+struct bloom_filter *get_bloom_filter(struct repository *r,
+                                     struct commit *c,
+                                     int compute_if_not_present)
+{
+       struct bloom_filter *filter;
+       struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
+       int i;
+       struct diff_options diffopt;
+       int max_changes = 512;
+
+       if (bloom_filters.slab_size == 0)
+               return NULL;
+
+       filter = bloom_filter_slab_at(&bloom_filters, c);
+
+       if (!filter->data) {
+               load_commit_graph_info(r, c);
+               if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
+                       r->objects->commit_graph->chunk_bloom_indexes) {
+                       if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
+                               return filter;
+                       else
+                               return NULL;
+               }
+       }
+
+       if (filter->data || !compute_if_not_present)
+               return filter;
+
+       repo_diff_setup(r, &diffopt);
+       diffopt.flags.recursive = 1;
+       diffopt.detect_rename = 0;
+       diffopt.max_changes = max_changes;
+       diff_setup_done(&diffopt);
+
+       /* ensure commit is parsed so we have parent information */
+       repo_parse_commit(r, c);
+
+       if (c->parents)
+               diff_tree_oid(&c->parents->item->object.oid, &c->object.oid, "", &diffopt);
+       else
+               diff_tree_oid(NULL, &c->object.oid, "", &diffopt);
+       diffcore_std(&diffopt);
+
+       if (diffopt.num_changes <= max_changes) {
+               struct hashmap pathmap;
+               struct pathmap_hash_entry *e;
+               struct hashmap_iter iter;
+               hashmap_init(&pathmap, pathmap_cmp, NULL, 0);
+
+               for (i = 0; i < diff_queued_diff.nr; i++) {
+                       const char *path = diff_queued_diff.queue[i]->two->path;
+
+                       /*
+                        * Add each leading directory of the changed file, i.e. for
+                        * 'dir/subdir/file' add 'dir' and 'dir/subdir' as well, so
+                        * the Bloom filter could be used to speed up commands like
+                        * 'git log dir/subdir', too.
+                        *
+                        * Note that directories are added without the trailing '/'.
+                        */
+                       do {
+                               char *last_slash = strrchr(path, '/');
+
+                               FLEX_ALLOC_STR(e, path, path);
+                               hashmap_entry_init(&e->entry, strhash(path));
+
+                               if (!hashmap_get(&pathmap, &e->entry, NULL))
+                                       hashmap_add(&pathmap, &e->entry);
+                               else
+                                       free(e);
+
+                               if (!last_slash)
+                                       last_slash = (char*)path;
+                               *last_slash = '\0';
+
+                       } while (*path);
+
+                       diff_free_filepair(diff_queued_diff.queue[i]);
+               }
+
+               filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
+               filter->data = xcalloc(filter->len, sizeof(unsigned char));
+
+               hashmap_for_each_entry(&pathmap, &iter, e, entry) {
+                       struct bloom_key key;
+                       fill_bloom_key(e->path, strlen(e->path), &key, &settings);
+                       add_key_to_filter(&key, filter, &settings);
+               }
+
+               hashmap_free_entries(&pathmap, struct pathmap_hash_entry, entry);
+       } else {
+               for (i = 0; i < diff_queued_diff.nr; i++)
+                       diff_free_filepair(diff_queued_diff.queue[i]);
+               filter->data = NULL;
+               filter->len = 0;
+       }
+
+       free(diff_queued_diff.queue);
+       DIFF_QUEUE_CLEAR(&diff_queued_diff);
+
+       return filter;
+}
+
+int bloom_filter_contains(const struct bloom_filter *filter,
+                         const struct bloom_key *key,
+                         const struct bloom_filter_settings *settings)
+{
+       int i;
+       uint64_t mod = filter->len * BITS_PER_WORD;
+
+       if (!mod)
+               return -1;
+
+       for (i = 0; i < settings->num_hashes; i++) {
+               uint64_t hash_mod = key->hashes[i] % mod;
+               uint64_t block_pos = hash_mod / BITS_PER_WORD;
+               if (!(filter->data[block_pos] & get_bitmask(hash_mod)))
+                       return 0;
+       }
+
+       return 1;
+}