git.ipfire.org Git - thirdparty/git.git/blobdiff - bloom.c
Merge branch 'jt/t5500-unflake'
[thirdparty/git.git] / bloom.c
diff --git a/bloom.c b/bloom.c
index dd9bab9bbd6cfd6b71a39e73813ecf745bec835e..9b86aa3f59ab5a7fdb3fb25474f1ab1fa28d65db 100644 (file)
--- a/bloom.c
+++ b/bloom.c
@@ -9,7 +9,7 @@
 
 define_commit_slab(bloom_filter_slab, struct bloom_filter);
 
-struct bloom_filter_slab bloom_filters;
+static struct bloom_filter_slab bloom_filters;
 
 struct pathmap_hash_entry {
     struct hashmap_entry entry;
@@ -29,8 +29,8 @@ static inline unsigned char get_bitmask(uint32_t pos)
 }
 
 static int load_bloom_filter_from_graph(struct commit_graph *g,
-                                  struct bloom_filter *filter,
-                                  struct commit *c)
+                                       struct bloom_filter *filter,
+                                       struct commit *c)
 {
        uint32_t lex_pos, start_index, end_index;
 
@@ -123,9 +123,9 @@ uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len)
 }
 
 void fill_bloom_key(const char *data,
-                                       size_t len,
-                                       struct bloom_key *key,
-                                       const struct bloom_filter_settings *settings)
+                   size_t len,
+                   struct bloom_key *key,
+                   const struct bloom_filter_settings *settings)
 {
        int i;
        const uint32_t seed0 = 0x293ae76f;
@@ -139,8 +139,8 @@ void fill_bloom_key(const char *data,
 }
 
 void add_key_to_filter(const struct bloom_key *key,
-                                          struct bloom_filter *filter,
-                                          const struct bloom_filter_settings *settings)
+                      struct bloom_filter *filter,
+                      const struct bloom_filter_settings *settings)
 {
        int i;
        uint64_t mod = filter->len * BITS_PER_WORD;
@@ -158,9 +158,22 @@ void init_bloom_filters(void)
        init_bloom_filter_slab(&bloom_filters);
 }
 
+static int pathmap_cmp(const void *hashmap_cmp_fn_data, /* unused; this comparator is handed to hashmap_init() */
+                      const struct hashmap_entry *eptr,
+                      const struct hashmap_entry *entry_or_key,
+                      const void *keydata) /* unused */
+{
+       const struct pathmap_hash_entry *e1, *e2;
+
+       e1 = container_of(eptr, const struct pathmap_hash_entry, entry); /* enclosing entry of eptr */
+       e2 = container_of(entry_or_key, const struct pathmap_hash_entry, entry); /* enclosing entry of entry_or_key */
+
+       return strcmp(e1->path, e2->path); /* 0 only when both entries hold the same path */
+}
+
 struct bloom_filter *get_bloom_filter(struct repository *r,
                                      struct commit *c,
-                                         int compute_if_not_present)
+                                     int compute_if_not_present)
 {
        struct bloom_filter *filter;
        struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@@ -193,35 +206,42 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
        diffopt.max_changes = max_changes;
        diff_setup_done(&diffopt);
 
+       /* ensure commit is parsed so we have parent information */
+       repo_parse_commit(r, c);
+
        if (c->parents)
                diff_tree_oid(&c->parents->item->object.oid, &c->object.oid, "", &diffopt);
        else
                diff_tree_oid(NULL, &c->object.oid, "", &diffopt);
        diffcore_std(&diffopt);
 
-       if (diff_queued_diff.nr <= max_changes) {
+       if (diffopt.num_changes <= max_changes) {
                struct hashmap pathmap;
                struct pathmap_hash_entry *e;
                struct hashmap_iter iter;
-               hashmap_init(&pathmap, NULL, NULL, 0);
+               hashmap_init(&pathmap, pathmap_cmp, NULL, 0);
 
                for (i = 0; i < diff_queued_diff.nr; i++) {
                        const char *path = diff_queued_diff.queue[i]->two->path;
 
                        /*
-                       * Add each leading directory of the changed file, i.e. for
-                       * 'dir/subdir/file' add 'dir' and 'dir/subdir' as well, so
-                       * the Bloom filter could be used to speed up commands like
-                       * 'git log dir/subdir', too.
-                       *
-                       * Note that directories are added without the trailing '/'.
-                       */
+                        * Add each leading directory of the changed file, i.e. for
+                        * 'dir/subdir/file' add 'dir' and 'dir/subdir' as well, so
+                        * the Bloom filter could be used to speed up commands like
+                        * 'git log dir/subdir', too.
+                        *
+                        * Note that directories are added without the trailing '/'.
+                        */
                        do {
                                char *last_slash = strrchr(path, '/');
 
                                FLEX_ALLOC_STR(e, path, path);
                                hashmap_entry_init(&e->entry, strhash(path));
-                               hashmap_add(&pathmap, &e->entry);
+
+                               if (!hashmap_get(&pathmap, &e->entry, NULL))
+                                       hashmap_add(&pathmap, &e->entry);
+                               else
+                                       free(e);
 
                                if (!last_slash)
                                        last_slash = (char*)path;
@@ -273,4 +293,4 @@ int bloom_filter_contains(const struct bloom_filter *filter,
        }
 
        return 1;
-}
\ No newline at end of file
+}