git.ipfire.org Git - thirdparty/git.git/blame - bloom.h
bloom.c: core Bloom filter implementation for changed paths.
[thirdparty/git.git] / bloom.h
CommitLineData
f52207a4
GS
1#ifndef BLOOM_H
2#define BLOOM_H
3
ed591feb
GS
4struct commit;
5struct repository;
6
f1294eaf
GS
7struct bloom_filter_settings {
8 /*
9 * The version of the hashing technique being used.
10 * We currently only support version = 1 which is
11 * the seeded murmur3 hashing technique implemented
12 * in bloom.c.
13 */
14 uint32_t hash_version;
15
16 /*
17 * The number of times a path is hashed, i.e. the
18 * number of bit positions tht cumulatively
19 * determine whether a path is present in the
20 * Bloom filter.
21 */
22 uint32_t num_hashes;
23
24 /*
25 * The minimum number of bits per entry in the Bloom
26 * filter. If the filter contains 'n' entries, then
27 * filter size is the minimum number of 8-bit words
28 * that contain n*b bits.
29 */
30 uint32_t bits_per_entry;
31};
32
/*
 * Positional initializer for struct bloom_filter_settings, in field
 * order: hash_version = 1, num_hashes = 7, bits_per_entry = 10.
 */
33#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
/*
 * Width in bits of one filter word; corresponds to the "8-bit words"
 * used to describe the filter size in struct bloom_filter_settings.
 */
34#define BITS_PER_WORD 8
35
36/*
37 * A bloom_filter struct represents a data segment to
38 * use when testing hash values. The 'len' member
39 * dictates how many entries (unsigned char elements)
40 * are stored in 'data'.
41 */
42struct bloom_filter {
43 unsigned char *data;
44 size_t len;
45};
46
47/*
48 * A bloom_key represents the k hash values for a
49 * given string. These can be precomputed and
50 * stored in a bloom_key for re-use when testing
51 * against a bloom_filter. The number of hashes is
52 * given by the Bloom filter settings and is the same
53 * for all Bloom filters and keys interacting with
54 * the loaded version of the commit graph file and
55 * the Bloom data chunks.
 *
 * The struct stores only the 'hashes' pointer; the number
 * of values it points to is not recorded here.
56 */
57struct bloom_key {
58 uint32_t *hashes;
59};
60
f52207a4
GS
61/*
62 * Calculate the murmur3 32-bit hash value for the given data
63 * ('data', of length 'len') using the given seed ('seed').
64 * Produces a uniformly distributed hash value.
65 * Not considered to be cryptographically secure.
66 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
67 */
68uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
69
f1294eaf
GS
/*
 * Fill 'key' for the string 'data' of length 'len', using 'settings'.
 * NOTE(review): presumably computes the hash values described on
 * struct bloom_key (one per settings->num_hashes); the definition is
 * not in this header, so allocation and ownership of key->hashes
 * should be confirmed in bloom.c.
 */
70void fill_bloom_key(const char *data,
71 size_t len,
72 struct bloom_key *key,
73 const struct bloom_filter_settings *settings);
74
/*
 * Add 'key' to 'filter' according to 'settings'. Only 'filter' is
 * passed as a non-const pointer.
 * NOTE(review): presumably sets the bit positions derived from
 * key->hashes in filter->data; confirm against the definition in
 * bloom.c.
 */
75void add_key_to_filter(const struct bloom_key *key,
76 struct bloom_filter *filter,
77 const struct bloom_filter_settings *settings);
78
ed591feb
GS
/*
 * Takes no arguments and returns nothing.
 * NOTE(review): presumably performs one-time setup of the storage
 * used for Bloom filters; the definition is not visible in this
 * header, so confirm in bloom.c.
 */
79void init_bloom_filters(void);
80
/*
 * Return a pointer to the struct bloom_filter for commit 'c' in
 * repository 'r'.
 * NOTE(review): whether the result can be NULL, and who owns the
 * returned filter, cannot be determined from this header; confirm
 * in bloom.c.
 */
81struct bloom_filter *get_bloom_filter(struct repository *r,
82 struct commit *c);
83
f52207a4 84#endif