]>
Commit | Line | Data |
---|---|---|
f52207a4 GS |
1 | #ifndef BLOOM_H |
2 | #define BLOOM_H | |
3 | ||
ed591feb GS |
4 | struct commit; |
5 | struct repository; | |
6 | ||
f1294eaf GS |
7 | struct bloom_filter_settings { |
8 | /* | |
9 | * The version of the hashing technique being used. | |
10 | * We currently only support version = 1 which is | |
11 | * the seeded murmur3 hashing technique implemented | |
12 | * in bloom.c. | |
13 | */ | |
14 | uint32_t hash_version; | |
15 | ||
16 | /* | |
17 | * The number of times a path is hashed, i.e. the | |
18 | * number of bit positions tht cumulatively | |
19 | * determine whether a path is present in the | |
20 | * Bloom filter. | |
21 | */ | |
22 | uint32_t num_hashes; | |
23 | ||
24 | /* | |
25 | * The minimum number of bits per entry in the Bloom | |
26 | * filter. If the filter contains 'n' entries, then | |
27 | * filter size is the minimum number of 8-bit words | |
28 | * that contain n*b bits. | |
29 | */ | |
30 | uint32_t bits_per_entry; | |
31 | }; | |
32 | ||
33 | #define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 } | |
34 | #define BITS_PER_WORD 8 | |
1217c03e | 35 | #define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t) |
f1294eaf GS |
36 | |
37 | /* | |
38 | * A bloom_filter struct represents a data segment to | |
39 | * use when testing hash values. The 'len' member | |
40 | * dictates how many entries are stored in | |
41 | * 'data'. | |
42 | */ | |
43 | struct bloom_filter { | |
44 | unsigned char *data; | |
45 | size_t len; | |
46 | }; | |
47 | ||
48 | /* | |
49 | * A bloom_key represents the k hash values for a | |
50 | * given string. These can be precomputed and | |
51 | * stored in a bloom_key for re-use when testing | |
52 | * against a bloom_filter. The number of hashes is | |
53 | * given by the Bloom filter settings and is the same | |
54 | * for all Bloom filters and keys interacting with | |
55 | * the loaded version of the commit graph file and | |
56 | * the Bloom data chunks. | |
57 | */ | |
58 | struct bloom_key { | |
59 | uint32_t *hashes; | |
60 | }; | |
61 | ||
f52207a4 GS |
62 | /* |
63 | * Calculate the murmur3 32-bit hash value for the given data | |
64 | * using the given seed. | |
65 | * Produces a uniformly distributed hash value. | |
66 | * Not considered to be cryptographically secure. | |
67 | * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm | |
68 | */ | |
69 | uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len); | |
70 | ||
f1294eaf GS |
71 | void fill_bloom_key(const char *data, |
72 | size_t len, | |
73 | struct bloom_key *key, | |
74 | const struct bloom_filter_settings *settings); | |
f32dde8c | 75 | void clear_bloom_key(struct bloom_key *key); |
f1294eaf GS |
76 | |
77 | void add_key_to_filter(const struct bloom_key *key, | |
eb591e42 DS |
78 | struct bloom_filter *filter, |
79 | const struct bloom_filter_settings *settings); | |
f1294eaf | 80 | |
ed591feb GS |
81 | void init_bloom_filters(void); |
82 | ||
83 | struct bloom_filter *get_bloom_filter(struct repository *r, | |
1217c03e GS |
84 | struct commit *c, |
85 | int compute_if_not_present); | |
ed591feb | 86 | |
a56b9464 GS |
87 | int bloom_filter_contains(const struct bloom_filter *filter, |
88 | const struct bloom_key *key, | |
89 | const struct bloom_filter_settings *settings); | |
90 | ||
066b70ae | 91 | #endif |