]>
Commit | Line | Data |
---|---|---|
f52207a4 GS |
1 | #ifndef BLOOM_H |
2 | #define BLOOM_H | |
3 | ||
f1294eaf GS |
4 | struct bloom_filter_settings { |
5 | /* | |
6 | * The version of the hashing technique being used. | |
7 | * We currently only support version = 1 which is | |
8 | * the seeded murmur3 hashing technique implemented | |
9 | * in bloom.c. | |
10 | */ | |
11 | uint32_t hash_version; | |
12 | ||
13 | /* | |
14 | * The number of times a path is hashed, i.e. the | |
15 | * number of bit positions tht cumulatively | |
16 | * determine whether a path is present in the | |
17 | * Bloom filter. | |
18 | */ | |
19 | uint32_t num_hashes; | |
20 | ||
21 | /* | |
22 | * The minimum number of bits per entry in the Bloom | |
23 | * filter. If the filter contains 'n' entries, then | |
24 | * filter size is the minimum number of 8-bit words | |
25 | * that contain n*b bits. | |
26 | */ | |
27 | uint32_t bits_per_entry; | |
28 | }; | |
29 | ||
30 | #define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 } | |
31 | #define BITS_PER_WORD 8 | |
32 | ||
33 | /* | |
34 | * A bloom_filter struct represents a data segment to | |
35 | * use when testing hash values. The 'len' member | |
36 | * dictates how many entries are stored in | |
37 | * 'data'. | |
38 | */ | |
39 | struct bloom_filter { | |
40 | unsigned char *data; | |
41 | size_t len; | |
42 | }; | |
43 | ||
44 | /* | |
45 | * A bloom_key represents the k hash values for a | |
46 | * given string. These can be precomputed and | |
47 | * stored in a bloom_key for re-use when testing | |
48 | * against a bloom_filter. The number of hashes is | |
49 | * given by the Bloom filter settings and is the same | |
50 | * for all Bloom filters and keys interacting with | |
51 | * the loaded version of the commit graph file and | |
52 | * the Bloom data chunks. | |
53 | */ | |
54 | struct bloom_key { | |
55 | uint32_t *hashes; | |
56 | }; | |
57 | ||
f52207a4 GS |
58 | /* |
59 | * Calculate the murmur3 32-bit hash value for the given data | |
60 | * using the given seed. | |
61 | * Produces a uniformly distributed hash value. | |
62 | * Not considered to be cryptographically secure. | |
63 | * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm | |
64 | */ | |
65 | uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len); | |
66 | ||
f1294eaf GS |
67 | void fill_bloom_key(const char *data, |
68 | size_t len, | |
69 | struct bloom_key *key, | |
70 | const struct bloom_filter_settings *settings); | |
71 | ||
72 | void add_key_to_filter(const struct bloom_key *key, | |
73 | struct bloom_filter *filter, | |
74 | const struct bloom_filter_settings *settings); | |
75 | ||
f52207a4 | 76 | #endif |