]>
Commit | Line | Data |
---|---|---|
f52207a4 GS |
1 | #ifndef BLOOM_H |
2 | #define BLOOM_H | |
3 | ||
ed591feb GS |
4 | struct commit; |
5 | struct repository; | |
6 | ||
f1294eaf GS |
7 | struct bloom_filter_settings { |
8 | /* | |
9 | * The version of the hashing technique being used. | |
10 | * We currently only support version = 1 which is | |
11 | * the seeded murmur3 hashing technique implemented | |
12 | * in bloom.c. | |
13 | */ | |
14 | uint32_t hash_version; | |
15 | ||
16 | /* | |
17 | * The number of times a path is hashed, i.e. the | |
18 | * number of bit positions tht cumulatively | |
19 | * determine whether a path is present in the | |
20 | * Bloom filter. | |
21 | */ | |
22 | uint32_t num_hashes; | |
23 | ||
24 | /* | |
25 | * The minimum number of bits per entry in the Bloom | |
26 | * filter. If the filter contains 'n' entries, then | |
27 | * filter size is the minimum number of 8-bit words | |
28 | * that contain n*b bits. | |
29 | */ | |
30 | uint32_t bits_per_entry; | |
97ffa4fa TB |
31 | |
32 | /* | |
33 | * The maximum number of changed paths per commit | |
34 | * before declaring a Bloom filter to be too-large. | |
35 | * | |
36 | * Not written to the commit-graph file. | |
37 | */ | |
38 | uint32_t max_changed_paths; | |
f1294eaf GS |
39 | }; |
40 | ||
97ffa4fa TB |
41 | #define DEFAULT_BLOOM_MAX_CHANGES 512 |
42 | #define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES } | |
f1294eaf | 43 | #define BITS_PER_WORD 8 |
1217c03e | 44 | #define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t) |
f1294eaf GS |
45 | |
46 | /* | |
47 | * A bloom_filter struct represents a data segment to | |
48 | * use when testing hash values. The 'len' member | |
49 | * dictates how many entries are stored in | |
50 | * 'data'. | |
51 | */ | |
52 | struct bloom_filter { | |
53 | unsigned char *data; | |
54 | size_t len; | |
55 | }; | |
56 | ||
57 | /* | |
58 | * A bloom_key represents the k hash values for a | |
59 | * given string. These can be precomputed and | |
60 | * stored in a bloom_key for re-use when testing | |
61 | * against a bloom_filter. The number of hashes is | |
62 | * given by the Bloom filter settings and is the same | |
63 | * for all Bloom filters and keys interacting with | |
64 | * the loaded version of the commit graph file and | |
65 | * the Bloom data chunks. | |
66 | */ | |
67 | struct bloom_key { | |
68 | uint32_t *hashes; | |
69 | }; | |
70 | ||
f52207a4 GS |
71 | /* |
72 | * Calculate the murmur3 32-bit hash value for the given data | |
73 | * using the given seed. | |
74 | * Produces a uniformly distributed hash value. | |
75 | * Not considered to be cryptographically secure. | |
76 | * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm | |
77 | */ | |
78 | uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len); | |
79 | ||
f1294eaf GS |
80 | void fill_bloom_key(const char *data, |
81 | size_t len, | |
82 | struct bloom_key *key, | |
83 | const struct bloom_filter_settings *settings); | |
f32dde8c | 84 | void clear_bloom_key(struct bloom_key *key); |
f1294eaf GS |
85 | |
86 | void add_key_to_filter(const struct bloom_key *key, | |
eb591e42 DS |
87 | struct bloom_filter *filter, |
88 | const struct bloom_filter_settings *settings); | |
f1294eaf | 89 | |
ed591feb GS |
90 | void init_bloom_filters(void); |
91 | ||
312cff52 TB |
92 | enum bloom_filter_computed { |
93 | BLOOM_NOT_COMPUTED = (1 << 0), | |
94 | BLOOM_COMPUTED = (1 << 1), | |
95 | BLOOM_TRUNC_LARGE = (1 << 2), | |
59f0d507 | 96 | BLOOM_TRUNC_EMPTY = (1 << 3), |
312cff52 TB |
97 | }; |
98 | ||
99 | struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, | |
100 | struct commit *c, | |
101 | int compute_if_not_present, | |
9a7a9ed1 | 102 | const struct bloom_filter_settings *settings, |
312cff52 TB |
103 | enum bloom_filter_computed *computed); |
104 | ||
105 | #define get_bloom_filter(r, c) get_or_compute_bloom_filter( \ | |
9a7a9ed1 | 106 | (r), (c), 0, NULL, NULL) |
ed591feb | 107 | |
a56b9464 GS |
108 | int bloom_filter_contains(const struct bloom_filter *filter, |
109 | const struct bloom_key *key, | |
110 | const struct bloom_filter_settings *settings); | |
111 | ||
066b70ae | 112 | #endif |