]>
Commit | Line | Data |
---|---|---|
f52207a4 GS |
1 | #ifndef BLOOM_H |
2 | #define BLOOM_H | |
3 | ||
ed591feb GS |
4 | struct commit; |
5 | struct repository; | |
a09858d4 | 6 | struct commit_graph; |
ed591feb | 7 | |
f1294eaf GS |
8 | struct bloom_filter_settings { |
9 | /* | |
10 | * The version of the hashing technique being used. | |
ba5a81d5 | 11 | * The newest version is 2, which is |
f1294eaf | 12 | * the seeded murmur3 hashing technique implemented |
ba5a81d5 TB |
13 | * in bloom.c. Bloom filters of version 1 were created |
14 | * with prior versions of Git, which had a bug in the | |
15 | * implementation of the hash function. | |
f1294eaf GS |
16 | */ |
17 | uint32_t hash_version; | |
18 | ||
19 | /* | |
20 | * The number of times a path is hashed, i.e. the | |
a3711f9f | 21 | * number of bit positions that cumulatively |
f1294eaf GS |
22 | * determine whether a path is present in the |
23 | * Bloom filter. | |
24 | */ | |
25 | uint32_t num_hashes; | |
26 | ||
27 | /* | |
28 | * The minimum number of bits per entry in the Bloom | |
29 | * filter. If the filter contains 'n' entries, then | |
30 | * filter size is the minimum number of 8-bit words | |
31 | * that contain n*b bits. | |
32 | */ | |
33 | uint32_t bits_per_entry; | |
97ffa4fa TB |
34 | |
35 | /* | |
36 | * The maximum number of changed paths per commit | |
37 | * before declaring a Bloom filter to be too-large. | |
38 | * | |
39 | * Not written to the commit-graph file. | |
40 | */ | |
41 | uint32_t max_changed_paths; | |
f1294eaf GS |
42 | }; |
43 | ||
97ffa4fa TB |
44 | #define DEFAULT_BLOOM_MAX_CHANGES 512 |
45 | #define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES } | |
f1294eaf | 46 | #define BITS_PER_WORD 8 |
1217c03e | 47 | #define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t) |
f1294eaf GS |
48 | |
49 | /* | |
50 | * A bloom_filter struct represents a data segment to | |
51 | * use when testing hash values. The 'len' member | |
52 | * dictates how many entries are stored in | |
53 | * 'data'. | |
54 | */ | |
55 | struct bloom_filter { | |
56 | unsigned char *data; | |
57 | size_t len; | |
5b5d5b59 | 58 | int version; |
9c8a9ec7 TB |
59 | |
60 | void *to_free; | |
f1294eaf GS |
61 | }; |
62 | ||
63 | /* | |
64 | * A bloom_key represents the k hash values for a | |
65 | * given string. These can be precomputed and | |
66 | * stored in a bloom_key for re-use when testing | |
67 | * against a bloom_filter. The number of hashes is | |
68 | * given by the Bloom filter settings and is the same | |
69 | * for all Bloom filters and keys interacting with | |
70 | * the loaded version of the commit graph file and | |
71 | * the Bloom data chunks. | |
72 | */ | |
73 | struct bloom_key { | |
74 | uint32_t *hashes; | |
75 | }; | |
76 | ||
a09858d4 TB |
77 | int load_bloom_filter_from_graph(struct commit_graph *g, |
78 | struct bloom_filter *filter, | |
79 | uint32_t graph_pos); | |
80 | ||
f52207a4 GS |
81 | /* |
82 | * Calculate the murmur3 32-bit hash value for the given data | |
83 | * using the given seed. | |
84 | * Produces a uniformly distributed hash value. | |
85 | * Not considered to be cryptographically secure. | |
86 | * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm | |
87 | */ | |
ba5a81d5 | 88 | uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len); |
f52207a4 | 89 | |
f1294eaf GS |
90 | void fill_bloom_key(const char *data, |
91 | size_t len, | |
92 | struct bloom_key *key, | |
93 | const struct bloom_filter_settings *settings); | |
f32dde8c | 94 | void clear_bloom_key(struct bloom_key *key); |
f1294eaf GS |
95 | |
96 | void add_key_to_filter(const struct bloom_key *key, | |
eb591e42 DS |
97 | struct bloom_filter *filter, |
98 | const struct bloom_filter_settings *settings); | |
f1294eaf | 99 | |
ed591feb | 100 | void init_bloom_filters(void); |
9c8a9ec7 | 101 | void deinit_bloom_filters(void); |
ed591feb | 102 | |
312cff52 TB |
103 | enum bloom_filter_computed { |
104 | BLOOM_NOT_COMPUTED = (1 << 0), | |
105 | BLOOM_COMPUTED = (1 << 1), | |
106 | BLOOM_TRUNC_LARGE = (1 << 2), | |
59f0d507 | 107 | BLOOM_TRUNC_EMPTY = (1 << 3), |
5421e7c3 | 108 | BLOOM_UPGRADED = (1 << 4), |
312cff52 TB |
109 | }; |
110 | ||
111 | struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, | |
112 | struct commit *c, | |
113 | int compute_if_not_present, | |
9a7a9ed1 | 114 | const struct bloom_filter_settings *settings, |
312cff52 TB |
115 | enum bloom_filter_computed *computed); |
116 | ||
b2cf3310 TB |
117 | /* |
118 | * Find the Bloom filter associated with the given commit "c". | |
119 | * | |
120 | * If any of the following are true | |
121 | * | |
122 | * - the repository does not have a commit-graph, or | |
123 | * - the repository disables reading from the commit-graph, or | |
124 | * - the given commit does not have a Bloom filter computed, or | |
125 | * - there is a Bloom filter for commit "c", but it cannot be read | |
126 | * because the filter uses an incompatible version of murmur3 | |
127 | * | |
128 | * , then `get_bloom_filter()` will return NULL. Otherwise, the corresponding | |
129 | * Bloom filter will be returned. | |
130 | * | |
131 | * For callers who wish to inspect Bloom filters with incompatible hash | |
132 | * versions, use get_or_compute_bloom_filter(). | |
133 | */ | |
134 | struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c); | |
ed591feb | 135 | |
a56b9464 GS |
136 | int bloom_filter_contains(const struct bloom_filter *filter, |
137 | const struct bloom_key *key, | |
138 | const struct bloom_filter_settings *settings); | |
139 | ||
066b70ae | 140 | #endif |