]> git.ipfire.org Git - thirdparty/git.git/blame - bloom.h
The sixth batch
[thirdparty/git.git] / bloom.h
CommitLineData
f52207a4
GS
1#ifndef BLOOM_H
2#define BLOOM_H
3
ed591feb
GS
4struct commit;
5struct repository;
a09858d4 6struct commit_graph;
ed591feb 7
f1294eaf
GS
8struct bloom_filter_settings {
9 /*
10 * The version of the hashing technique being used.
ba5a81d5 11 * The newest version is 2, which is
f1294eaf 12 * the seeded murmur3 hashing technique implemented
ba5a81d5
TB
13 * in bloom.c. Bloom filters of version 1 were created
14 * with prior versions of Git, which had a bug in the
15 * implementation of the hash function.
f1294eaf
GS
16 */
17 uint32_t hash_version;
18
19 /*
20 * The number of times a path is hashed, i.e. the
a3711f9f 21 * number of bit positions that cumulatively
f1294eaf
GS
22 * determine whether a path is present in the
23 * Bloom filter.
24 */
25 uint32_t num_hashes;
26
27 /*
28 * The minimum number of bits per entry in the Bloom
29 * filter. If the filter contains 'n' entries, then
30 * filter size is the minimum number of 8-bit words
31 * that contain n*b bits.
32 */
33 uint32_t bits_per_entry;
97ffa4fa
TB
34
35 /*
36 * The maximum number of changed paths per commit
37 * before declaring a Bloom filter to be too-large.
38 *
39 * Not written to the commit-graph file.
40 */
41 uint32_t max_changed_paths;
f1294eaf
GS
42};
43
97ffa4fa
TB
44#define DEFAULT_BLOOM_MAX_CHANGES 512
45#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10, DEFAULT_BLOOM_MAX_CHANGES }
f1294eaf 46#define BITS_PER_WORD 8
1217c03e 47#define BLOOMDATA_CHUNK_HEADER_SIZE 3 * sizeof(uint32_t)
f1294eaf
GS
48
49/*
50 * A bloom_filter struct represents a data segment to
51 * use when testing hash values. The 'len' member
52 * dictates how many entries are stored in
53 * 'data'.
54 */
55struct bloom_filter {
56 unsigned char *data;
57 size_t len;
5b5d5b59 58 int version;
9c8a9ec7
TB
59
60 void *to_free;
f1294eaf
GS
61};
62
63/*
64 * A bloom_key represents the k hash values for a
65 * given string. These can be precomputed and
66 * stored in a bloom_key for re-use when testing
67 * against a bloom_filter. The number of hashes is
68 * given by the Bloom filter settings and is the same
69 * for all Bloom filters and keys interacting with
70 * the loaded version of the commit graph file and
71 * the Bloom data chunks.
72 */
73struct bloom_key {
74 uint32_t *hashes;
75};
76
a09858d4
TB
77int load_bloom_filter_from_graph(struct commit_graph *g,
78 struct bloom_filter *filter,
79 uint32_t graph_pos);
80
f52207a4
GS
81/*
82 * Calculate the murmur3 32-bit hash value for the given data
83 * using the given seed.
84 * Produces a uniformly distributed hash value.
85 * Not considered to be cryptographically secure.
86 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
87 */
ba5a81d5 88uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len);
f52207a4 89
f1294eaf
GS
90void fill_bloom_key(const char *data,
91 size_t len,
92 struct bloom_key *key,
93 const struct bloom_filter_settings *settings);
f32dde8c 94void clear_bloom_key(struct bloom_key *key);
f1294eaf
GS
95
96void add_key_to_filter(const struct bloom_key *key,
eb591e42
DS
97 struct bloom_filter *filter,
98 const struct bloom_filter_settings *settings);
f1294eaf 99
ed591feb 100void init_bloom_filters(void);
9c8a9ec7 101void deinit_bloom_filters(void);
ed591feb 102
312cff52
TB
103enum bloom_filter_computed {
104 BLOOM_NOT_COMPUTED = (1 << 0),
105 BLOOM_COMPUTED = (1 << 1),
106 BLOOM_TRUNC_LARGE = (1 << 2),
59f0d507 107 BLOOM_TRUNC_EMPTY = (1 << 3),
5421e7c3 108 BLOOM_UPGRADED = (1 << 4),
312cff52
TB
109};
110
111struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
112 struct commit *c,
113 int compute_if_not_present,
9a7a9ed1 114 const struct bloom_filter_settings *settings,
312cff52
TB
115 enum bloom_filter_computed *computed);
116
b2cf3310
TB
117/*
118 * Find the Bloom filter associated with the given commit "c".
119 *
120 * If any of the following are true
121 *
122 * - the repository does not have a commit-graph, or
123 * - the repository disables reading from the commit-graph, or
124 * - the given commit does not have a Bloom filter computed, or
125 * - there is a Bloom filter for commit "c", but it cannot be read
126 * because the filter uses an incompatible version of murmur3
127 *
128 * , then `get_bloom_filter()` will return NULL. Otherwise, the corresponding
129 * Bloom filter will be returned.
130 *
131 * For callers who wish to inspect Bloom filters with incompatible hash
132 * versions, use get_or_compute_bloom_filter().
133 */
134struct bloom_filter *get_bloom_filter(struct repository *r, struct commit *c);
ed591feb 135
a56b9464
GS
136int bloom_filter_contains(const struct bloom_filter *filter,
137 const struct bloom_key *key,
138 const struct bloom_filter_settings *settings);
139
066b70ae 140#endif