]> git.ipfire.org Git - thirdparty/git.git/blame - bloom.h
bloom.c: introduce core Bloom filter constructs
[thirdparty/git.git] / bloom.h
CommitLineData
f52207a4
GS
1#ifndef BLOOM_H
2#define BLOOM_H
3
f1294eaf
GS
4struct bloom_filter_settings {
5 /*
6 * The version of the hashing technique being used.
7 * We currently only support version = 1 which is
8 * the seeded murmur3 hashing technique implemented
9 * in bloom.c.
10 */
11 uint32_t hash_version;
12
13 /*
14 * The number of times a path is hashed, i.e. the
15 * number of bit positions tht cumulatively
16 * determine whether a path is present in the
17 * Bloom filter.
18 */
19 uint32_t num_hashes;
20
21 /*
22 * The minimum number of bits per entry in the Bloom
23 * filter. If the filter contains 'n' entries, then
24 * filter size is the minimum number of 8-bit words
25 * that contain n*b bits.
26 */
27 uint32_t bits_per_entry;
28};
29
30#define DEFAULT_BLOOM_FILTER_SETTINGS { 1, 7, 10 }
31#define BITS_PER_WORD 8
32
33/*
34 * A bloom_filter struct represents a data segment to
35 * use when testing hash values. The 'len' member
36 * dictates how many entries are stored in
37 * 'data'.
38 */
39struct bloom_filter {
40 unsigned char *data;
41 size_t len;
42};
43
44/*
45 * A bloom_key represents the k hash values for a
46 * given string. These can be precomputed and
47 * stored in a bloom_key for re-use when testing
48 * against a bloom_filter. The number of hashes is
49 * given by the Bloom filter settings and is the same
50 * for all Bloom filters and keys interacting with
51 * the loaded version of the commit graph file and
52 * the Bloom data chunks.
53 */
54struct bloom_key {
55 uint32_t *hashes;
56};
57
f52207a4
GS
58/*
59 * Calculate the murmur3 32-bit hash value for the given data
60 * using the given seed.
61 * Produces a uniformly distributed hash value.
62 * Not considered to be cryptographically secure.
63 * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
64 */
65uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
66
f1294eaf
GS
67void fill_bloom_key(const char *data,
68 size_t len,
69 struct bloom_key *key,
70 const struct bloom_filter_settings *settings);
71
72void add_key_to_filter(const struct bloom_key *key,
73 struct bloom_filter *filter,
74 const struct bloom_filter_settings *settings);
75
f52207a4 76#endif