]>
Commit | Line | Data |
---|---|---|
f52207a4 GS |
1 | #include "git-compat-util.h" |
2 | #include "bloom.h" | |
3 | ||
4 | static uint32_t rotate_left(uint32_t value, int32_t count) | |
5 | { | |
6 | uint32_t mask = 8 * sizeof(uint32_t) - 1; | |
7 | count &= mask; | |
8 | return ((value << count) | (value >> ((-count) & mask))); | |
9 | } | |
10 | ||
f1294eaf GS |
11 | static inline unsigned char get_bitmask(uint32_t pos) |
12 | { | |
13 | return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1)); | |
14 | } | |
15 | ||
f52207a4 GS |
16 | /* |
17 | * Calculate the murmur3 32-bit hash value for the given data | |
18 | * using the given seed. | |
19 | * Produces a uniformly distributed hash value. | |
20 | * Not considered to be cryptographically secure. | |
21 | * Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm | |
22 | */ | |
23 | uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len) | |
24 | { | |
25 | const uint32_t c1 = 0xcc9e2d51; | |
26 | const uint32_t c2 = 0x1b873593; | |
27 | const uint32_t r1 = 15; | |
28 | const uint32_t r2 = 13; | |
29 | const uint32_t m = 5; | |
30 | const uint32_t n = 0xe6546b64; | |
31 | int i; | |
32 | uint32_t k1 = 0; | |
33 | const char *tail; | |
34 | ||
35 | int len4 = len / sizeof(uint32_t); | |
36 | ||
37 | uint32_t k; | |
38 | for (i = 0; i < len4; i++) { | |
39 | uint32_t byte1 = (uint32_t)data[4*i]; | |
40 | uint32_t byte2 = ((uint32_t)data[4*i + 1]) << 8; | |
41 | uint32_t byte3 = ((uint32_t)data[4*i + 2]) << 16; | |
42 | uint32_t byte4 = ((uint32_t)data[4*i + 3]) << 24; | |
43 | k = byte1 | byte2 | byte3 | byte4; | |
44 | k *= c1; | |
45 | k = rotate_left(k, r1); | |
46 | k *= c2; | |
47 | ||
48 | seed ^= k; | |
49 | seed = rotate_left(seed, r2) * m + n; | |
50 | } | |
51 | ||
52 | tail = (data + len4 * sizeof(uint32_t)); | |
53 | ||
54 | switch (len & (sizeof(uint32_t) - 1)) { | |
55 | case 3: | |
56 | k1 ^= ((uint32_t)tail[2]) << 16; | |
57 | /*-fallthrough*/ | |
58 | case 2: | |
59 | k1 ^= ((uint32_t)tail[1]) << 8; | |
60 | /*-fallthrough*/ | |
61 | case 1: | |
62 | k1 ^= ((uint32_t)tail[0]) << 0; | |
63 | k1 *= c1; | |
64 | k1 = rotate_left(k1, r1); | |
65 | k1 *= c2; | |
66 | seed ^= k1; | |
67 | break; | |
68 | } | |
69 | ||
70 | seed ^= (uint32_t)len; | |
71 | seed ^= (seed >> 16); | |
72 | seed *= 0x85ebca6b; | |
73 | seed ^= (seed >> 13); | |
74 | seed *= 0xc2b2ae35; | |
75 | seed ^= (seed >> 16); | |
76 | ||
77 | return seed; | |
f1294eaf GS |
78 | } |
79 | ||
80 | void fill_bloom_key(const char *data, | |
81 | size_t len, | |
82 | struct bloom_key *key, | |
83 | const struct bloom_filter_settings *settings) | |
84 | { | |
85 | int i; | |
86 | const uint32_t seed0 = 0x293ae76f; | |
87 | const uint32_t seed1 = 0x7e646e2c; | |
88 | const uint32_t hash0 = murmur3_seeded(seed0, data, len); | |
89 | const uint32_t hash1 = murmur3_seeded(seed1, data, len); | |
90 | ||
91 | key->hashes = (uint32_t *)xcalloc(settings->num_hashes, sizeof(uint32_t)); | |
92 | for (i = 0; i < settings->num_hashes; i++) | |
93 | key->hashes[i] = hash0 + i * hash1; | |
94 | } | |
95 | ||
96 | void add_key_to_filter(const struct bloom_key *key, | |
97 | struct bloom_filter *filter, | |
98 | const struct bloom_filter_settings *settings) | |
99 | { | |
100 | int i; | |
101 | uint64_t mod = filter->len * BITS_PER_WORD; | |
102 | ||
103 | for (i = 0; i < settings->num_hashes; i++) { | |
104 | uint64_t hash_mod = key->hashes[i] % mod; | |
105 | uint64_t block_pos = hash_mod / BITS_PER_WORD; | |
106 | ||
107 | filter->data[block_pos] |= get_bitmask(hash_mod); | |
108 | } | |
109 | } |