]>
Commit | Line | Data |
---|---|---|
e1273106 VM |
1 | /** |
2 | * Copyright 2013, GitHub, Inc | |
3 | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, | |
4 | * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. | |
19 | */ | |
20 | #ifndef __EWOK_BITMAP_H__ | |
21 | #define __EWOK_BITMAP_H__ | |
22 | ||
be0d9d53 | 23 | struct strbuf; |
e1273106 | 24 | typedef uint64_t eword_t; |
34b935c0 | 25 | #define BITS_IN_EWORD (sizeof(eword_t) * 8) |
e1273106 VM |
26 | |
27 | /** | |
28 | * Do not use __builtin_popcountll. The GCC implementation | |
29 | * is notoriously slow on all platforms. | |
30 | * | |
31 | * See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36041 | |
32 | */ | |
33 | static inline uint32_t ewah_bit_popcount64(uint64_t x) | |
34 | { | |
35 | x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL); | |
36 | x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); | |
37 | x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL); | |
38 | return (x * 0x0101010101010101ULL) >> 56; | |
39 | } | |
40 | ||
bd4e8822 TC |
41 | /* __builtin_ctzll was not available until 3.4.0 */ |
42 | #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR > 3)) | |
e1273106 VM |
43 | #define ewah_bit_ctz64(x) __builtin_ctzll(x) |
44 | #else | |
45 | static inline int ewah_bit_ctz64(uint64_t x) | |
46 | { | |
47 | int n = 0; | |
48 | if ((x & 0xffffffff) == 0) { x >>= 32; n += 32; } | |
49 | if ((x & 0xffff) == 0) { x >>= 16; n += 16; } | |
50 | if ((x & 0xff) == 0) { x >>= 8; n += 8; } | |
51 | if ((x & 0xf) == 0) { x >>= 4; n += 4; } | |
52 | if ((x & 0x3) == 0) { x >>= 2; n += 2; } | |
53 | if ((x & 0x1) == 0) { x >>= 1; n += 1; } | |
54 | return n + !x; | |
55 | } | |
56 | #endif | |
57 | ||
58 | struct ewah_bitmap { | |
59 | eword_t *buffer; | |
60 | size_t buffer_size; | |
61 | size_t alloc_size; | |
62 | size_t bit_size; | |
63 | eword_t *rlw; | |
64 | }; | |
65 | ||
66 | typedef void (*ewah_callback)(size_t pos, void *); | |
67 | ||
68 | struct ewah_bitmap *ewah_pool_new(void); | |
69 | void ewah_pool_free(struct ewah_bitmap *self); | |
70 | ||
71 | /** | |
72 | * Allocate a new EWAH Compressed bitmap | |
73 | */ | |
74 | struct ewah_bitmap *ewah_new(void); | |
75 | ||
76 | /** | |
77 | * Clear all the bits in the bitmap. Does not free or resize | |
78 | * memory. | |
79 | */ | |
80 | void ewah_clear(struct ewah_bitmap *self); | |
81 | ||
82 | /** | |
83 | * Free all the memory of the bitmap | |
84 | */ | |
85 | void ewah_free(struct ewah_bitmap *self); | |
86 | ||
87 | int ewah_serialize_to(struct ewah_bitmap *self, | |
88 | int (*write_fun)(void *out, const void *buf, size_t len), | |
89 | void *out); | |
90 | int ewah_serialize(struct ewah_bitmap *self, int fd); | |
91 | int ewah_serialize_native(struct ewah_bitmap *self, int fd); | |
be0d9d53 | 92 | int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *); |
e1273106 VM |
93 | |
94 | int ewah_deserialize(struct ewah_bitmap *self, int fd); | |
a0a2f7d7 | 95 | int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len); |
e1273106 VM |
96 | |
97 | uint32_t ewah_checksum(struct ewah_bitmap *self); | |
98 | ||
99 | /** | |
100 | * Logical not (bitwise negation) in-place on the bitmap | |
101 | * | |
102 | * This operation is linear time based on the size of the bitmap. | |
103 | */ | |
104 | void ewah_not(struct ewah_bitmap *self); | |
105 | ||
106 | /** | |
107 | * Call the given callback with the position of every single bit | |
108 | * that has been set on the bitmap. | |
109 | * | |
110 | * This is an efficient operation that does not fully decompress | |
111 | * the bitmap. | |
112 | */ | |
113 | void ewah_each_bit(struct ewah_bitmap *self, ewah_callback callback, void *payload); | |
114 | ||
115 | /** | |
116 | * Set a given bit on the bitmap. | |
117 | * | |
118 | * The bit at position `pos` will be set to true. Because of the | |
119 | * way that the bitmap is compressed, a set bit cannot be unset | |
120 | * later on. | |
121 | * | |
122 | * Furthermore, since the bitmap uses streaming compression, bits | |
123 | * can only set incrementally. | |
124 | * | |
125 | * E.g. | |
126 | * ewah_set(bitmap, 1); // ok | |
127 | * ewah_set(bitmap, 76); // ok | |
128 | * ewah_set(bitmap, 77); // ok | |
129 | * ewah_set(bitmap, 8712800127); // ok | |
130 | * ewah_set(bitmap, 25); // failed, assert raised | |
131 | */ | |
132 | void ewah_set(struct ewah_bitmap *self, size_t i); | |
133 | ||
134 | struct ewah_iterator { | |
135 | const eword_t *buffer; | |
136 | size_t buffer_size; | |
137 | ||
138 | size_t pointer; | |
139 | eword_t compressed, literals; | |
140 | eword_t rl, lw; | |
141 | int b; | |
142 | }; | |
143 | ||
144 | /** | |
145 | * Initialize a new iterator to run through the bitmap in uncompressed form. | |
146 | * | |
147 | * The iterator can be stack allocated. The underlying bitmap must not be freed | |
148 | * before the iteration is over. | |
149 | * | |
150 | * E.g. | |
151 | * | |
152 | * struct ewah_bitmap *bitmap = ewah_new(); | |
153 | * struct ewah_iterator it; | |
154 | * | |
155 | * ewah_iterator_init(&it, bitmap); | |
156 | */ | |
157 | void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent); | |
158 | ||
159 | /** | |
160 | * Yield every single word in the bitmap in uncompressed form. This is: | |
161 | * yield single words (32-64 bits) where each bit represents an actual | |
162 | * bit from the bitmap. | |
163 | * | |
164 | * Return: true if a word was yield, false if there are no words left | |
165 | */ | |
166 | int ewah_iterator_next(eword_t *next, struct ewah_iterator *it); | |
167 | ||
168 | void ewah_or( | |
169 | struct ewah_bitmap *ewah_i, | |
170 | struct ewah_bitmap *ewah_j, | |
171 | struct ewah_bitmap *out); | |
172 | ||
173 | void ewah_and_not( | |
174 | struct ewah_bitmap *ewah_i, | |
175 | struct ewah_bitmap *ewah_j, | |
176 | struct ewah_bitmap *out); | |
177 | ||
178 | void ewah_xor( | |
179 | struct ewah_bitmap *ewah_i, | |
180 | struct ewah_bitmap *ewah_j, | |
181 | struct ewah_bitmap *out); | |
182 | ||
183 | void ewah_and( | |
184 | struct ewah_bitmap *ewah_i, | |
185 | struct ewah_bitmap *ewah_j, | |
186 | struct ewah_bitmap *out); | |
187 | ||
188 | /** | |
189 | * Direct word access | |
190 | */ | |
191 | size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number); | |
192 | void ewah_add_dirty_words( | |
193 | struct ewah_bitmap *self, const eword_t *buffer, size_t number, int negate); | |
194 | size_t ewah_add(struct ewah_bitmap *self, eword_t word); | |
195 | ||
196 | ||
197 | /** | |
198 | * Uncompressed, old-school bitmap that can be efficiently compressed | |
199 | * into an `ewah_bitmap`. | |
200 | */ | |
201 | struct bitmap { | |
202 | eword_t *words; | |
203 | size_t word_alloc; | |
204 | }; | |
205 | ||
206 | struct bitmap *bitmap_new(void); | |
207 | void bitmap_set(struct bitmap *self, size_t pos); | |
208 | void bitmap_clear(struct bitmap *self, size_t pos); | |
209 | int bitmap_get(struct bitmap *self, size_t pos); | |
210 | void bitmap_reset(struct bitmap *self); | |
211 | void bitmap_free(struct bitmap *self); | |
212 | int bitmap_equals(struct bitmap *self, struct bitmap *other); | |
213 | int bitmap_is_subset(struct bitmap *self, struct bitmap *super); | |
214 | ||
215 | struct ewah_bitmap * bitmap_to_ewah(struct bitmap *bitmap); | |
216 | struct bitmap *ewah_to_bitmap(struct ewah_bitmap *ewah); | |
217 | ||
218 | void bitmap_and_not(struct bitmap *self, struct bitmap *other); | |
219 | void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other); | |
220 | void bitmap_or(struct bitmap *self, const struct bitmap *other); | |
221 | ||
222 | void bitmap_each_bit(struct bitmap *self, ewah_callback callback, void *data); | |
223 | size_t bitmap_popcount(struct bitmap *self); | |
224 | ||
225 | #endif |