]>
Commit | Line | Data |
---|---|---|
e1273106 VM |
1 | /** |
2 | * Copyright 2013, GitHub, Inc | |
3 | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, | |
4 | * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
48425792 | 17 | * along with this program; if not, see <http://www.gnu.org/licenses/>. |
e1273106 VM |
18 | */ |
19 | #ifndef __EWOK_BITMAP_H__ | |
20 | #define __EWOK_BITMAP_H__ | |
21 | ||
be0d9d53 | 22 | struct strbuf; |
e1273106 | 23 | typedef uint64_t eword_t; |
34b935c0 | 24 | #define BITS_IN_EWORD (sizeof(eword_t) * 8) |
e1273106 VM |
25 | |
26 | /** | |
27 | * Do not use __builtin_popcountll. The GCC implementation | |
28 | * is notoriously slow on all platforms. | |
29 | * | |
30 | * See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36041 | |
31 | */ | |
32 | static inline uint32_t ewah_bit_popcount64(uint64_t x) | |
33 | { | |
34 | x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL); | |
35 | x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); | |
36 | x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL); | |
37 | return (x * 0x0101010101010101ULL) >> 56; | |
38 | } | |
39 | ||
bd4e8822 TC |
40 | /* __builtin_ctzll was not available until 3.4.0 */ |
41 | #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR > 3)) | |
e1273106 VM |
42 | #define ewah_bit_ctz64(x) __builtin_ctzll(x) |
43 | #else | |
44 | static inline int ewah_bit_ctz64(uint64_t x) | |
45 | { | |
46 | int n = 0; | |
47 | if ((x & 0xffffffff) == 0) { x >>= 32; n += 32; } | |
48 | if ((x & 0xffff) == 0) { x >>= 16; n += 16; } | |
49 | if ((x & 0xff) == 0) { x >>= 8; n += 8; } | |
50 | if ((x & 0xf) == 0) { x >>= 4; n += 4; } | |
51 | if ((x & 0x3) == 0) { x >>= 2; n += 2; } | |
52 | if ((x & 0x1) == 0) { x >>= 1; n += 1; } | |
53 | return n + !x; | |
54 | } | |
55 | #endif | |
56 | ||
57 | struct ewah_bitmap { | |
58 | eword_t *buffer; | |
59 | size_t buffer_size; | |
60 | size_t alloc_size; | |
61 | size_t bit_size; | |
62 | eword_t *rlw; | |
63 | }; | |
64 | ||
65 | typedef void (*ewah_callback)(size_t pos, void *); | |
66 | ||
67 | struct ewah_bitmap *ewah_pool_new(void); | |
68 | void ewah_pool_free(struct ewah_bitmap *self); | |
69 | ||
70 | /** | |
71 | * Allocate a new EWAH Compressed bitmap | |
72 | */ | |
73 | struct ewah_bitmap *ewah_new(void); | |
74 | ||
75 | /** | |
76 | * Clear all the bits in the bitmap. Does not free or resize | |
77 | * memory. | |
78 | */ | |
79 | void ewah_clear(struct ewah_bitmap *self); | |
80 | ||
81 | /** | |
82 | * Free all the memory of the bitmap | |
83 | */ | |
84 | void ewah_free(struct ewah_bitmap *self); | |
85 | ||
86 | int ewah_serialize_to(struct ewah_bitmap *self, | |
87 | int (*write_fun)(void *out, const void *buf, size_t len), | |
88 | void *out); | |
89 | int ewah_serialize(struct ewah_bitmap *self, int fd); | |
90 | int ewah_serialize_native(struct ewah_bitmap *self, int fd); | |
be0d9d53 | 91 | int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *); |
e1273106 VM |
92 | |
93 | int ewah_deserialize(struct ewah_bitmap *self, int fd); | |
9d2e330b | 94 | ssize_t ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len); |
e1273106 VM |
95 | |
96 | uint32_t ewah_checksum(struct ewah_bitmap *self); | |
97 | ||
98 | /** | |
99 | * Logical not (bitwise negation) in-place on the bitmap | |
100 | * | |
101 | * This operation is linear time based on the size of the bitmap. | |
102 | */ | |
103 | void ewah_not(struct ewah_bitmap *self); | |
104 | ||
105 | /** | |
106 | * Call the given callback with the position of every single bit | |
107 | * that has been set on the bitmap. | |
108 | * | |
109 | * This is an efficient operation that does not fully decompress | |
110 | * the bitmap. | |
111 | */ | |
112 | void ewah_each_bit(struct ewah_bitmap *self, ewah_callback callback, void *payload); | |
113 | ||
114 | /** | |
115 | * Set a given bit on the bitmap. | |
116 | * | |
117 | * The bit at position `pos` will be set to true. Because of the | |
118 | * way that the bitmap is compressed, a set bit cannot be unset | |
119 | * later on. | |
120 | * | |
121 | * Furthermore, since the bitmap uses streaming compression, bits | |
122 | * can only set incrementally. | |
123 | * | |
124 | * E.g. | |
125 | * ewah_set(bitmap, 1); // ok | |
126 | * ewah_set(bitmap, 76); // ok | |
127 | * ewah_set(bitmap, 77); // ok | |
128 | * ewah_set(bitmap, 8712800127); // ok | |
129 | * ewah_set(bitmap, 25); // failed, assert raised | |
130 | */ | |
131 | void ewah_set(struct ewah_bitmap *self, size_t i); | |
132 | ||
133 | struct ewah_iterator { | |
134 | const eword_t *buffer; | |
135 | size_t buffer_size; | |
136 | ||
137 | size_t pointer; | |
138 | eword_t compressed, literals; | |
139 | eword_t rl, lw; | |
140 | int b; | |
141 | }; | |
142 | ||
143 | /** | |
144 | * Initialize a new iterator to run through the bitmap in uncompressed form. | |
145 | * | |
146 | * The iterator can be stack allocated. The underlying bitmap must not be freed | |
147 | * before the iteration is over. | |
148 | * | |
149 | * E.g. | |
150 | * | |
151 | * struct ewah_bitmap *bitmap = ewah_new(); | |
152 | * struct ewah_iterator it; | |
153 | * | |
154 | * ewah_iterator_init(&it, bitmap); | |
155 | */ | |
156 | void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent); | |
157 | ||
158 | /** | |
159 | * Yield every single word in the bitmap in uncompressed form. This is: | |
160 | * yield single words (32-64 bits) where each bit represents an actual | |
161 | * bit from the bitmap. | |
162 | * | |
163 | * Return: true if a word was yield, false if there are no words left | |
164 | */ | |
165 | int ewah_iterator_next(eword_t *next, struct ewah_iterator *it); | |
166 | ||
167 | void ewah_or( | |
168 | struct ewah_bitmap *ewah_i, | |
169 | struct ewah_bitmap *ewah_j, | |
170 | struct ewah_bitmap *out); | |
171 | ||
172 | void ewah_and_not( | |
173 | struct ewah_bitmap *ewah_i, | |
174 | struct ewah_bitmap *ewah_j, | |
175 | struct ewah_bitmap *out); | |
176 | ||
177 | void ewah_xor( | |
178 | struct ewah_bitmap *ewah_i, | |
179 | struct ewah_bitmap *ewah_j, | |
180 | struct ewah_bitmap *out); | |
181 | ||
182 | void ewah_and( | |
183 | struct ewah_bitmap *ewah_i, | |
184 | struct ewah_bitmap *ewah_j, | |
185 | struct ewah_bitmap *out); | |
186 | ||
187 | /** | |
188 | * Direct word access | |
189 | */ | |
190 | size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number); | |
191 | void ewah_add_dirty_words( | |
192 | struct ewah_bitmap *self, const eword_t *buffer, size_t number, int negate); | |
193 | size_t ewah_add(struct ewah_bitmap *self, eword_t word); | |
194 | ||
195 | ||
196 | /** | |
197 | * Uncompressed, old-school bitmap that can be efficiently compressed | |
198 | * into an `ewah_bitmap`. | |
199 | */ | |
200 | struct bitmap { | |
201 | eword_t *words; | |
202 | size_t word_alloc; | |
203 | }; | |
204 | ||
205 | struct bitmap *bitmap_new(void); | |
206 | void bitmap_set(struct bitmap *self, size_t pos); | |
e1273106 VM |
207 | int bitmap_get(struct bitmap *self, size_t pos); |
208 | void bitmap_reset(struct bitmap *self); | |
209 | void bitmap_free(struct bitmap *self); | |
210 | int bitmap_equals(struct bitmap *self, struct bitmap *other); | |
211 | int bitmap_is_subset(struct bitmap *self, struct bitmap *super); | |
212 | ||
213 | struct ewah_bitmap * bitmap_to_ewah(struct bitmap *bitmap); | |
214 | struct bitmap *ewah_to_bitmap(struct ewah_bitmap *ewah); | |
215 | ||
216 | void bitmap_and_not(struct bitmap *self, struct bitmap *other); | |
217 | void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other); | |
218 | void bitmap_or(struct bitmap *self, const struct bitmap *other); | |
219 | ||
e1273106 VM |
220 | size_t bitmap_popcount(struct bitmap *self); |
221 | ||
222 | #endif |