]>
Commit | Line | Data |
---|---|---|
e1273106 VM |
1 | /** |
2 | * Copyright 2013, GitHub, Inc | |
3 | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, | |
4 | * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License | |
8 | * as published by the Free Software Foundation; either version 2 | |
9 | * of the License, or (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
48425792 | 17 | * along with this program; if not, see <http://www.gnu.org/licenses/>. |
e1273106 VM |
18 | */ |
19 | #ifndef __EWOK_BITMAP_H__ | |
20 | #define __EWOK_BITMAP_H__ | |
21 | ||
be0d9d53 | 22 | struct strbuf; |
e1273106 | 23 | typedef uint64_t eword_t; |
34b935c0 | 24 | #define BITS_IN_EWORD (sizeof(eword_t) * 8) |
e1273106 VM |
25 | |
26 | /** | |
27 | * Do not use __builtin_popcountll. The GCC implementation | |
28 | * is notoriously slow on all platforms. | |
29 | * | |
30 | * See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=36041 | |
31 | */ | |
32 | static inline uint32_t ewah_bit_popcount64(uint64_t x) | |
33 | { | |
34 | x = (x & 0x5555555555555555ULL) + ((x >> 1) & 0x5555555555555555ULL); | |
35 | x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL); | |
36 | x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >> 4) & 0x0F0F0F0F0F0F0F0FULL); | |
37 | return (x * 0x0101010101010101ULL) >> 56; | |
38 | } | |
39 | ||
bd4e8822 TC |
40 | /* __builtin_ctzll was not available until 3.4.0 */ |
41 | #if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR > 3)) | |
e1273106 VM |
42 | #define ewah_bit_ctz64(x) __builtin_ctzll(x) |
43 | #else | |
44 | static inline int ewah_bit_ctz64(uint64_t x) | |
45 | { | |
46 | int n = 0; | |
47 | if ((x & 0xffffffff) == 0) { x >>= 32; n += 32; } | |
48 | if ((x & 0xffff) == 0) { x >>= 16; n += 16; } | |
49 | if ((x & 0xff) == 0) { x >>= 8; n += 8; } | |
50 | if ((x & 0xf) == 0) { x >>= 4; n += 4; } | |
51 | if ((x & 0x3) == 0) { x >>= 2; n += 2; } | |
52 | if ((x & 0x1) == 0) { x >>= 1; n += 1; } | |
53 | return n + !x; | |
54 | } | |
55 | #endif | |
56 | ||
57 | struct ewah_bitmap { | |
58 | eword_t *buffer; | |
59 | size_t buffer_size; | |
60 | size_t alloc_size; | |
61 | size_t bit_size; | |
62 | eword_t *rlw; | |
63 | }; | |
64 | ||
65 | typedef void (*ewah_callback)(size_t pos, void *); | |
66 | ||
67 | struct ewah_bitmap *ewah_pool_new(void); | |
68 | void ewah_pool_free(struct ewah_bitmap *self); | |
69 | ||
70 | /** | |
71 | * Allocate a new EWAH Compressed bitmap | |
72 | */ | |
73 | struct ewah_bitmap *ewah_new(void); | |
74 | ||
e1273106 VM |
75 | /** |
76 | * Free all the memory of the bitmap | |
77 | */ | |
78 | void ewah_free(struct ewah_bitmap *self); | |
79 | ||
80 | int ewah_serialize_to(struct ewah_bitmap *self, | |
81 | int (*write_fun)(void *out, const void *buf, size_t len), | |
82 | void *out); | |
be0d9d53 | 83 | int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *); |
e1273106 | 84 | |
9d2e330b | 85 | ssize_t ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len); |
e1273106 VM |
86 | |
87 | uint32_t ewah_checksum(struct ewah_bitmap *self); | |
88 | ||
e1273106 VM |
89 | /** |
90 | * Call the given callback with the position of every single bit | |
91 | * that has been set on the bitmap. | |
92 | * | |
93 | * This is an efficient operation that does not fully decompress | |
94 | * the bitmap. | |
95 | */ | |
96 | void ewah_each_bit(struct ewah_bitmap *self, ewah_callback callback, void *payload); | |
97 | ||
98 | /** | |
99 | * Set a given bit on the bitmap. | |
100 | * | |
101 | * The bit at position `pos` will be set to true. Because of the | |
102 | * way that the bitmap is compressed, a set bit cannot be unset | |
103 | * later on. | |
104 | * | |
105 | * Furthermore, since the bitmap uses streaming compression, bits | |
106 | * can only set incrementally. | |
107 | * | |
108 | * E.g. | |
109 | * ewah_set(bitmap, 1); // ok | |
110 | * ewah_set(bitmap, 76); // ok | |
111 | * ewah_set(bitmap, 77); // ok | |
112 | * ewah_set(bitmap, 8712800127); // ok | |
113 | * ewah_set(bitmap, 25); // failed, assert raised | |
114 | */ | |
115 | void ewah_set(struct ewah_bitmap *self, size_t i); | |
116 | ||
117 | struct ewah_iterator { | |
118 | const eword_t *buffer; | |
119 | size_t buffer_size; | |
120 | ||
121 | size_t pointer; | |
122 | eword_t compressed, literals; | |
123 | eword_t rl, lw; | |
124 | int b; | |
125 | }; | |
126 | ||
127 | /** | |
128 | * Initialize a new iterator to run through the bitmap in uncompressed form. | |
129 | * | |
130 | * The iterator can be stack allocated. The underlying bitmap must not be freed | |
131 | * before the iteration is over. | |
132 | * | |
133 | * E.g. | |
134 | * | |
135 | * struct ewah_bitmap *bitmap = ewah_new(); | |
136 | * struct ewah_iterator it; | |
137 | * | |
138 | * ewah_iterator_init(&it, bitmap); | |
139 | */ | |
140 | void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent); | |
141 | ||
142 | /** | |
143 | * Yield every single word in the bitmap in uncompressed form. This is: | |
144 | * yield single words (32-64 bits) where each bit represents an actual | |
145 | * bit from the bitmap. | |
146 | * | |
147 | * Return: true if a word was yield, false if there are no words left | |
148 | */ | |
149 | int ewah_iterator_next(eword_t *next, struct ewah_iterator *it); | |
150 | ||
e1273106 VM |
151 | void ewah_xor( |
152 | struct ewah_bitmap *ewah_i, | |
153 | struct ewah_bitmap *ewah_j, | |
154 | struct ewah_bitmap *out); | |
155 | ||
e1273106 VM |
156 | /** |
157 | * Direct word access | |
158 | */ | |
159 | size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number); | |
160 | void ewah_add_dirty_words( | |
161 | struct ewah_bitmap *self, const eword_t *buffer, size_t number, int negate); | |
162 | size_t ewah_add(struct ewah_bitmap *self, eword_t word); | |
163 | ||
164 | ||
165 | /** | |
166 | * Uncompressed, old-school bitmap that can be efficiently compressed | |
167 | * into an `ewah_bitmap`. | |
168 | */ | |
169 | struct bitmap { | |
170 | eword_t *words; | |
171 | size_t word_alloc; | |
172 | }; | |
173 | ||
174 | struct bitmap *bitmap_new(void); | |
14fbd260 | 175 | struct bitmap *bitmap_word_alloc(size_t word_alloc); |
e1273106 | 176 | void bitmap_set(struct bitmap *self, size_t pos); |
cc4aa285 | 177 | void bitmap_unset(struct bitmap *self, size_t pos); |
e1273106 VM |
178 | int bitmap_get(struct bitmap *self, size_t pos); |
179 | void bitmap_reset(struct bitmap *self); | |
180 | void bitmap_free(struct bitmap *self); | |
181 | int bitmap_equals(struct bitmap *self, struct bitmap *other); | |
182 | int bitmap_is_subset(struct bitmap *self, struct bitmap *super); | |
183 | ||
184 | struct ewah_bitmap * bitmap_to_ewah(struct bitmap *bitmap); | |
185 | struct bitmap *ewah_to_bitmap(struct ewah_bitmap *ewah); | |
186 | ||
187 | void bitmap_and_not(struct bitmap *self, struct bitmap *other); | |
188 | void bitmap_or_ewah(struct bitmap *self, struct ewah_bitmap *other); | |
189 | void bitmap_or(struct bitmap *self, const struct bitmap *other); | |
190 | ||
e1273106 VM |
191 | size_t bitmap_popcount(struct bitmap *self); |
192 | ||
193 | #endif |