static really_inline
m128 compress128_impl(m128 x, m128 m) {
-
m128 one = set1_2x64(1);
m128 bitset = one;
m128 vres = zeroes128();
m128 mask = not128(eq64_m128(tv, zeroes128()));
mask = vandq_s64(bitset, mask);
vres = or128(vres, mask);
- m = and128(m, sub_2x64(m, set1_2x64(1)));
+ m = and128(m, sub_2x64(m, one));
bitset = lshift64_m128(bitset, 1);
}
return vres;
return expand64_impl_c(x, m);
}
+/*
+ * Bit-expand (deposit) applied independently to each 64-bit lane: the set
+ * bits of m are visited from lowest to highest, and the k-th visited position
+ * receives bit k of x. Result bits outside m are zero.
+ *
+ * Relies on eq64_m128 producing an all-ones lane where the operands are
+ * equal, so that its complement acts as a per-lane "bit is set" select.
+ */
+static really_inline
+m128 expand128_impl(m128 x, m128 m) {
+    m128 one = set1_2x64(1);
+    m128 bitset = one;
+    m128 vres = zeroes128();
+    while (isnonzero128(m)) {
+        // Lowest remaining set bit of m in each lane (m & -m). A lane whose
+        // mask is already exhausted gives zero and so contributes nothing.
+        m128 lowbit = and128(m, sub_2x64(zeroes128(), m));
+
+        // The source bit is selected by bitset (bit k of x), not by m:
+        // testing x & m would look at destination positions instead of the
+        // packed source bits and produce wrong results.
+        m128 tv = and128(x, bitset);
+        m128 mask = not128(eq64_m128(tv, zeroes128()));
+
+        // Deposit the destination bit only where the source bit was set.
+        vres = or128(vres, and128(mask, lowbit));
+
+        // Clear the lowest set bit of m and advance to the next source bit.
+        m = and128(m, sub_2x64(m, one));
+        bitset = lshift64_m128(bitset, 1);
+    }
+    return vres;
+}
+
/* returns the first set bit after begin (if not ~0U). If no bit is set after
* begin returns ~0U
*/
return x & m0; // clear out extraneous bits*/
}
+/*
+ * Scalar fallback for the 128-bit expand: spills xvec and mvec to aligned
+ * 64-bit arrays, expands each 64-bit element with expand64_impl_c using the
+ * matching mask element, and reloads the two results as a vector.
+ */
+static really_inline
+m128 expand128_impl_c(m128 xvec, m128 mvec) {
+    u64a ALIGN_ATTR(16) x[2];
+    u64a ALIGN_ATTR(16) m[2];
+    store128(x, xvec);
+    store128(m, mvec);
+
+    // Keep the expanded values; previously the results were discarded and
+    // the unmodified input vector was returned.
+    x[0] = expand64_impl_c(x[0], m[0]);
+    x[1] = expand64_impl_c(x[1], m[1]);
+
+    return load128(x);
+}
/* returns the first set bit after begin (if not ~0U). If no bit is set after
* begin returns ~0U
#endif
}
+/*
+ * 128-bit expand for this build variant: forwards both arguments unchanged
+ * to expand128_impl_c and returns its result.
+ */
+static really_inline
+m128 expand128_impl(m128 x, m128 m) {
+    m128 expanded = expand128_impl_c(x, m);
+    return expanded;
+}
+
/* returns the first set bit after begin (if not ~0U). If no bit is set after
* begin returns ~0U
*/
return expand64_impl(x, m);
}
+/*
+ * Entry point for the 128-bit expand: passes x and m straight through to
+ * expand128_impl and returns whatever it produces.
+ */
+static really_inline
+m128 expand128(m128 x, m128 m) {
+    m128 expanded = expand128_impl(x, m);
+    return expanded;
+}
/* returns the first set bit after begin (if not ~0U). If no bit is set after
* begin returns ~0U
u64a ALIGN_ATTR(16) m[2];
store128(m, mvec);
- u32 bits[2] = { popcount64(m[0]), popcount64(m[1]) };
- u64a ALIGN_ATTR(16) v[2];
+ // Count the number of bits of compressed state we're writing out per
+ // chunk.
+ u32 ALIGN_ATTR(16) bits[2] = { popcount64(m[0]), popcount64(m[1]) };
+ u64a ALIGN_ATTR(16) v[2];
unpack_bits_64(v, (const u8 *)ptr, bits, 2);
+ m128 xvec = load128(v);
- u64a x[2] = { expand64(v[0], m[0]), expand64(v[1], m[1]) };
-
- return set2x64(x[1], x[0]);
+ // Expand vector
+ return expand128(xvec, mvec);
}
#endif