DEBUG_PRINTF("d - d0: %ld \n", d - d0);
#if defined(HAVE_MASKED_LOADS)
uint8_t l = d - d0;
- typename SuperVector<S>::comparemask_type mask = ~SuperVector<S>::single_load_mask(l);
+ typename SuperVector<S>::comparemask_type mask = ~SuperVector<S>::load_mask(l); // call switched from single_load_mask() to load_mask(); the inverted value is the mask passed to loadu_maskz(d0, mask)
SuperVector<S> chars = SuperVector<S>::loadu_maskz(d0, mask) & caseMask;
typename SuperVector<S>::comparemask_type z = mask1.eqmask(chars);
DEBUG_PRINTF("mask: %08llx\n", mask);
hwlm_error_t rv = single_zscan<S>(n, d0, buf, z, len, cbi);
#else
uint8_t l = d0 + S - d;
+ DEBUG_PRINTF("l: %d \n", l);
SuperVector<S> chars = SuperVector<S>::loadu_maskz(d, l) & caseMask;
+ chars.print8("chars");
typename SuperVector<S>::comparemask_type z = mask1.eqmask(chars);
+ DEBUG_PRINTF("z: %08llx\n", (u64a) z); // z before iteration_mask(); cast to u64a so the argument matches %llx whatever the width of comparemask_type
+ z = SuperVector<S>::iteration_mask(z); // eqmask result goes through iteration_mask() before single_zscan sees it. NOTE(review): the HAVE_MASKED_LOADS branch above hands z to single_zscan without this step - confirm that is intended
+ DEBUG_PRINTF("z: %08llx\n", (u64a) z); // z after iteration_mask(), for comparison with the print above
+
hwlm_error_t rv = single_zscan<S>(n, d, buf, z, len, cbi);
#endif
chars.print32("chars");
uint8_t l = buf_end - d;
SuperVector<S> chars = SuperVector<S>::loadu_maskz(d, l) & caseMask;
typename SuperVector<S>::comparemask_type z = mask1.eqmask(chars);
+ z = SuperVector<S>::iteration_mask(z); // eqmask result for the final l-byte load goes through iteration_mask() before single_zscan consumes it
+
hwlm_error_t rv = single_zscan<S>(n, d, buf, z, len, cbi);
RETURN_IF_TERMINATED(rv);
}
const u8 *d0 = ROUNDDOWN_PTR(d, S);
#if defined(HAVE_MASKED_LOADS)
uint8_t l = d - d0;
- typename SuperVector<S>::comparemask_type mask = ~SuperVector<S>::double_load_mask(l);
+ typename SuperVector<S>::comparemask_type mask = ~SuperVector<S>::load_mask(l); // call switched from double_load_mask() to load_mask(); the inverted value is the mask passed to loadu_maskz(d0, mask)
SuperVector<S> chars = SuperVector<S>::loadu_maskz(d0, mask) & caseMask;
typename SuperVector<S>::comparemask_type z1 = mask1.eqmask(chars);
typename SuperVector<S>::comparemask_type z2 = mask2.eqmask(chars);
typename SuperVector<S>::comparemask_type z = (z1 << SuperVector<S>::mask_width()) & z2;
- DEBUG_PRINTF("z: %0llx\n", z);
+ z = SuperVector<S>::iteration_mask(z); // the combined z1/z2 mask now goes through iteration_mask(); the debug print of z on this spot is dropped
lastz1 = z1 >> (S - 1);
DEBUG_PRINTF("mask: %08llx\n", mask);
chars.print8("chars");
typename SuperVector<S>::comparemask_type z1 = mask1.eqmask(chars);
typename SuperVector<S>::comparemask_type z2 = mask2.eqmask(chars);
-
typename SuperVector<S>::comparemask_type z = (z1 << SuperVector<S>::mask_width()) & z2;
+ z = SuperVector<S>::iteration_mask(z); // same iteration_mask() step as in the masked-load path, applied before double_zscan consumes z
+
hwlm_error_t rv = double_zscan<S>(n, d, buf, z, len, cbi);
lastz1 = z1 >> (l - 1);
#endif
template <>
really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, uint8_t const len)
{
- SuperVector mask = Ones_vshr(16 -len);
- SuperVector<16> v = loadu(ptr);
+ SuperVector mask = Ones_vshr(16 - len); // spacing fix only; mask is ANDed with the loaded vector below (presumably keeping the first len bytes - confirm Ones_vshr semantics)
+ SuperVector v = loadu(ptr); // SuperVector here is the injected class name, i.e. the same SuperVector<16> type as before
return mask & v;
}
+template <>
+really_inline SuperVector<16> SuperVector<16>::loadu_maskz(void const *ptr, typename base_type::comparemask_type const mask)
+{
+ DEBUG_PRINTF("mask = %08llx\n", mask); // only place the mask is read in this overload
+ SuperVector v = loadu(ptr);
+ (void)mask; // explicit discard: the mask is not used for the load itself
+ return v; // FIXME: the mask is not applied yet - the loaded vector is returned as-is, so callers get no zeroing from this overload
+}
+
+template<>
+really_inline typename SuperVector<16>::comparemask_type SuperVector<16>::findLSB(typename SuperVector<16>::comparemask_type &z)
+{
+ return findAndClearLSB_64(&z) >> 2; // z is passed by address, so the helper can modify the caller's mask. NOTE(review): the >> 2 presumably scales a bit index in a 4-bits-per-byte compare mask down to a byte index - confirm against mask_width()
+}
+
template<>
really_inline SuperVector<16> SuperVector<16>::alignr(SuperVector<16> &other, int8_t offset)
{
static constexpr bool is_valid = true;
static constexpr u16 size = 16;
using type = m128;
+#if defined(ARCH_ARM32) || defined(ARCH_AARCH64)
+ using comparemask_type = u64a; // ARM builds get a u64a compare mask for the 16-byte vector
+#else
using comparemask_type = u32;
+#endif // ARCH_ARM32 || ARCH_AARCH64; every other architecture keeps the u32 compare mask
static constexpr bool has_previous = false;
using previous_type = u64a;
static constexpr u16 previous_size = 8;
static typename base_type::comparemask_type
iteration_mask(typename base_type::comparemask_type mask);
- static typename base_type::comparemask_type single_load_mask(uint8_t const len) { return (((1ULL) << (len)) - 1ULL); }
- static typename base_type::comparemask_type double_load_mask(uint8_t const len) { return (((1ULL) << (len)) - 1ULL); }
+ static typename base_type::comparemask_type load_mask(uint8_t const len) { return (((1ULL) << (len)) - 1ULL); } // single replacement for the two identical helpers removed above: low `len` bits set, converted to comparemask_type; the shift is undefined for len >= 64
static typename base_type::comparemask_type findLSB(typename base_type::comparemask_type &z);
static SuperVector loadu(void const *ptr);
static SuperVector load(void const *ptr);