return HWLM_SUCCESS;
}
-// The short scan routine. It is used both to scan data up to an
-// alignment boundary if needed and to finish off data that the aligned scan
-// function can't handle (due to small/unaligned chunk at end)
+
template<uint16_t S>
static really_inline
-hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
+hwlm_error_t scanSingleShort(const struct noodTable *n, const u8 *buf,
SuperVector<S> caseMask, SuperVector<S> mask1,
const struct cb_info *cbi, size_t len, size_t start,
size_t end) {
return HWLM_SUCCESS;
}
+ SuperVector<S> v = SuperVector<S>::Zeroes();
+ memcpy(&v.u, d, l);
+
typename SuperVector<S>::movemask_type mask = SINGLE_LOAD_MASK(l);
+ v = v & caseMask;
+ typename SuperVector<S>::movemask_type z = mask & mask1.eqmask(v);
+
+ return single_zscan(n, d, buf, z, len, cbi);
+}
+
+// The short scan routine. It is used both to scan data up to an
+// alignment boundary if needed and to finish off data that the aligned scan
+// function can't handle (due to small/unaligned chunk at end)
+template<uint16_t S>
+static really_inline
+hwlm_error_t scanSingleUnaligned(const struct noodTable *n, const u8 *buf,
+ SuperVector<S> caseMask, SuperVector<S> mask1,
+ const struct cb_info *cbi, size_t len, size_t offset,
+ size_t start,
+ size_t end) {
+ const u8 *d = buf + offset;
+ DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
+ const size_t l = end - start;
+ DEBUG_PRINTF("l = %ld\n", l);
+ assert(l <= 64);
+ if (!l) {
+ return HWLM_SUCCESS;
+ }
+ size_t buf_off = start - offset;
+ typename SuperVector<S>::movemask_type mask = SINGLE_LOAD_MASK(l) << buf_off;
SuperVector<S> v = SuperVector<S>::loadu(d) & caseMask;
typename SuperVector<S>::movemask_type z = mask & mask1.eqmask(v);
template<uint16_t S>
static really_inline
-hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
- SuperVector<S> caseMask, SuperVector<S> mask1, SuperVector<S> mask2, typename SuperVector<S>::movemask_type *lastz1,
+hwlm_error_t scanDoubleShort(const struct noodTable *n, const u8 *buf,
+ SuperVector<S> caseMask, SuperVector<S> mask1, SuperVector<S> mask2,
const struct cb_info *cbi, size_t len, size_t start, size_t end) {
const u8 *d = buf + start;
DEBUG_PRINTF("start %zu end %zu\n", start, end);
if (!l) {
return HWLM_SUCCESS;
}
- SuperVector<S> v = SuperVector<S>::loadu(d) & caseMask;
+ SuperVector<S> v = SuperVector<S>::Zeroes();
+ memcpy(&v.u, d, l);
+ v = v & caseMask;
typename SuperVector<S>::movemask_type mask = DOUBLE_LOAD_MASK(l);
typename SuperVector<S>::movemask_type z1 = mask1.eqmask(v);
typename SuperVector<S>::movemask_type z2 = mask2.eqmask(v);
- typename SuperVector<S>::movemask_type z = mask & (*lastz1 | z1 << 1) & z2;
- *lastz1 = z1 >> (l -1);
+ typename SuperVector<S>::movemask_type z = mask & (z1 << 1) & z2;
+
+ return double_zscan(n, d, buf, z, len, cbi);
+}
+
+template<uint16_t S> // Scan bytes [start, end) of buf for the two-byte pattern using one unaligned vector load taken at buf + offset.
+static really_inline // Returns HWLM_SUCCESS for an empty range, otherwise whatever double_zscan returns.
+hwlm_error_t scanDoubleUnaligned(const struct noodTable *n, const u8 *buf,
+ SuperVector<S> caseMask, SuperVector<S> mask1, SuperVector<S> mask2,
+ const struct cb_info *cbi, size_t len, size_t offset, size_t start, size_t end) {
+ const u8 *d = buf + offset; // load address; start - offset is used below as a shift, so offset is expected to be <= start
+ DEBUG_PRINTF("start %zu end %zu offset %zu\n", start, end, offset);
+ const size_t l = end - start; // number of bytes this call is responsible for
+ assert(l <= S); // the range must fit in a single vector of S bytes
+ if (!l) {
+ return HWLM_SUCCESS;
+ }
+ SuperVector<S> v = SuperVector<S>::loadu(d) & caseMask; // caseMask is ANDed into the data before comparing
+ size_t buf_off = start - offset; // distance of start from the load address
+ typename SuperVector<S>::movemask_type mask = DOUBLE_LOAD_MASK(l) << buf_off; // NOTE(review): DOUBLE_LOAD_MASK is defined elsewhere; presumably it selects l positions, moved here to line up with start
+ typename SuperVector<S>::movemask_type z1 = mask1.eqmask(v); // comparison result against mask1
+ typename SuperVector<S>::movemask_type z2 = mask2.eqmask(v); // comparison result against mask2
+ typename SuperVector<S>::movemask_type z = mask & (z1 << 1) & z2; // keep positions inside mask where z2 is set and z1, shifted up by one, is set; no z1 carry from an earlier call is taken as input
return double_zscan(n, d, buf, z, len, cbi);
}
const u8 *e = buf + end;
DEBUG_PRINTF("start %p end %p \n", d, e);
assert(d < e);
+ if (e - d < S) {
+ return scanSingleShort(n, buf, caseMask, mask1, cbi, len, start, end);
+ }
if (d + S <= e) {
// peel off first part to cacheline boundary
const u8 *d1 = ROUNDUP_PTR(d, S);
DEBUG_PRINTF("until aligned %p \n", d1);
- if (scanSingleUnaligned(n, buf, caseMask, mask1, cbi, len, start, d1 - buf) == HWLM_TERMINATED) {
+ if (scanSingleUnaligned(n, buf, caseMask, mask1, cbi, len, start, start, d1 - buf) == HWLM_TERMINATED) {
return HWLM_TERMINATED;
}
d = d1;
DEBUG_PRINTF("d %p e %p \n", d, e);
// finish off tail
+ size_t s2End = ROUNDDOWN_PTR(e, S) - buf;
+ if (s2End == end) {
+ return HWLM_SUCCESS;
+ }
- return scanSingleUnaligned(n, buf, caseMask, mask1, cbi, len, d - buf, end);
+ return scanSingleUnaligned(n, buf, caseMask, mask1, cbi, len, end - S, s2End, len);
}
template <uint16_t S>
const u8 *e = buf + end;
DEBUG_PRINTF("start %p end %p \n", d, e);
assert(d < e);
+ if (e - d < S) {
+ return scanDoubleShort(n, buf, caseMask, mask1, mask2, cbi, len, d - buf, end);
+ }
if (d + S <= e) {
// peel off first part to cacheline boundary
- const u8 *d1 = ROUNDUP_PTR(d, S);
+ const u8 *d1 = ROUNDUP_PTR(d, S) + 1;
DEBUG_PRINTF("until aligned %p \n", d1);
- if (scanDoubleUnaligned(n, buf, caseMask, mask1, mask2, &lastz1, cbi, len, start, d1 - buf) == HWLM_TERMINATED) {
+ if (scanDoubleUnaligned(n, buf, caseMask, mask1, mask2, cbi, len, start, start, d1 - buf) == HWLM_TERMINATED) {
return HWLM_TERMINATED;
}
- d = d1;
+ d = d1 - 1;
size_t loops = (end - (d - buf)) / S;
DEBUG_PRINTF("loops %ld \n", loops);
hwlm_error_t rv = double_zscan(n, d, buf, z, len, cbi);
RETURN_IF_TERMINATED(rv);
}
+ if (loops == 0) {
+ d = d1;
+ }
}
-
- DEBUG_PRINTF("d %p e %p \n", d, e);
// finish off tail
-
- return scanDoubleUnaligned(n, buf, caseMask, mask1, mask2, &lastz1, cbi, len, d - buf, end);
+ size_t s2End = ROUNDDOWN_PTR(e, S) - buf;
+ if (s2End == end) {
+ return HWLM_SUCCESS;
+ }
+ return scanDoubleUnaligned(n, buf, caseMask, mask1, mask2, cbi, len, end - S, d - buf, end);
}
// Single-character specialisation, used when keyLen = 1
return last_zero_match_inverted<S>(buf, mask, len);
}
-template <uint16_t S>
+template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
SuperVector<S> mask = mask1 & (mask2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
- bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
+ bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
- if (partial_match) return buf - 1;
+ if (partial_match) {
+ mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
+ }
return first_non_zero_match<S>(buf, mask, len);
}
-template <uint16_t S>
+template <uint16_t S, bool check_partial>
static really_inline
const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
SuperVector<S> mask = (mask1 << 1)& mask2;
DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
- bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
+ bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) {
mask = mask | (SuperVector<S>::Ones() >> (S-1));
return last_non_zero_match<S>(buf, mask, len);
}
-template <uint16_t S>
+template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
SuperVector<S> const mask1, SuperVector<S> const mask2,
SuperVector<S> mask = v1 & (v2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
- bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1));
+ bool partial_match = (check_partial && ((buf[0] & m2) == c2) && ((buf[-1] & m1) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
- if (partial_match) return buf - 1;
+ if (partial_match) {
+ mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
+ }
return first_non_zero_match<S>(buf, mask, len);
}
return last_zero_match_inverted<S>(buf, mask, len);
}
-template <uint16_t S>
+template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
SuperVector<S> mask = mask1 & (mask2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
- bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
+ bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
- if (partial_match) return buf - 1;
+ if (partial_match) {
+ mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
+ }
return first_non_zero_match<S>(buf, mask, len);
}
-template <uint16_t S>
+template <uint16_t S, bool check_partial>
static really_inline
const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
SuperVector<S> mask = (mask1 << 1)& mask2;
DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
- bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
+ bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) {
mask = mask | (SuperVector<S>::Ones() >> (S-1));
return last_non_zero_match<S>(buf, mask, len);
}
-template <uint16_t S>
+template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
SuperVector<S> const mask1, SuperVector<S> const mask2,
SuperVector<S> mask = v1 & (v2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
- bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1));
+ bool partial_match = (check_partial && ((buf[0] & m2) == c2) && ((buf[-1] & m1) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
- if (partial_match) return buf - 1;
+ if (partial_match) {
+ mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
+ }
return first_non_zero_match<S>(buf, mask, len);
}
// finish off tail
if (d != buf_end) {
- SuperVector<S> chars = SuperVector<S>::loadu_maskz(d, buf_end - d);
- rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, d);
+ SuperVector<S> chars = SuperVector<S>::loadu(buf_end - S);
+ rv = fwdBlock(wide_mask_lo, wide_mask_hi, chars, buf_end - S);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
// finish off tail
if (d != buf_end) {
- SuperVector<S> chars = SuperVector<S>::loadu(d);
- rv = fwdBlockDouble(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, chars, d);
+ SuperVector<S> chars = SuperVector<S>::Zeroes();
+ const u8 *end_buf;
+ if (buf_end - buf < S) {
+ memcpy(&chars.u, buf, buf_end - buf);
+ end_buf = buf;
+ } else {
+ chars = SuperVector<S>::loadu(buf_end - S);
+ end_buf = buf_end - S;
+ }
+ rv = fwdBlockDouble(wide_mask1_lo, wide_mask1_hi, wide_mask2_lo, wide_mask2_hi, chars, end_buf);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
-
+
return buf_end;
}
const u8 *shuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end) {
- return shuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
+ if (buf_end - buf < VECTORSIZE) { // fewer than VECTORSIZE bytes in [buf, buf_end): do not enter the vector routine
+ return shuftiFwdSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end); // the two m128 masks are handed over as byte pointers
+ }
+ return shuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end); // reached only when at least VECTORSIZE bytes are available
}
const u8 *rshuftiExec(m128 mask_lo, m128 mask_hi, const u8 *buf,
const u8 *buf_end) {
+ if (buf_end - buf < VECTORSIZE) { // fewer than VECTORSIZE bytes in [buf, buf_end): do not enter the vector routine
+ return shuftiRevSlow((const u8 *)&mask_lo, (const u8 *)&mask_hi, buf, buf_end); // the two m128 masks are handed over as byte pointers
+ }
return rshuftiExecReal<VECTORSIZE>(mask_lo, mask_hi, buf, buf_end);
}
// finish off tail
if (d != buf_end) {
- SuperVector<S> chars = SuperVector<S>::loadu_maskz(d, buf_end - d);
- rv = fwdBlock(wide_shuf_mask_lo_highclear, wide_shuf_mask_lo_highset, chars, d);
+ SuperVector<S> chars = SuperVector<S>::Zeroes();
+ const u8* end_buf;
+ if (buf_end - buf < S) {
+ memcpy(&chars.u, buf, buf_end - buf);
+ end_buf = buf;
+ } else {
+ chars = SuperVector<S>::loadu(buf_end - S);
+ end_buf = buf_end - S;
+ }
+ rv = fwdBlock(wide_shuf_mask_lo_highclear, wide_shuf_mask_lo_highset, chars, end_buf);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
// finish off head
if (d != buf) {
- SuperVector<S> chars = SuperVector<S>::loadu(buf);
+ SuperVector<S> chars = SuperVector<S>::Zeroes();
+ if (buf_end - buf < S) {
+ memcpy(&chars.u, buf, buf_end - buf);
+ } else {
+ chars = SuperVector<S>::loadu(buf);
+ }
rv = revBlock(wide_shuf_mask_lo_highclear, wide_shuf_mask_lo_highset, chars, buf);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
static really_inline
const u8 *rvermicelliBlockNeg(SuperVector<S> const data, SuperVector<S> const chars, SuperVector<S> const casemask, const u8 *buf, u16 const len);
-template <uint16_t S>
+template <uint16_t S, bool check_partial = true> // check_partial defaults to true so calls that do not name it are unchanged; the tail scans pass <S, false>
static really_inline
const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len);
-template <uint16_t S>
+template <uint16_t S, bool check_partial = true> // check_partial defaults to true so calls that do not name it are unchanged; the head scan passes <S, false>
static really_inline
const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len);
-template <uint16_t S>
+template <uint16_t S, bool check_partial = true>
static really_inline
const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
SuperVector<S> const mask1, SuperVector<S> const mask2,
// finish off tail
if (d != buf_end) {
- SuperVector<S> data = SuperVector<S>::loadu_maskz(d, buf_end - d);
- rv = vermicelliBlock(data, chars, casemask, d, buf_end - d);
+ SuperVector<S> data = SuperVector<S>::loadu(buf_end - S);
+ rv = vermicelliBlock(data, chars, casemask, buf_end - S, buf_end - d);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
// finish off tail
if (d != buf_end) {
- SuperVector<S> data = SuperVector<S>::loadu_maskz(d, buf_end - d);
- rv = vermicelliBlockNeg(data, chars, casemask, d, buf_end - d);
+ SuperVector<S> data = SuperVector<S>::loadu(buf_end - S);
+ rv = vermicelliBlockNeg(data, chars, casemask, buf_end - S, buf_end - d);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
if (!ISALIGNED_N(d, S)) {
u8 const *d1 = ROUNDUP_PTR(d, S);
SuperVector<S> data = SuperVector<S>::loadu(d);
- rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, S);
- if (rv) return rv;
+ rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d + S, S);
+ if (rv) return rv - S;
d = d1;
}
- while(d + S <= buf_end) {
+ while(d + S < buf_end) {
__builtin_prefetch(d + 64);
DEBUG_PRINTF("d %p \n", d);
SuperVector<S> data = SuperVector<S>::load(d);
- rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, S);
- if (rv) return rv;
+ rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d + S, S);
+ if (rv) return rv - S;
d += S;
}
}
// finish off tail
if (d != buf_end) {
- SuperVector<S> data = SuperVector<S>::loadu_maskz(d, buf_end - d);
- rv = vermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, d, buf_end - d);
+ SuperVector<S> data = SuperVector<S>::Zeroes();
+ const u8* end_buf;
+ if (buf_end - buf < S) {
+ memcpy(&data.u, buf, buf_end - buf);
+ end_buf = buf;
+ } else {
+ data = SuperVector<S>::loadu(buf_end - S);
+ end_buf = buf_end - S;
+ }
+ rv = vermicelliDoubleBlock<S, false>(data, chars1, chars2, casemask, c1, c2, casechar, end_buf, buf_end - d);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
// finish off head
if (d != buf) {
- SuperVector<S> data = SuperVector<S>::loadu(buf);
- rv = rvermicelliDoubleBlock(data, chars1, chars2, casemask, c1, c2, casechar, buf, d - buf);
+ SuperVector<S> data = SuperVector<S>::Zeroes();
+ if (d - buf < S) {
+ memcpy(&data.u, buf, d - buf);
+ } else {
+ data = SuperVector<S>::loadu(buf);
+ }
+ rv = rvermicelliDoubleBlock<S, false>(data, chars1, chars2, casemask, c1, c2, casechar, buf, d - buf);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
if (!ISALIGNED_N(d, S)) {
u8 const *d1 = ROUNDUP_PTR(d, S);
SuperVector<S> data = SuperVector<S>::loadu(d);
- rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d, S);
- if (rv) return rv;
+ rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d + S, S);
+ if (rv) return rv - S;
d = d1;
}
- while(d + S <= buf_end) {
+ while(d + S < buf_end) {
__builtin_prefetch(d + 64);
DEBUG_PRINTF("d %p \n", d);
SuperVector<S> data = SuperVector<S>::load(d);
- rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d, S);
- if (rv) return rv;
+ rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d + S, S);
+ if (rv) return rv - S;
d += S;
}
}
// finish off tail
if (d != buf_end) {
- SuperVector<S> data = SuperVector<S>::loadu_maskz(d, buf_end - d);
- rv = vermicelliDoubleMaskedBlock(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, d, buf_end - d);
+ SuperVector<S> data = SuperVector<S>::Zeroes();
+ const u8* end_buf;
+ if (buf_end - buf < S) {
+ memcpy(&data.u, buf, buf_end - buf);
+ end_buf = buf;
+ } else {
+ data = SuperVector<S>::loadu(buf_end - S);
+ end_buf = buf_end - S;
+ }
+ rv = vermicelliDoubleMaskedBlock<S, false>(data, chars1, chars2, mask1, mask2, c1, c2, m1, m2, end_buf, buf_end - d);
DEBUG_PRINTF("rv %p \n", rv);
if (rv && rv < buf_end) return rv;
}
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
+ // Small ranges.
+ if (buf_end - buf < VECTORSIZE) {
+ for (; buf < buf_end; buf++) {
+ char cur = (char)*buf;
+ if (nocase) {
+ cur &= CASE_CLEAR;
+ }
+ if (cur == c) {
+ break;
+ }
+ }
+ return buf;
+ }
+
const SuperVector<VECTORSIZE> chars = SuperVector<VECTORSIZE>::dup_u8(c);
const SuperVector<VECTORSIZE> casemask{nocase ? getCaseMask<VECTORSIZE>() : SuperVector<VECTORSIZE>::Ones()};
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
+ // Small ranges.
+ if (buf_end - buf < VECTORSIZE) {
+ for (; buf < buf_end; buf++) {
+ char cur = *buf;
+ if (nocase) {
+ cur &= CASE_CLEAR;
+ }
+ if (cur != c) {
+ break;
+ }
+ }
+ return buf;
+ }
+
const SuperVector<VECTORSIZE> chars = SuperVector<VECTORSIZE>::dup_u8(c);
const SuperVector<VECTORSIZE> casemask{nocase ? getCaseMask<VECTORSIZE>() : SuperVector<VECTORSIZE>::Ones()};
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
+ // Small ranges.
+ if (buf_end - buf < VECTORSIZE) {
+ for (buf_end--; buf_end >= buf; buf_end--) {
+ char cur = (char)*buf_end;
+ if (nocase) {
+ cur &= CASE_CLEAR;
+ }
+ if (cur == c) {
+ break;
+ }
+ }
+ return buf_end;
+ }
+
const SuperVector<VECTORSIZE> chars = SuperVector<VECTORSIZE>::dup_u8(c);
const SuperVector<VECTORSIZE> casemask{nocase ? getCaseMask<VECTORSIZE>() : SuperVector<VECTORSIZE>::Ones()};
nocase ? "nocase " : "", c, (size_t)(buf_end - buf));
assert(buf < buf_end);
+ // Small ranges.
+ if (buf_end - buf < VECTORSIZE) {
+ for (buf_end--; buf_end >= buf; buf_end--) {
+ char cur = (char)*buf_end;
+ if (nocase) {
+ cur &= CASE_CLEAR;
+ }
+ if (cur != c) {
+ break;
+ }
+ }
+ return buf_end;
+ }
+
const SuperVector<VECTORSIZE> chars = SuperVector<VECTORSIZE>::dup_u8(c);
const SuperVector<VECTORSIZE> casemask{nocase ? getCaseMask<VECTORSIZE>() : SuperVector<VECTORSIZE>::Ones()};
return last_zero_match_inverted<S>(buf, mask, len);
}
-template <uint16_t S>
+template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
SuperVector<S> mask = mask1 & (mask2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
- bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
+ bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
- if (partial_match) return buf - 1;
+ if (partial_match) {
+ mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
+ }
return first_non_zero_match<S>(buf, mask, len);
}
-template <uint16_t S>
+template <uint16_t S, bool check_partial>
static really_inline
const u8 *rvermicelliDoubleBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2, SuperVector<S> const casemask,
u8 const c1, u8 const c2, u8 const casechar, u8 const *buf, u16 const len) {
SuperVector<S> mask = (mask1 << 1)& mask2;
DEBUG_PRINTF("buf[0] = %02hhx, buf[-1] = %02hhx\n", buf[0], buf[-1]);
- bool partial_match = (((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
+ bool partial_match = (check_partial && ((buf[0] & casechar) == c2) && ((buf[-1] & casechar) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
if (partial_match) {
mask = mask | (SuperVector<S>::Ones() >> (S-1));
return last_non_zero_match<S>(buf, mask, len);
}
-template <uint16_t S>
+template <uint16_t S, bool check_partial>
static really_inline
const u8 *vermicelliDoubleMaskedBlock(SuperVector<S> const data, SuperVector<S> const chars1, SuperVector<S> const chars2,
SuperVector<S> const mask1, SuperVector<S> const mask2,
SuperVector<S> mask = v1 & (v2 >> 1);
DEBUG_PRINTF("rv[0] = %02hhx, rv[-1] = %02hhx\n", buf[0], buf[-1]);
- bool partial_match = (((buf[0] & m1) == c2) && ((buf[-1] & m2) == c1));
+ bool partial_match = (check_partial && ((buf[0] & m2) == c2) && ((buf[-1] & m1) == c1));
DEBUG_PRINTF("partial = %d\n", partial_match);
- if (partial_match) return buf - 1;
+ if (partial_match) {
+ mask = mask | ((SuperVector<S>::Ones() >> (S-1)) << (S-1));
+ }
return first_non_zero_match<S>(buf, mask, len);
}
ASSERT_NE(nullptr, compile_err);
EXPECT_STREQ("Allocator returned misaligned memory.", compile_err->message);
hs_free_compile_error(compile_err);
- hs_set_database_allocator(nullptr, nullptr);
+ hs_set_misc_allocator(nullptr, nullptr);
}
TEST(CustomAllocator, TwoAlignedDatabaseInfo) {
#include"util/supervector/supervector.hpp"
#include "nfa/limex_shuffle.hpp"
+#ifdef setbit
+#undef setbit
+#endif
namespace {
#include "util/bytecode_ptr.h"
#include "util/simd_utils.h"
+#ifdef setbit
+#undef setbit
+#endif
+
using namespace std;
using namespace ue2;