From: Byeonguk Jeong Date: Tue, 7 Apr 2026 10:10:31 +0000 (+0900) Subject: shufti-double: fix regressions from #325 (#368) X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=HEAD;p=thirdparty%2Fvectorscan.git shufti-double: fix regressions from #325 (#368) * shufti-double: Fix double shufti's caller run_accel() passed c_end - 1 to shuftiDoubleExec(). shuftiDoubleExecReal already handles the last-byte boundary internally via check_last_byte(), so shortening the buffer caused it to miss valid matches near the end and apply the wildcard check to the wrong byte. Changed to pass c_end. Fixes: ca70a3d9beca61b58c6709fead60ec662482d36e ("Fix double shufti's vector end false positive (#325)") Signed-off-by: Byeonguk Jeong * shufti-double: Preserve first_char_mask after peel-off first_char_mask was reset to Ones() after the peel-off block, discarding carry-over state for cross-boundary pattern detection. Remove the reset. Fixes: ca70a3d9beca61b58c6709fead60ec662482d36e ("Fix double shufti's vector end false positive (#325)") Signed-off-by: Byeonguk Jeong * shufti-double: Fix check_last_byte() loop condition loop condition i > 2 missed the final vshr(1), leaving odd-indexed bytes out of the reduce. Use i >= 2. Fixes: ca70a3d9beca61b58c6709fead60ec662482d36e ("Fix double shufti's vector end false positive (#325)") Signed-off-by: Byeonguk Jeong * shufti-double: Add regression tests for double shufti Add three tests exercising the double shufti edge cases. - ExecMatchVectorEdge: Two-byte pair ("ab") spanning the peel-off to aligned block boundary is correctly detected. Validates that first_char_mask state carries over and is not reset after peel-off. - ExecNoMatchLastByte: First character of a double-byte pair ('x' from "xy") at the last buffer byte does not cause a false positive when the second character is absent. - ExecMatchLastByte: Single-byte pattern ('a') at the last buffer byte is detected via check_last_byte's reduce. Signed-off-by: Byeonguk Jeong --------- Signed-off-by: Byeonguk Jeong --- diff --git a/src/nfa/accel.c b/src/nfa/accel.c index 027f1182..b59d1c8c 100644 --- a/src/nfa/accel.c +++ b/src/nfa/accel.c @@ -160,11 +160,12 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) { return c; } - /* need to stop one early to get an accurate end state */ + /* Shufti double handles its own ending boundary checks internally + * to correctly identify matches at the last byte. */ rv = shuftiDoubleExec(accel->dshufti.lo1, accel->dshufti.hi1, accel->dshufti.lo2, - accel->dshufti.hi2, c, c_end - 1); + accel->dshufti.hi2, c, c_end); break; case ACCEL_RED_TAPE: diff --git a/src/nfa/shufti_simd.hpp b/src/nfa/shufti_simd.hpp index c5989aa0..9b3d5a29 100644 --- a/src/nfa/shufti_simd.hpp +++ b/src/nfa/shufti_simd.hpp @@ -208,7 +208,7 @@ const u8 *check_last_byte(SuperVector mask2_lo, SuperVector mask2_hi, uint8_t last_elem = mask.u.u8[mask_len - 1]; SuperVector reduce = mask2_lo | mask2_hi; - for(uint16_t i = S; i > 2; i/=2) { + for(uint16_t i = S; i >= 2; i/=2) { reduce = reduce | reduce.vshr(i/2); } uint8_t match_inverted = reduce.u.u8[0] | last_elem; @@ -260,7 +260,6 @@ const u8 *shuftiDoubleExecReal(m128 mask1_lo, m128 mask1_hi, m128 mask2_lo, m128 first_char_mask.print8("inout_c1 shifted"); } - first_char_mask = SuperVector::Ones(); while(d + S <= buf_end) { __builtin_prefetch(d + 64); DEBUG_PRINTF("d %p \n", d); diff --git a/unit/internal/shufti.cpp b/unit/internal/shufti.cpp index d5f291d9..29e6f64f 100644 --- a/unit/internal/shufti.cpp +++ b/unit/internal/shufti.cpp @@ -990,6 +990,94 @@ TEST(DoubleShufti, ExecNoOverreadPageBoundary) { munmap(pages, page_size); } +TEST(DoubleShufti, ExecMatchVectorEdge) { + m128 lo1, hi1, lo2, hi2; + + flat_set> lits; + lits.insert(make_pair('a', 'b')); + + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, reinterpret_cast(&lo1), reinterpret_cast(&hi1), + reinterpret_cast(&lo2), reinterpret_cast(&hi2)); + ASSERT_TRUE(ret); + + const int len = 80; + const int vector_size = 16; + for (size_t start = 1; start < vector_size; start++) { + char t[len]; + memset(t, 'z', len); + char *buf = t + start; + int buf_len = len - start; + + uintptr_t aligned = (reinterpret_cast(buf) + vector_size - 1) & + ~static_cast(vector_size - 1); + ptrdiff_t boundary = reinterpret_cast(aligned) - buf; + + if (boundary < 1 || boundary >= buf_len - 1) continue; + + buf[boundary - 1] = 'a'; + buf[boundary] = 'b'; + + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + reinterpret_cast(buf), + reinterpret_cast(buf) + buf_len); + + ASSERT_EQ(reinterpret_cast(buf + boundary - 1), rv) + << "Failed for start=" << start << " boundary=" << boundary; + } +} + +TEST(DoubleShufti, ExecNoMatchLastByte) { + m128 lo1, hi1, lo2, hi2; + + flat_set> lits; + lits.insert(make_pair('x', 'y')); + + bool ret = shuftiBuildDoubleMasks(CharReach(), lits, reinterpret_cast(&lo1), reinterpret_cast(&hi1), + reinterpret_cast(&lo2), reinterpret_cast(&hi2)); + ASSERT_TRUE(ret); + + const int maxlen = 80; + char t1[maxlen + 1]; + for (int len = 17; len < maxlen; len++) { + memset(t1, 'b', len + 1); + t1[len - 1] = 'x'; + + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + reinterpret_cast(t1), + reinterpret_cast(t1) + len); + + ASSERT_EQ(reinterpret_cast(t1 + len), rv) + << "Failed for len=" << len; + } +} + +TEST(DoubleShufti, ExecMatchLastByte) { + m128 lo1, hi1, lo2, hi2; + + CharReach onebyte; + flat_set> twobyte; + + onebyte.set('a'); + twobyte.insert(make_pair('x', 'y')); + + bool ret = shuftiBuildDoubleMasks(onebyte, twobyte, reinterpret_cast(&lo1), reinterpret_cast(&hi1), + reinterpret_cast(&lo2), reinterpret_cast(&hi2)); + ASSERT_TRUE(ret); + + const int maxlen = 80; + char t1[maxlen + 1]; + for (int len = 17; len < maxlen; len++) { + memset(t1, 'b', len + 1); + t1[len - 1] = 'a'; + + const u8 *rv = shuftiDoubleExec(lo1, hi1, lo2, hi2, + reinterpret_cast(t1), + reinterpret_cast(t1) + len); + + ASSERT_EQ(reinterpret_cast(t1 + len - 1), rv); + } +} + TEST(ReverseShufti, ExecNoMatch1) { m128 lo, hi;