return MO_CONTINUE_MATCHING; /* continue execution */
}
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
// Sheng32
static really_inline
const struct sheng32 *get_sheng32(const struct NFA *n) {
}
return MO_CONTINUE_MATCHING; /* continue execution */
}
-#endif // end of HAVE_AVX512VBMI
+#endif // end of HAVE_AVX512VBMI || HAVE_SVE
/* include Sheng function definitions */
#include "sheng_defs.h"
return 0;
}
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
// Sheng32
static really_inline
char runSheng32Cb(const struct sheng32 *sh, NfaCallback cb, void *ctxt,
*(u8 *)dest = *(const u8 *)src;
return 0;
}
-#endif // end of HAVE_AVX512VBMI
+#endif // end of HAVE_AVX512VBMI || HAVE_SVE
char nfaExecSheng_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context);
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
char nfaExecSheng64_B(const struct NFA *n, u64a offset, const u8 *buffer,
size_t length, NfaCallback cb, void *context);
-
-#else // !HAVE_AVX512VBMI
+#else // !HAVE_AVX512VBMI && !HAVE_SVE
#define nfaExecSheng32_B_Reverse NFA_API_NO_IMPL
#define nfaExecSheng32_zombie_status NFA_API_ZOMBIE_NO_IMPL
#define nfaExecSheng64_testEOD NFA_API_NO_IMPL
#define nfaExecSheng64_reportCurrent NFA_API_NO_IMPL
#define nfaExecSheng64_B NFA_API_NO_IMPL
-#endif // end of HAVE_AVX512VBMI
+#endif // end of HAVE_AVX512VBMI || HAVE_SVE
+
#endif /* SHENG_H_ */
return (a | b | c | d) & (SHENG_STATE_FLAG_MASK);
}
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline
u8 isDeadState32(const u8 a) {
return a & SHENG32_STATE_DEAD;
#define SHENG_IMPL sheng_cod
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_cod
#define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#define SHENG_IMPL sheng_co
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_co
#define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#define SHENG_IMPL sheng_samd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_samd
#define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 isAcceptState32
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#define SHENG_IMPL sheng_sam
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_sam
#define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 isAcceptState32
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#define SHENG_IMPL sheng_nmd
#define DEAD_FUNC isDeadState
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_nmd
#define DEAD_FUNC32 isDeadState32
#define ACCEPT_FUNC32 dummyFunc
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#define SHENG_IMPL sheng_nm
#define DEAD_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_nm
#define DEAD_FUNC32 dummyFunc
#define ACCEPT_FUNC32 dummyFunc
#undef SHENG_IMPL
#undef DEAD_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef DEAD_FUNC32
#undef ACCEPT_FUNC32
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_coda
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_cod
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_coa
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_co
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_samda
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_samd
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 isDeadState32
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC isAccelState
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_sama
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC isAcceptState
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_sam
#define INTERESTING_FUNC32 hasInterestingStates32
#define INNER_DEAD_FUNC32 dummyFunc
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC isAccelState
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nmda
#define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nmd
#define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
#define INNER_ACCEL_FUNC dummyFunc
#define OUTER_ACCEL_FUNC dummyFunc
#define ACCEPT_FUNC dummyFunc
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#define SHENG32_IMPL sheng32_4_nm
#define INTERESTING_FUNC32 dummyFunc4
#define INNER_DEAD_FUNC32 dummyFunc
#undef INNER_ACCEL_FUNC
#undef OUTER_ACCEL_FUNC
#undef ACCEPT_FUNC
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
#undef SHENG32_IMPL
#undef INTERESTING_FUNC32
#undef INNER_DEAD_FUNC32
return MO_CONTINUE_MATCHING;
}
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const struct sheng32 *s,
}
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
+#if defined(HAVE_SVE)
+ const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
+ svuint8_t cur_state = svdup_u8(*state);
+ svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
+ const m512 *masks = s->succ_masks;
+#else
m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks;
+#endif
while (likely(cur_buf != end)) {
const u8 c = *cur_buf;
+
+#if defined(HAVE_SVE)
+ svuint8_t succ_mask = svld1(lane_pred_32, (const u8*)(masks + c));
+ cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 tmp = svlastb(lane_pred_32, cur_state);
+#else
const m512 succ_mask = masks[c];
cur_state = vpermb512(cur_state, succ_mask);
const u8 tmp = movd512(cur_state);
+#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG32_STATE_MASK,
}
cur_buf++;
}
+#if defined(HAVE_SVE)
+ *state = svlastb(lane_pred_32, cur_state);
+#else
*state = movd512(cur_state);
+#endif
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
}
DEBUG_PRINTF("Scanning %lli bytes\n", (s64a)(end - start));
+#if defined(HAVE_SVE)
+ const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
+ svuint8_t cur_state = svdup_u8(*state);
+ svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
+ const m512 *masks = s->succ_masks;
+#else
m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks;
+#endif
while (likely(cur_buf != end)) {
const u8 c = *cur_buf;
+
+#if defined(HAVE_SVE)
+ svuint8_t succ_mask = svld1(lane_pred_64, (const u8*)(masks + c));
+ cur_state = svtbl(succ_mask, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 tmp = svlastb(lane_pred_64, cur_state);
+#else
const m512 succ_mask = masks[c];
cur_state = vpermb512(cur_state, succ_mask);
const u8 tmp = movd512(cur_state);
+#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c, ourisprint(c) ? c : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", tmp & SHENG64_STATE_MASK,
}
cur_buf++;
}
+#if defined(HAVE_SVE)
+ *state = svlastb(lane_pred_64, cur_state);
+#else
*state = movd512(cur_state);
+#endif
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
return MO_CONTINUE_MATCHING;
}
-#if defined(HAVE_AVX512VBMI)
+#if defined(HAVE_AVX512VBMI) || defined(HAVE_SVE)
static really_inline
char SHENG32_IMPL(u8 *state, NfaCallback cb, void *ctxt,
const struct sheng32 *s,
return MO_CONTINUE_MATCHING;
}
+#if defined(HAVE_SVE)
+ const svbool_t lane_pred_32 = svwhilelt_b8(0, 32);
+ svuint8_t cur_state = svdup_u8(*state);
+ svuint8_t tbl_mask = svdup_u8((unsigned char)0x1F);
+ const m512 *masks = s->succ_masks;
+#else
m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks;
+#endif
while (likely(end - cur_buf >= 4)) {
const u8 *b1 = cur_buf;
const u8 c3 = *b3;
const u8 c4 = *b4;
+#if defined(HAVE_SVE)
+ svuint8_t succ_mask1 = svld1(lane_pred_32, (const u8*)(masks+c1));
+ cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 a1 = svlastb(lane_pred_32, cur_state);
+
+ svuint8_t succ_mask2 = svld1(lane_pred_32, (const u8*)(masks+c2));
+ cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 a2 = svlastb(lane_pred_32, cur_state);
+
+ svuint8_t succ_mask3 = svld1(lane_pred_32, (const u8*)(masks+c3));
+ cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 a3 = svlastb(lane_pred_32, cur_state);
+
+ svuint8_t succ_mask4 = svld1(lane_pred_32, (const u8*)(masks+c4));
+ cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 a4 = svlastb(lane_pred_32, cur_state);
+#else
const m512 succ_mask1 = masks[c1];
cur_state = vpermb512(cur_state, succ_mask1);
const u8 a1 = movd512(cur_state);
const m512 succ_mask4 = masks[c4];
cur_state = vpermb512(cur_state, succ_mask4);
const u8 a4 = movd512(cur_state);
+#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG32_STATE_MASK,
};
cur_buf += 4;
}
+#if defined(HAVE_SVE)
+ *state = svlastb(lane_pred_32, cur_state);
+#else
*state = movd512(cur_state);
+#endif
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
*scan_end = end;
return MO_CONTINUE_MATCHING;
}
-
+#if defined(HAVE_SVE)
+ const svbool_t lane_pred_64 = svwhilelt_b8(0, 64);
+ svuint8_t cur_state = svdup_u8(*state);
+ svuint8_t tbl_mask = svdup_u8((unsigned char)0x3F);
+ const m512 *masks = s->succ_masks;
+#else
m512 cur_state = set1_64x8(*state);
const m512 *masks = s->succ_masks;
+#endif
while (likely(end - cur_buf >= 4)) {
const u8 *b1 = cur_buf;
const u8 c3 = *b3;
const u8 c4 = *b4;
+#if defined(HAVE_SVE)
+ svuint8_t succ_mask1 = svld1(lane_pred_64, (const u8*)(masks+c1));
+ cur_state = svtbl(succ_mask1, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 a1 = svlastb(lane_pred_64, cur_state);
+
+ svuint8_t succ_mask2 = svld1(lane_pred_64, (const u8*)(masks+c2));
+ cur_state = svtbl(succ_mask2, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 a2 = svlastb(lane_pred_64, cur_state);
+
+ svuint8_t succ_mask3 = svld1(lane_pred_64, (const u8*)(masks+c3));
+ cur_state = svtbl(succ_mask3, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 a3 = svlastb(lane_pred_64, cur_state);
+
+ svuint8_t succ_mask4 = svld1(lane_pred_64, (const u8*)(masks+c4));
+ cur_state = svtbl(succ_mask4, svand_x(svptrue_b8(), tbl_mask, cur_state));
+ const u8 a4 = svlastb(lane_pred_64, cur_state);
+#else
const m512 succ_mask1 = masks[c1];
cur_state = vpermb512(cur_state, succ_mask1);
const u8 a1 = movd512(cur_state);
const m512 succ_mask4 = masks[c4];
cur_state = vpermb512(cur_state, succ_mask4);
const u8 a4 = movd512(cur_state);
+#endif
DEBUG_PRINTF("c: %02hhx '%c'\n", c1, ourisprint(c1) ? c1 : '?');
DEBUG_PRINTF("s: %u (flag: %u)\n", a1 & SHENG64_STATE_MASK,
}
cur_buf += 4;
}
+#if defined(HAVE_SVE)
+ *state = svlastb(lane_pred_64, cur_state);
+#else
*state = movd512(cur_state);
+#endif
*scan_end = cur_buf;
return MO_CONTINUE_MATCHING;
}
return nullptr;
}
+#ifdef HAVE_SVE
+ if (svcntb()<32) {
+ DEBUG_PRINTF("Sheng32 failed, SVE width is too small!\n");
+ return nullptr;
+ }
+#else
if (!cc.target_info.has_avx512vbmi()) {
DEBUG_PRINTF("Sheng32 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
return nullptr;
}
+#endif
sheng_build_strat strat(raw, rm, only_accel_init);
dfa_info info(strat);
return nullptr;
}
+#ifdef HAVE_SVE
+ if (svcntb()<64) {
+ DEBUG_PRINTF("Sheng64 failed, SVE width is too small!\n");
+ return nullptr;
+ }
+#else
if (!cc.target_info.has_avx512vbmi()) {
DEBUG_PRINTF("Sheng64 failed, no HS_CPU_FEATURES_AVX512VBMI!\n");
return nullptr;
}
+#endif
sheng_build_strat strat(raw, rm, only_accel_init);
dfa_info info(strat);