STYPE state = 0;
int c = xlate[buf[0]];
/* If buflen is at least 4 + EXTRA bytes and buf is 4-byte aligned. */
- if (buflen >= 4 && ((uint64_t)buf & 0x3) == 0) {
+ if (buflen >= (4 + EXTRA) && ((uint64_t)buf & 0x3) == 0) {
BTYPE data = *(BTYPE* restrict)(&buf[0]);
uint64_t index = 0;
/* Process 4*floor(buflen/4) bytes. */
i = 0;
- while (i < (buflen & ~0x3)) {
+ while ((i + EXTRA) < (buflen & ~0x3)) {
BTYPE data1 = *(BTYPE* restrict)(&buf[i + 4]);
index = SINDEX(index, state);
state = SLOAD(state_table + index + c);
uint64_t index = 0 ;
index = SINDEX(index, state);
state = SLOAD(state_table + index + c);
- c = xlate[buf[i+1]];
+#ifndef __tile__
+ if (likely(i+1 < buflen))
+#endif
+ c = xlate[buf[i+1]];
if (unlikely(SCHECK(state))) {
matches = CheckMatch(ctx, pmq, buf, buflen, state, i, matches, mpm_bitarray);
}
#include "util-memcpy.h"
#include "util-mpm-ac-tile.h"
-#ifndef __tile__
-void MpmACTileRegister(void)
-{
-}
-#endif
-
-/* There are Tilera Tile-Gx specific optimizations in this code. */
-#ifdef __tile__
-
void SCACTileInitCtx(MpmCtx *);
void SCACTileInitThreadCtx(MpmCtx *, MpmThreadCtx *, uint32_t);
void SCACTileDestroyCtx(MpmCtx *);
mpm_ctx->memory_cnt++;
mpm_ctx->memory_size += size;
- SCLogInfo("Delta Table size %d, alphabet: %d, %d-byte states: %d",
+ SCLogDebug("Delta Table size %d, alphabet: %d, %d-byte states: %d",
size, ctx->alphabet_size, ctx->bytes_per_state, ctx->state_count);
/* Copy next state from Goto table, which is 32 bits and encode it into the next
#define SCHECK(x) ((x) > 0)
#define BTYPE int32_t
// Extract byte N=0,1,2,3 from x
+#ifdef __tile__
#define BYTE0(x) __insn_bfextu(x, 0, 7)
#define BYTE1(x) __insn_bfextu(x, 8, 15)
#define BYTE2(x) __insn_bfextu(x, 16, 23)
#define BYTE3(x) __insn_bfextu(x, 24, 31)
+#define EXTRA 0
+#else /* fallback */
+#define BYTE0(x) (((x) & 0x000000ff) >> 0)
+#define BYTE1(x) (((x) & 0x0000ff00) >> 8)
+#define BYTE2(x) (((x) & 0x00ff0000) >> 16)
+#define BYTE3(x) (((x) & 0xff000000) >> 24)
+#define EXTRA 4 // need 4 extra bytes to avoid OOB reads
+#endif
int CheckMatch(SCACTileSearchCtx *ctx, PatternMatcherQueue *pmq,
uint8_t *buf, uint16_t buflen,
/* Double check case-sensitive match now. */
if (patterns[k] >> 31) {
uint16_t patlen = pattern_list[pindex].patlen;
+#ifdef __tile__
if (SCMemcmpNZ(pattern_list[pindex].cs, buf_offset - patlen, patlen) != 0) {
+#else
+ if (SCMemcmp(pattern_list[pindex].cs, buf_offset - patlen, patlen) != 0) {
+#endif
/* Case-sensitive match failed. */
continue;
}
* Next state entry has MSB as "match" and 15 LSB bits as next-state index.
*/
// y = (1 << log_mult) * (x & ((1 << width) - 1))
+#ifdef __tile__
#define SINDEX_INTERNAL(y, x, log_mult, width) \
__insn_bfins(y, x, log_mult, log_mult + (width - 1))
+#else
+#define SINDEX_INTERNAL(y, x, log_mult, width) \
+ ((1<<log_mult) * (x & ((1<<width) - 1)))
+#endif
/* Type of next_state */
#define STYPE int16_t
+#ifdef __tile__
// Hint to compiler to expect L2 hit latency for Load int16_t
#define SLOAD(x) __insn_ld2s_L2((STYPE* restrict)(x))
+#else
+#define SLOAD(x) *(STYPE * restrict)(x)
+#endif
#define FUNC_NAME SCACTileSearchSmall256
// y = 256 * (x & 0x7FFF)
#undef STYPE
#define STYPE int8_t
// Hint to compiler to expect L2 hit latency for Load int8_t
+#ifdef __tile__
#undef SLOAD
#define SLOAD(x) __insn_ld1s_L2((STYPE* restrict)(x))
+#else
+/* no op for !__tile__ case */
+#endif
#undef FUNC_NAME
#undef SINDEX
*/
void MpmACTileRegister(void)
{
+#ifdef __tile__
mpm_table[MPM_AC_TILE].name = "ac-tile";
+#else
+ mpm_table[MPM_AC_TILE].name = "ac-ks";
+#endif
mpm_table[MPM_AC_TILE].max_pattern_length = 0;
mpm_table[MPM_AC_TILE].InitCtx = SCACTileInitCtx;
#endif
}
-#endif /* __tile__ */