git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
Use fatbit for anch log, delay slots in scratch
author: Justin Viiret <justin.viiret@intel.com>
Thu, 7 Jan 2016 00:56:57 +0000 (11:56 +1100)
committer: Matthew Barr <matthew.barr@intel.com>
Tue, 1 Mar 2016 00:24:17 +0000 (11:24 +1100)
Since these structures are in scratch, they do not have to be as small
as possible and we can use fatbit instead of multibit to improve
performance.

src/rose/catchup.c
src/rose/match.c
src/rose/match.h
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_dump.cpp
src/rose/rose_internal.h
src/scratch.c
src/scratch.h
src/util/fatbit.h

index d1ef41ff1a9ea4a79d2cb77622f0b9714e07c458..6893df0eb8e35b786b3d56e83b83c3243d477d45 100644 (file)
@@ -105,13 +105,13 @@ void nextAnchoredMatch(const struct RoseEngine *t, struct RoseContext *tctxt,
     assert(tctxt->curr_anchored_loc != MMB_INVALID);
 
     struct hs_scratch *scratch = tctxtToScratch(tctxt);
-    u8 **anchoredRows = getAnchoredLog(scratch);
+    struct fatbit **anchoredRows = getAnchoredLog(scratch);
 
     u32 region_width = t->anchoredMatches;
-    u8 *curr_row = anchoredRows[tctxt->curr_anchored_loc];
+    struct fatbit *curr_row = anchoredRows[tctxt->curr_anchored_loc];
 
-    tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width,
-                                           tctxt->curr_row_offset);
+    tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width,
+                                            tctxt->curr_row_offset);
     DEBUG_PRINTF("next %u [idx = %u] @%llu\n", *reportId,
                  tctxt->curr_row_offset, *end);
     if (tctxt->curr_row_offset != MMB_INVALID) {
@@ -132,8 +132,8 @@ void nextAnchoredMatch(const struct RoseEngine *t, struct RoseContext *tctxt,
     assert(tctxt->curr_anchored_loc < scratch->anchored_region_len);
     curr_row = anchoredRows[tctxt->curr_anchored_loc];
 
-    tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width,
-                                           MMB_INVALID);
+    tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width,
+                                            MMB_INVALID);
     assert(tctxt->curr_row_offset != MMB_INVALID);
 
     *end = tctxt->curr_anchored_loc + t->maxSafeAnchoredDROffset + 1;
index f614423b7f99f9aefe0d236d8ad211caa4014f61..6397b90eed9acf3515f9928b0e9ef3dee93b772f 100644 (file)
@@ -125,7 +125,7 @@ void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId,
                          u64a end) {
     struct hs_scratch *scratch = tctxtToScratch(tctxt);
     const struct RoseEngine *t = scratch->core_info.rose;
-    u8 **anchoredRows = getAnchoredLog(scratch);
+    struct fatbit **anchoredRows = getAnchoredLog(scratch);
 
     DEBUG_PRINTF("record %u @ %llu\n", reportId, end);
     assert(end - t->maxSafeAnchoredDROffset >= 1);
@@ -135,13 +135,13 @@ void recordAnchoredMatch(struct RoseContext *tctxt, ReportID reportId,
 
     if (!bf64_set(&scratch->am_log_sum, adj_end)) {
         // first time, clear row
-        mmbit_clear(anchoredRows[adj_end], t->anchoredMatches);
+        fatbit_clear(anchoredRows[adj_end]);
     }
 
     u32 idx = getAnchoredInverseMap(t)[reportId];
     DEBUG_PRINTF("record %u @ %llu index %u\n", reportId, end, idx);
     assert(idx < t->anchoredMatches);
-    mmbit_set(anchoredRows[adj_end], t->anchoredMatches, idx);
+    fatbit_set(anchoredRows[adj_end], t->anchoredMatches, idx);
 }
 
 static rose_inline
@@ -150,21 +150,21 @@ void recordAnchoredLiteralMatch(struct RoseContext *tctxt, u32 literal_id,
     assert(end);
     struct hs_scratch *scratch = tctxtToScratch(tctxt);
     const struct RoseEngine *t = scratch->core_info.rose;
-    u8 **anchoredLiteralRows = getAnchoredLiteralLog(scratch);
+    struct fatbit **anchoredLiteralRows = getAnchoredLiteralLog(scratch);
 
     DEBUG_PRINTF("record %u @ %llu\n", literal_id, end);
 
     if (!bf64_set(&scratch->al_log_sum, end - 1)) {
         // first time, clear row
         DEBUG_PRINTF("clearing %llu/%u\n", end - 1, t->anchored_count);
-        mmbit_clear(anchoredLiteralRows[end - 1], t->anchored_count);
+        fatbit_clear(anchoredLiteralRows[end - 1]);
     }
 
     u32 rel_idx = literal_id - t->anchored_base_id;
     DEBUG_PRINTF("record %u @ %llu index %u/%u\n", literal_id, end, rel_idx,
                  t->anchored_count);
     assert(rel_idx < t->anchored_count);
-    mmbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx);
+    fatbit_set(anchoredLiteralRows[end - 1], t->anchored_count, rel_idx);
 }
 
 hwlmcb_rv_t roseHandleChainMatch(const struct RoseEngine *t, ReportID r,
@@ -447,11 +447,11 @@ hwlmcb_rv_t roseProcessMainMatch(const struct RoseEngine *t, u64a end,
 
 static rose_inline
 hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt,
-                          const u8 *delaySlotBase, size_t delaySlotSize,
-                          u32 vicIndex, u64a offset) {
+                          struct fatbit **delaySlots, u32 vicIndex,
+                          u64a offset) {
     /* assert(!tctxt->in_anchored); */
     assert(vicIndex < DELAY_SLOT_COUNT);
-    const u8 *vicSlot = delaySlotBase + delaySlotSize * vicIndex;
+    const struct fatbit *vicSlot = delaySlots[vicIndex];
     u32 delay_count = t->delay_count;
 
     if (offset < t->floatingMinLiteralMatchOffset) {
@@ -463,8 +463,8 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt,
     roseFlushLastByteHistory(t, scratch->core_info.state, offset, tctxt);
     tctxt->lastEndOffset = offset;
 
-    for (u32 it = mmbit_iterate(vicSlot, delay_count, MMB_INVALID);
-         it != MMB_INVALID; it = mmbit_iterate(vicSlot, delay_count, it)) {
+    for (u32 it = fatbit_iterate(vicSlot, delay_count, MMB_INVALID);
+         it != MMB_INVALID; it = fatbit_iterate(vicSlot, delay_count, it)) {
         u32 literal_id = t->delay_base_id + it;
 
         UNUSED rose_group old_groups = tctxt->groups;
@@ -490,12 +490,13 @@ hwlmcb_rv_t playDelaySlot(const struct RoseEngine *t, struct RoseContext *tctxt,
 static really_inline
 hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t,
                                       struct RoseContext *tctxt, u32 curr_loc) {
-    u8 *curr_row = getAnchoredLiteralLog(tctxtToScratch(tctxt))[curr_loc - 1];
+    struct hs_scratch *scratch = tctxtToScratch(tctxt);
+    struct fatbit *curr_row = getAnchoredLiteralLog(scratch)[curr_loc - 1];
     u32 region_width = t->anchored_count;
 
     DEBUG_PRINTF("report matches at curr loc\n");
-    for (u32 it = mmbit_iterate(curr_row, region_width, MMB_INVALID);
-         it != MMB_INVALID; it = mmbit_iterate(curr_row, region_width, it)) {
+    for (u32 it = fatbit_iterate(curr_row, region_width, MMB_INVALID);
+         it != MMB_INVALID; it = fatbit_iterate(curr_row, region_width, it)) {
         DEBUG_PRINTF("it = %u/%u\n", it, region_width);
         u32 literal_id = t->anchored_base_id + it;
 
@@ -519,7 +520,6 @@ hwlmcb_rv_t flushAnchoredLiteralAtLoc(const struct RoseEngine *t,
     }
 
     /* clear row; does not invalidate iteration */
-    struct hs_scratch *scratch = tctxtToScratch(tctxt);
     bf64_unset(&scratch->al_log_sum, curr_loc - 1);
 
     return HWLM_CONTINUE_MATCHING;
@@ -566,7 +566,7 @@ hwlmcb_rv_t flushAnchoredLiterals(const struct RoseEngine *t,
 static really_inline
 hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct RoseContext *tctxt,
                         u32 *anchored_it, u64a lastEnd, u64a victimDelaySlots,
-                        u8 *delaySlotBase, size_t delaySlotSize) {
+                        struct fatbit **delaySlots) {
     /* assert (!tctxt->in_anchored); */
 
     while (victimDelaySlots) {
@@ -579,9 +579,8 @@ hwlmcb_rv_t playVictims(const struct RoseEngine *t, struct RoseContext *tctxt,
             return HWLM_TERMINATE_MATCHING;
         }
 
-        if (playDelaySlot(t, tctxt, delaySlotBase, delaySlotSize,
-                          vic % DELAY_SLOT_COUNT, vicOffset)
-            == HWLM_TERMINATE_MATCHING) {
+        if (playDelaySlot(t, tctxt, delaySlots, vic % DELAY_SLOT_COUNT,
+                          vicOffset) == HWLM_TERMINATE_MATCHING) {
             return HWLM_TERMINATE_MATCHING;
         }
     }
@@ -609,8 +608,7 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) {
     }
 
     {
-        u8 *delaySlotBase = getDelaySlots(scratch);
-        size_t delaySlotSize = t->delay_slot_size;
+        struct fatbit **delaySlots = getDelaySlots(tctxtToScratch(tctxt));
 
         u32 lastIndex = lastEnd & DELAY_MASK;
         u32 currIndex = currEnd & DELAY_MASK;
@@ -664,8 +662,7 @@ hwlmcb_rv_t flushQueuedLiterals_i(struct RoseContext *tctxt, u64a currEnd) {
         }
 
         if (playVictims(t, tctxt, &anchored_it, lastEnd, victimDelaySlots,
-                        delaySlotBase, delaySlotSize)
-            == HWLM_TERMINATE_MATCHING) {
+                        delaySlots) == HWLM_TERMINATE_MATCHING) {
             return HWLM_TERMINATE_MATCHING;
         }
     }
index f3b8fe73c8845dcb6a646c737d4a2245ee86dfca..2b6dfb5d17d164d0c26166ef8adac0b5f6647d8d 100644 (file)
@@ -40,6 +40,7 @@
 #include "nfa/nfa_api_util.h"
 #include "som/som_runtime.h"
 #include "util/bitutils.h"
+#include "util/fatbit.h"
 #include "util/internal_report.h"
 #include "util/multibit.h"
 
@@ -60,16 +61,16 @@ int roseAnchoredCallback(u64a end, u32 id, void *ctx);
 
 static rose_inline
 void resetAnchoredLog(const struct RoseEngine *t, struct hs_scratch *scratch) {
-    u8 **anchoredRows = getAnchoredLog(scratch);
+    struct fatbit **anchoredRows = getAnchoredLog(scratch);
     u32 region_width = t->anchoredMatches;
     struct RoseContext *tctxt = &scratch->tctxt;
 
     tctxt->curr_anchored_loc = bf64_iterate(scratch->am_log_sum, MMB_INVALID);
     if (tctxt->curr_anchored_loc != MMB_INVALID) {
         assert(tctxt->curr_anchored_loc < scratch->anchored_region_len);
-        u8 *curr_row = anchoredRows[tctxt->curr_anchored_loc];
-        tctxt->curr_row_offset = mmbit_iterate(curr_row, region_width,
-                                               MMB_INVALID);
+        struct fatbit *curr_row = anchoredRows[tctxt->curr_anchored_loc];
+        tctxt->curr_row_offset = fatbit_iterate(curr_row, region_width,
+                                                MMB_INVALID);
         assert(tctxt->curr_row_offset != MMB_INVALID);
     }
     DEBUG_PRINTF("AL reset --> %u, %u\n", tctxt->curr_anchored_loc,
index e8e60c7ff8c8fc893dd19ec7c9d208583be6fc0c..309fee5ba237433b2ab2413edc6a11cdbf78432c 100644 (file)
@@ -127,16 +127,16 @@ void rosePushDelayedMatch(const struct RoseEngine *t, u32 delay,
     }
 
     const u32 delay_count = t->delay_count;
-    u8 *slot = getDelaySlots(tctxtToScratch(tctxt)) +
-               (t->delay_slot_size * slot_index);
+    struct fatbit **delaySlots = getDelaySlots(tctxtToScratch(tctxt));
+    struct fatbit *slot = delaySlots[slot_index];
 
     DEBUG_PRINTF("pushing tab %u into slot %u\n", delay_index, slot_index);
     if (!(tctxt->filledDelayedSlots & (1U << slot_index))) {
         tctxt->filledDelayedSlots |= 1U << slot_index;
-        mmbit_clear(slot, delay_count);
+        fatbit_clear(slot);
     }
 
-    mmbit_set(slot, delay_count, delay_index);
+    fatbit_set(slot, delay_count, delay_index);
 }
 
 static rose_inline
index 45af3bb73fcb83e2a88dc15a637489fc9d82960a..c640f0912ea5affe2f0e1d04c22d360b0693618a 100644 (file)
@@ -4311,7 +4311,6 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     u32 delay_count = verify_u32(final_id_to_literal.size() - delay_base_id);
     engine->delay_count = delay_count;
-    engine->delay_slot_size = mmbit_size(delay_count);
     engine->delay_base_id = delay_base_id;
     engine->anchored_base_id = anchored_base_id;
     engine->anchored_count = delay_base_id - anchored_base_id;
index cd70c734cddb672586fe73a7edab9cb508ac8c0b..25ec7bae66dca36da9b3f6c4d80ae523f0336b31 100644 (file)
@@ -884,7 +884,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, size);
     DUMP_U32(t, anchoredMatches);
     DUMP_U32(t, delay_count);
-    DUMP_U32(t, delay_slot_size);
     DUMP_U32(t, delay_base_id);
     DUMP_U32(t, anchored_count);
     DUMP_U32(t, anchored_base_id);
index c9025600833d7f6e4bbdca631a5cc2e607667e47..a1f91cd3991ca71f3e11e9f65f051941ddcc1bb0 100644 (file)
@@ -447,7 +447,6 @@ struct RoseEngine {
     u32 size; // (bytes)
     u32 anchoredMatches; /* number of anchored roles generating matches */
     u32 delay_count; /* number of delayed literal ids. */
-    u32 delay_slot_size; /* size of delay slot mmbit. */
     u32 delay_base_id; /* literal id of the first delayed literal.
                         * delayed literal ids are contiguous */
     u32 anchored_count; /* number of anchored literal ids */
index 30241ab49aa856263b41243ddc9a211931df6f07..eff2289af596d79520ed4f4209b1070711309292 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -74,14 +74,16 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
     assert(anchored_literal_region_len < 8 * sizeof(s->am_log_sum));
 
     size_t anchored_region_size = anchored_region_len
-        * (mmbit_size(anchored_region_width) + sizeof(u8 *));
+        * (fatbit_size(anchored_region_width) + sizeof(struct fatbit *));
     anchored_region_size = ROUNDUP_N(anchored_region_size, 8);
 
     size_t anchored_literal_region_size = anchored_literal_region_len
-        * (mmbit_size(anchored_literal_region_width) + sizeof(u8 *));
+        * (fatbit_size(anchored_literal_region_width) + sizeof(struct fatbit *));
     anchored_literal_region_size = ROUNDUP_N(anchored_literal_region_size, 8);
 
-    size_t delay_size = mmbit_size(proto->delay_count) * DELAY_SLOT_COUNT;
+    size_t delay_region_size = DELAY_SLOT_COUNT *
+        (fatbit_size(proto->delay_count) + sizeof(struct fatbit *));
+    delay_region_size = ROUNDUP_N(delay_region_size, 8);
 
     size_t nfa_context_size = 2 * sizeof(struct NFAContext512) + 127;
 
@@ -96,7 +98,8 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
                   + 2 * fatbit_size(deduperCount) /* ditto som logs */
                   + 2 * sizeof(u64a) * deduperCount /* start offsets for som */
                   + anchored_region_size
-                  + anchored_literal_region_size + qmpq_size + delay_size
+                  + anchored_literal_region_size + qmpq_size
+                  + delay_region_size
                   + som_store_size
                   + som_now_size
                   + som_attempted_size
@@ -140,23 +143,28 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) {
     s->som_attempted_store = (u64a *)current;
     current += som_attempted_store_size;
 
-    s->delay_slots = (u8 *)current;
-    current += delay_size;
+    current = ROUNDUP_PTR(current, 8);
+    s->delay_slots = (struct fatbit **)current;
+    current += sizeof(struct fatbit *) * DELAY_SLOT_COUNT;
+    for (u32 i = 0; i < DELAY_SLOT_COUNT; i++) {
+        s->delay_slots[i] = (struct fatbit *)current;
+        current += fatbit_size(proto->delay_count);
+    }
 
     current = ROUNDUP_PTR(current, 8);
-    s->am_log = (u8 **)current;
-    current += sizeof(u8 *) * anchored_region_len;
+    s->am_log = (struct fatbit **)current;
+    current += sizeof(struct fatbit *) * anchored_region_len;
     for (u32 i = 0; i < anchored_region_len; i++) {
-        s->am_log[i] = (u8 *)current;
-        current += mmbit_size(anchored_region_width);
+        s->am_log[i] = (struct fatbit *)current;
+        current += fatbit_size(anchored_region_width);
     }
 
     current = ROUNDUP_PTR(current, 8);
-    s->al_log = (u8 **)current;
-    current += sizeof(u8 *) * anchored_literal_region_len;
+    s->al_log = (struct fatbit **)current;
+    current += sizeof(struct fatbit *) * anchored_literal_region_len;
     for (u32 i = 0; i < anchored_literal_region_len; i++) {
-        s->al_log[i] = (u8 *)current;
-        current += mmbit_size(anchored_literal_region_width);
+        s->al_log[i] = (struct fatbit *)current;
+        current += fatbit_size(anchored_literal_region_width);
     }
 
     current = ROUNDUP_PTR(current, 8);
index f23ff5dcf528dacb22ccbb412f21f32c63747286..fa112a568871b1a40ab539ca637c3a38c090f748 100644 (file)
@@ -37,7 +37,6 @@
 #define SCRATCH_H_DA6D4FC06FF410
 
 #include "ue2common.h"
-#include "util/multibit_internal.h"
 #include "rose/rose_types.h"
 
 #ifdef __cplusplus
@@ -133,7 +132,7 @@ struct RoseContext {
 
 struct match_deduper {
     struct fatbit *log[2]; /**< even, odd logs */
-    struct fatbit *som_log[2]; /**< even, odd mmbit logs for som */
+    struct fatbit *som_log[2]; /**< even, odd fatbit logs for som */
     u64a *som_start_log[2]; /**< even, odd start offset logs for som */
     u32 log_size;
     u64a current_report_offset;
@@ -162,9 +161,9 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
     struct mq *queues;
     struct fatbit *aqa; /**< active queue array; fatbit of queues that are valid
                          * & active */
-    u8 *delay_slots;
-    u8 **am_log;
-    u8 **al_log;
+    struct fatbit **delay_slots;
+    struct fatbit **am_log;
+    struct fatbit **al_log;
     u64a am_log_sum;
     u64a al_log_sum;
     struct catchup_pq catchup_pq;
@@ -178,7 +177,7 @@ struct ALIGN_CL_DIRECTIVE hs_scratch {
     u32 scratchSize;
     u8 ALIGN_DIRECTIVE fdr_temp_buf[FDR_TEMP_BUF_SIZE];
     u32 handledKeyCount;
-    struct fatbit *handled_roles; /**< mmbit of ROLES (not states) already
+    struct fatbit *handled_roles; /**< fatbit of ROLES (not states) already
                                    * handled by this literal */
     u64a *som_store; /**< array of som locations */
     u64a *som_attempted_store; /**< array of som locations for fail stores */
@@ -198,18 +197,18 @@ struct hs_scratch *tctxtToScratch(struct RoseContext *tctxt) {
 }
 
 static really_inline
-u8 **getAnchoredLog(struct hs_scratch *scratch) { /* array of mmbit ptr */
+struct fatbit **getAnchoredLog(struct hs_scratch *scratch) {
     return scratch->am_log;
 }
 
-/* array of mmbit ptr; TODO: why not an array of mmbits? */
+/* array of fatbit ptr; TODO: why not an array of fatbits? */
 static really_inline
-u8 **getAnchoredLiteralLog(struct hs_scratch *scratch) {
+struct fatbit **getAnchoredLiteralLog(struct hs_scratch *scratch) {
     return scratch->al_log;
 }
 
 static really_inline
-u8 *getDelaySlots(struct hs_scratch *scratch) {
+struct fatbit **getDelaySlots(struct hs_scratch *scratch) {
     return scratch->delay_slots;
 }
 
index cf90626956d29a6cd8c37c905a8fd0895cf09265..ad607638609ee4b7d37a173a77950af2c3fd94a0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -58,21 +58,25 @@ void fatbit_clear(struct fatbit *bits) {
 
 static really_inline
 char fatbit_set(struct fatbit *bits, u32 total_bits, u32 key) {
+    assert(ISALIGNED(bits));
     return mmbit_set(bits->fb_int.raw, total_bits, key);
 }
 
 static really_inline
 void fatbit_unset(struct fatbit *bits, u32 total_bits, u32 key) {
+    assert(ISALIGNED(bits));
      mmbit_unset(bits->fb_int.raw, total_bits, key);
 }
 
 static really_inline
 char fatbit_isset(const struct fatbit *bits, u32 total_bits, u32 key) {
+    assert(ISALIGNED(bits));
     return mmbit_isset(bits->fb_int.raw, total_bits, key);
 }
 
 static really_inline
 u32 fatbit_iterate(const struct fatbit *bits, u32 total_bits, u32 it_in) {
+    assert(ISALIGNED(bits));
     /* TODO: iterate_flat could be specialised as we don't have to worry about
      * partial blocks. */
     return mmbit_iterate(bits->fb_int.raw, total_bits, it_in);