git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
hwlm: move accel from hwlm build to rose
author Justin Viiret <justin.viiret@intel.com>
Wed, 30 Nov 2016 05:55:52 +0000 (16:55 +1100)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 26 Apr 2017 04:41:29 +0000 (14:41 +1000)
CMakeLists.txt
src/hwlm/hwlm_build.cpp
src/rose/rose_build_lit_accel.cpp [new file with mode: 0644]
src/rose/rose_build_lit_accel.h [new file with mode: 0644]
src/rose/rose_build_matchers.cpp

index 7ede52b459ae734d91d779467df53adc08b30148..8fe82a7098cb179c38a04a782a09babce3c368e6 100644 (file)
@@ -936,6 +936,8 @@ SET (hs_SRCS
     src/rose/rose_build_impl.h
     src/rose/rose_build_infix.cpp
     src/rose/rose_build_infix.h
+    src/rose/rose_build_lit_accel.cpp
+    src/rose/rose_build_lit_accel.h
     src/rose/rose_build_long_lit.cpp
     src/rose/rose_build_long_lit.h
     src/rose/rose_build_lookaround.cpp
index 29e71293946925778b837cac9c6348f132f41962..a05ca1a25fca98b7d5913283d27b62c43e61f84d 100644 (file)
 /** \file
  * \brief Hamster Wheel Literal Matcher: build code.
  */
+
+#include "hwlm_build.h"
+
 #include "grey.h"
 #include "hwlm.h"
-#include "hwlm_build.h"
 #include "hwlm_internal.h"
 #include "hwlm_literal.h"
 #include "noodle_engine.h"
 #include "scratch.h"
 #include "ue2common.h"
 #include "fdr/fdr_compile.h"
-#include "nfa/shufticompile.h"
-#include "nfa/trufflecompile.h"
-#include "util/alloc.h"
-#include "util/bitutils.h"
-#include "util/charreach.h"
-#include "util/compare.h"
 #include "util/compile_context.h"
 #include "util/compile_error.h"
-#include "util/dump_charclass.h"
-#include "util/target_info.h"
 #include "util/ue2string.h"
-#include "util/verify_types.h"
 
 #include <cassert>
-#include <cstdio>
-#include <cstdlib>
 #include <cstring>
 #include <vector>
 
@@ -62,431 +53,6 @@ using namespace std;
 
 namespace ue2 {
 
-static const unsigned int MAX_ACCEL_OFFSET = 16;
-static const unsigned int MAX_SHUFTI_WIDTH = 240;
-
-static
-size_t mask_overhang(const hwlmLiteral &lit) {
-    size_t msk_true_size = lit.msk.size();
-    assert(msk_true_size <= HWLM_MASKLEN);
-    assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET);
-    for (u8 c : lit.msk) {
-        if (!c) {
-            msk_true_size--;
-        } else {
-            break;
-        }
-    }
-
-    if (lit.s.length() >= msk_true_size) {
-        return 0;
-    }
-
-    /* only short literals should be able to have a mask which overhangs */
-    assert(lit.s.length() < MAX_ACCEL_OFFSET);
-    return msk_true_size - lit.s.length();
-}
-
-static
-bool findDVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
-    const hwlmLiteral &first = *lits.front();
-
-    struct candidate {
-        candidate(void)
-            : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {}
-        candidate(const hwlmLiteral &base, u32 offset)
-            : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0),
-              b5insens(false), valid(true) {}
-        char c1;
-        char c2;
-        u32 max_offset;
-        bool b5insens;
-        bool valid;
-
-        bool operator>(const candidate &other) const {
-            if (!valid) {
-                return false;
-            }
-
-            if (!other.valid) {
-                return true;
-            }
-
-            if (other.cdiffers() && !cdiffers()) {
-                return false;
-            }
-
-            if (!other.cdiffers() && cdiffers()) {
-                return true;
-            }
-
-            if (!other.b5insens && b5insens) {
-                return false;
-            }
-
-            if (other.b5insens && !b5insens) {
-                return true;
-            }
-
-            if (max_offset > other.max_offset) {
-                return false;
-            }
-
-            return true;
-        }
-
-        bool cdiffers(void) const {
-            if (!b5insens) {
-                return c1 != c2;
-            }
-            return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR);
-        }
-    };
-
-    candidate best;
-
-    for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) {
-        candidate curr(first, i);
-
-        /* check to see if this pair appears in each string */
-        for (const auto &lit_ptr : lits) {
-            const hwlmLiteral &lit = *lit_ptr;
-            if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) {
-                curr.b5insens = true; /* no choice but to be case insensitive */
-            }
-
-            bool found = false;
-            bool found_nc = false;
-            for (u32 j = 0;
-                 !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) {
-                found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
-                found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
-                    && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
-
-                if (curr.b5insens) {
-                    found = found_nc;
-                }
-            }
-
-            if (!curr.b5insens && !found && found_nc) {
-                curr.b5insens = true;
-                found = true;
-            }
-
-            if (!found) {
-                goto next_candidate;
-            }
-        }
-
-        /* check to find the max offset where this appears */
-        for (const auto &lit_ptr : lits) {
-            const hwlmLiteral &lit = *lit_ptr;
-            for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1;
-                 j++) {
-                bool found = false;
-                if (curr.b5insens) {
-                    found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
-                     && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
-                } else {
-                    found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
-                }
-
-                if (found) {
-                    assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
-                    ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
-                    break;
-                }
-            }
-        }
-
-        if (curr > best) {
-            best = curr;
-        }
-
-    next_candidate:;
-    }
-
-    if (!best.valid) {
-        return false;
-    }
-
-    aux->dverm.offset = verify_u8(best.max_offset);
-
-    if (!best.b5insens) {
-        aux->dverm.accel_type = ACCEL_DVERM;
-        aux->dverm.c1 = best.c1;
-        aux->dverm.c2 = best.c2;
-        DEBUG_PRINTF("built dverm for %02hhx%02hhx\n",
-                     aux->dverm.c1, aux->dverm.c2);
-    } else {
-        aux->dverm.accel_type = ACCEL_DVERM_NOCASE;
-        aux->dverm.c1 = best.c1 & CASE_CLEAR;
-        aux->dverm.c2 = best.c2 & CASE_CLEAR;
-        DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n",
-                     aux->dverm.c1, aux->dverm.c2);
-    }
-    return true;
-}
-
-static
-bool findSVerm(const vector<const hwlmLiteral *> &lits, AccelAux *aux) {
-    const hwlmLiteral &first = *lits.front();
-
-    struct candidate {
-        candidate(void)
-            : c(0), max_offset(0), b5insens(false), valid(false) {}
-        candidate(const hwlmLiteral &base, u32 offset)
-            : c(base.s[offset]), max_offset(0),
-              b5insens(false), valid(true) {}
-        char c;
-        u32 max_offset;
-        bool b5insens;
-        bool valid;
-
-        bool operator>(const candidate &other) const {
-            if (!valid) {
-                return false;
-            }
-
-            if (!other.valid) {
-                return true;
-            }
-
-            if (!other.b5insens && b5insens) {
-                return false;
-            }
-
-            if (other.b5insens && !b5insens) {
-                return true;
-            }
-
-            if (max_offset > other.max_offset) {
-                return false;
-            }
-
-            return true;
-        }
-    };
-
-    candidate best;
-
-    for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) {
-        candidate curr(first, i);
-
-        /* check to see if this pair appears in each string */
-        for (const auto &lit_ptr : lits) {
-            const hwlmLiteral &lit = *lit_ptr;
-            if (lit.nocase && ourisalpha(curr.c)) {
-                curr.b5insens = true; /* no choice but to be case insensitive */
-            }
-
-            bool found = false;
-            bool found_nc = false;
-            for (u32 j = 0;
-                 !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
-                found |= curr.c == lit.s[j];
-                found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
-
-                if (curr.b5insens) {
-                    found = found_nc;
-                }
-            }
-
-            if (!curr.b5insens && !found && found_nc) {
-                curr.b5insens = true;
-                found = true;
-            }
-
-            if (!found) {
-                goto next_candidate;
-            }
-        }
-
-        /* check to find the max offset where this appears */
-        for (const auto &lit_ptr : lits) {
-            const hwlmLiteral &lit = *lit_ptr;
-            for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
-                bool found = false;
-                if (curr.b5insens) {
-                    found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
-                } else {
-                    found = curr.c == lit.s[j];
-                }
-
-                if (found) {
-                    assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
-                    ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
-                }
-            }
-        }
-
-        if (curr > best) {
-            best = curr;
-        }
-
-    next_candidate:;
-    }
-
-    if (!best.valid) {
-        return false;
-    }
-
-    if (!best.b5insens) {
-        aux->verm.accel_type = ACCEL_VERM;
-        aux->verm.c = best.c;
-        DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c);
-    } else {
-        aux->verm.accel_type = ACCEL_VERM_NOCASE;
-        aux->verm.c = best.c & CASE_CLEAR;
-        DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c);
-    }
-    aux->verm.offset = verify_u8(best.max_offset);
-
-    return true;
-}
-
-static
-void filterLits(const vector<hwlmLiteral> &lits, hwlm_group_t expected_groups,
-                vector<const hwlmLiteral *> *filtered_lits, u32 *min_len) {
-    *min_len = MAX_ACCEL_OFFSET;
-
-    for (const auto &lit : lits) {
-        if (!(lit.groups & expected_groups)) {
-            continue;
-        }
-
-        const size_t lit_len = lit.s.length();
-        if (lit_len < *min_len) {
-            *min_len = verify_u32(lit_len);
-        }
-
-        filtered_lits->push_back(&lit);
-
-#ifdef DEBUG
-        DEBUG_PRINTF("lit:");
-        for (u32 i = 0; i < lit.s.length(); i++) {
-            printf("%02hhx", lit.s[i]);
-        }
-        printf("\n");
-#endif
-    }
-}
-
-static
-bool litGuardedByCharReach(const CharReach &cr, const hwlmLiteral &lit,
-                           u32 max_offset) {
-    for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) {
-         unsigned char c = lit.s[i];
-         if (lit.nocase) {
-             if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) {
-                 return true;
-             }
-         } else {
-             if (cr.test(c)) {
-                 return true;
-             }
-         }
-    }
-
-    return false;
-}
-
-static
-void findForwardAccelScheme(const vector<hwlmLiteral> &lits,
-                            hwlm_group_t expected_groups, AccelAux *aux) {
-    DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
-    u32 min_len = MAX_ACCEL_OFFSET;
-    vector<const hwlmLiteral *> filtered_lits;
-
-    filterLits(lits, expected_groups, &filtered_lits, &min_len);
-    if (filtered_lits.empty()) {
-        return;
-    }
-
-    if (findDVerm(filtered_lits, aux)
-        || findSVerm(filtered_lits, aux)) {
-        return;
-    }
-
-    /* look for shufti/truffle */
-
-    vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
-    for (const auto &lit : lits) {
-        if (!(lit.groups & expected_groups)) {
-            continue;
-        }
-
-        u32 overhang = mask_overhang(lit);
-        for (u32 i = 0; i < overhang; i++) {
-            /* this offset overhangs the start of the real literal; look at the
-             * msk/cmp */
-            for (u32 j = 0; j < N_CHARS; j++) {
-                if ((j & lit.msk[i]) == lit.cmp[i]) {
-                    reach[i].set(j);
-                }
-            }
-        }
-        for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) {
-            CharReach &reach_i = reach[i];
-            u32 i_effective = i - overhang;
-
-            if (litGuardedByCharReach(reach_i, lit, i_effective)) {
-                continue;
-            }
-            unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective]
-                                                           : lit.s.back();
-            if (lit.nocase) {
-                reach_i.set(mytoupper(c));
-                reach_i.set(mytolower(c));
-            } else {
-                reach_i.set(c);
-            }
-        }
-    }
-
-    u32 min_count = ~0U;
-    u32 min_offset = ~0U;
-    for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) {
-        size_t count = reach[i].count();
-        DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
-                     describeClass(reach[i]).c_str(), count);
-        if (count < min_count) {
-            min_count = (u32)count;
-            min_offset = i;
-        }
-    }
-
-    if (min_count > MAX_SHUFTI_WIDTH) {
-        DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count);
-        return;
-    }
-
-    const CharReach &cr = reach[min_offset];
-    if (-1 !=
-        shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) {
-        DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
-                     describeClass(cr).c_str(), cr.count(), min_offset);
-        aux->shufti.accel_type = ACCEL_SHUFTI;
-        aux->shufti.offset = verify_u8(min_offset);
-        return;
-    }
-
-    truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2);
-    DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
-                 describeClass(cr).c_str(), cr.count(), min_offset);
-    aux->truffle.accel_type = ACCEL_TRUFFLE;
-    aux->truffle.offset = verify_u8(min_offset);
-}
-
-static
-void buildForwardAccel(HWLM *h, const vector<hwlmLiteral> &lits,
-                       hwlm_group_t expected_groups) {
-    findForwardAccelScheme(lits, expected_groups, &h->accel1);
-    findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0);
-
-    h->accel1_groups = expected_groups;
-}
-
 static
 void dumpLits(UNUSED const vector<hwlmLiteral> &lits) {
 #ifdef DEBUG
@@ -533,7 +99,7 @@ bool isNoodleable(const vector<hwlmLiteral> &lits,
 
 aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
                                    bool make_small, const CompileContext &cc,
-                                   hwlm_group_t expected_groups) {
+                                   UNUSED hwlm_group_t expected_groups) {
     assert(!lits.empty());
     dumpLits(lits);
 
@@ -604,10 +170,6 @@ aligned_unique_ptr<HWLM> hwlmBuild(const vector<hwlmLiteral> &lits,
     h->type = engType;
     memcpy(HWLM_DATA(h.get()), eng.get(), engSize);
 
-    if (engType == HWLM_ENGINE_FDR && cc.grey.hamsterAccelForward) {
-        buildForwardAccel(h.get(), lits, expected_groups);
-    }
-
     return h;
 }
 
diff --git a/src/rose/rose_build_lit_accel.cpp b/src/rose/rose_build_lit_accel.cpp
new file mode 100644 (file)
index 0000000..1388069
--- /dev/null
@@ -0,0 +1,471 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_lit_accel.h"
+
+#include "grey.h"
+#include "ue2common.h"
+#include "hwlm/hwlm_build.h"
+#include "hwlm/hwlm_internal.h"
+#include "hwlm/hwlm_literal.h"
+#include "nfa/accel.h"
+#include "nfa/shufticompile.h"
+#include "nfa/trufflecompile.h"
+#include "util/compare.h"
+#include "util/dump_charclass.h"
+#include "util/verify_types.h"
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Number of leading offsets examined when looking for an acceleration
+ * scheme; also the number of per-offset reach sets built for shufti/truffle. */
+static const unsigned int MAX_ACCEL_OFFSET = 16;
+/** \brief Largest reach (in characters) for which a shufti/truffle scheme is
+ * still built. */
+static const unsigned int MAX_SHUFTI_WIDTH = 240;
+
+/**
+ * \brief Number of mask bytes that overhang the start of the literal string.
+ *
+ * Zero bytes at the start of \a lit.msk are not counted. Returns 0 when the
+ * literal string is at least as long as the remaining mask; otherwise returns
+ * the remaining mask length minus the string length.
+ */
+static
+size_t mask_overhang(const AccelString &lit) {
+    size_t msk_true_size = lit.msk.size();
+    assert(msk_true_size <= HWLM_MASKLEN);
+    assert(HWLM_MASKLEN <= MAX_ACCEL_OFFSET);
+    /* discount the run of zero bytes at the start of the mask */
+    for (u8 c : lit.msk) {
+        if (!c) {
+            msk_true_size--;
+        } else {
+            break;
+        }
+    }
+
+    /* the string covers the whole effective mask: nothing overhangs */
+    if (lit.s.length() >= msk_true_size) {
+        return 0;
+    }
+
+    /* only short literals should be able to have a mask which overhangs */
+    assert(lit.s.length() < MAX_ACCEL_OFFSET);
+    return msk_true_size - lit.s.length();
+}
+
+/**
+ * \brief Try to build a double-byte (dverm) acceleration scheme.
+ *
+ * Each adjacent byte pair in the first MAX_ACCEL_OFFSET bytes of the first
+ * literal is considered as a candidate. A candidate survives only if the pair
+ * (compared caselessly where required) occurs within the first
+ * MAX_ACCEL_OFFSET bytes of every literal. The best survivor, as ranked by
+ * candidate::operator>, is written to \a aux->dverm.
+ *
+ * \param lits Literals to accelerate; must not be empty, as the first entry is
+ *             dereferenced unconditionally.
+ * \param aux  Written only when a scheme is found.
+ * \return true if a scheme was written to \a aux, false otherwise.
+ *
+ * NOTE(review): the loop bounds compute MIN(...) - 1 on unsigned values, so
+ * this presumably relies on callers never supplying an empty literal string;
+ * confirm.
+ */
+static
+bool findDVerm(const vector<const AccelString *> &lits, AccelAux *aux) {
+    const AccelString &first = *lits.front();
+
+    /* a byte pair taken from the first literal, plus what has been learnt
+     * about it while checking the other literals */
+    struct candidate {
+        candidate(void)
+            : c1(0), c2(0), max_offset(0), b5insens(false), valid(false) {}
+        candidate(const AccelString &base, u32 offset)
+            : c1(base.s[offset]), c2(base.s[offset + 1]), max_offset(0),
+              b5insens(false), valid(true) {}
+        char c1;
+        char c2;
+        u32 max_offset; /* largest (offset + mask overhang) over all literals */
+        bool b5insens;  /* pair must be compared with CASE_CLEAR applied */
+        bool valid;     /* false only for the default-constructed candidate */
+
+        /* Ranking: valid beats invalid, then a pair of differing bytes beats a
+         * repeated byte, then case-sensitive beats case-insensitive, then the
+         * smaller max_offset wins. Ties return true, so a later candidate of
+         * equal rank replaces the current best. */
+        bool operator>(const candidate &other) const {
+            if (!valid) {
+                return false;
+            }
+
+            if (!other.valid) {
+                return true;
+            }
+
+            if (other.cdiffers() && !cdiffers()) {
+                return false;
+            }
+
+            if (!other.cdiffers() && cdiffers()) {
+                return true;
+            }
+
+            if (!other.b5insens && b5insens) {
+                return false;
+            }
+
+            if (other.b5insens && !b5insens) {
+                return true;
+            }
+
+            if (max_offset > other.max_offset) {
+                return false;
+            }
+
+            return true;
+        }
+
+        /* true if the two bytes differ under the candidate's case handling */
+        bool cdiffers(void) const {
+            if (!b5insens) {
+                return c1 != c2;
+            }
+            return (c1 & CASE_CLEAR) != (c2 & CASE_CLEAR);
+        }
+    };
+
+    candidate best;
+
+    for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()) - 1; i++) {
+        candidate curr(first, i);
+
+        /* check to see if this pair appears in each string */
+        for (const auto &lit_ptr : lits) {
+            const AccelString &lit = *lit_ptr;
+            if (lit.nocase && (ourisalpha(curr.c1) || ourisalpha(curr.c2))) {
+                curr.b5insens = true; /* no choice but to be case insensitive */
+            }
+
+            bool found = false;
+            bool found_nc = false;
+            /* scan stops at the first position that satisfies 'found' */
+            for (u32 j = 0;
+                 !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1; j++) {
+                found |= curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
+                found_nc |= (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
+                    && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
+
+                /* once case-insensitive, only the caseless result counts */
+                if (curr.b5insens) {
+                    found = found_nc;
+                }
+            }
+
+            /* only a caseless match exists in this literal: make the
+             * candidate case-insensitive rather than rejecting it */
+            if (!curr.b5insens && !found && found_nc) {
+                curr.b5insens = true;
+                found = true;
+            }
+
+            /* pair is absent from this literal: reject the candidate */
+            if (!found) {
+                goto next_candidate;
+            }
+        }
+
+        /* check to find the max offset where this appears */
+        for (const auto &lit_ptr : lits) {
+            const AccelString &lit = *lit_ptr;
+            for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()) - 1;
+                 j++) {
+                bool found = false;
+                if (curr.b5insens) {
+                    found = (curr.c1 & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR)
+                     && (curr.c2 & CASE_CLEAR) == (lit.s[j + 1] & CASE_CLEAR);
+                } else {
+                    found = curr.c1 == lit.s[j] && curr.c2 == lit.s[j + 1];
+                }
+
+                /* only the first occurrence in each literal is used */
+                if (found) {
+                    assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
+                    ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
+                    break;
+                }
+            }
+        }
+
+        if (curr > best) {
+            best = curr;
+        }
+
+    next_candidate:;
+    }
+
+    /* no pair is shared by every literal */
+    if (!best.valid) {
+        return false;
+    }
+
+    aux->dverm.offset = verify_u8(best.max_offset);
+
+    if (!best.b5insens) {
+        aux->dverm.accel_type = ACCEL_DVERM;
+        aux->dverm.c1 = best.c1;
+        aux->dverm.c2 = best.c2;
+        DEBUG_PRINTF("built dverm for %02hhx%02hhx\n",
+                     aux->dverm.c1, aux->dverm.c2);
+    } else {
+        /* the caseless variant stores the bytes with CASE_CLEAR applied */
+        aux->dverm.accel_type = ACCEL_DVERM_NOCASE;
+        aux->dverm.c1 = best.c1 & CASE_CLEAR;
+        aux->dverm.c2 = best.c2 & CASE_CLEAR;
+        DEBUG_PRINTF("built dverm nc for %02hhx%02hhx\n",
+                     aux->dverm.c1, aux->dverm.c2);
+    }
+    return true;
+}
+
+/**
+ * \brief Try to build a single-byte (verm) acceleration scheme.
+ *
+ * Each byte in the first MAX_ACCEL_OFFSET bytes of the first literal is
+ * considered as a candidate. A candidate survives only if the byte (compared
+ * caselessly where required) occurs within the first MAX_ACCEL_OFFSET bytes of
+ * every literal. The best survivor, as ranked by candidate::operator>, is
+ * written to \a aux->verm.
+ *
+ * \param lits Literals to accelerate; must not be empty, as the first entry is
+ *             dereferenced unconditionally.
+ * \param aux  Written only when a scheme is found.
+ * \return true if a scheme was written to \a aux, false otherwise.
+ */
+static
+bool findSVerm(const vector<const AccelString *> &lits, AccelAux *aux) {
+    const AccelString &first = *lits.front();
+
+    /* a byte taken from the first literal, plus what has been learnt about it
+     * while checking the other literals */
+    struct candidate {
+        candidate(void)
+            : c(0), max_offset(0), b5insens(false), valid(false) {}
+        candidate(const AccelString &base, u32 offset)
+            : c(base.s[offset]), max_offset(0),
+              b5insens(false), valid(true) {}
+        char c;
+        u32 max_offset; /* largest (offset + mask overhang) over all literals */
+        bool b5insens;  /* byte must be compared with CASE_CLEAR applied */
+        bool valid;     /* false only for the default-constructed candidate */
+
+        /* Ranking: valid beats invalid, then case-sensitive beats
+         * case-insensitive, then the smaller max_offset wins. Ties return
+         * true, so a later candidate of equal rank replaces the current
+         * best. */
+        bool operator>(const candidate &other) const {
+            if (!valid) {
+                return false;
+            }
+
+            if (!other.valid) {
+                return true;
+            }
+
+            if (!other.b5insens && b5insens) {
+                return false;
+            }
+
+            if (other.b5insens && !b5insens) {
+                return true;
+            }
+
+            if (max_offset > other.max_offset) {
+                return false;
+            }
+
+            return true;
+        }
+    };
+
+    candidate best;
+
+    for (u32 i = 0; i < MIN(MAX_ACCEL_OFFSET, first.s.length()); i++) {
+        candidate curr(first, i);
+
+        /* check to see if this character appears in each string */
+        for (const auto &lit_ptr : lits) {
+            const AccelString &lit = *lit_ptr;
+            if (lit.nocase && ourisalpha(curr.c)) {
+                curr.b5insens = true; /* no choice but to be case insensitive */
+            }
+
+            bool found = false;
+            bool found_nc = false;
+            /* scan stops at the first position that satisfies 'found' */
+            for (u32 j = 0;
+                 !found && j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
+                found |= curr.c == lit.s[j];
+                found_nc |= (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
+
+                /* once case-insensitive, only the caseless result counts */
+                if (curr.b5insens) {
+                    found = found_nc;
+                }
+            }
+
+            /* only a caseless match exists in this literal: make the
+             * candidate case-insensitive rather than rejecting it */
+            if (!curr.b5insens && !found && found_nc) {
+                curr.b5insens = true;
+                found = true;
+            }
+
+            /* byte is absent from this literal: reject the candidate */
+            if (!found) {
+                goto next_candidate;
+            }
+        }
+
+        /* check to find the max offset where this appears */
+        for (const auto &lit_ptr : lits) {
+            const AccelString &lit = *lit_ptr;
+            for (u32 j = 0; j < MIN(MAX_ACCEL_OFFSET, lit.s.length()); j++) {
+                bool found = false;
+                if (curr.b5insens) {
+                    found = (curr.c & CASE_CLEAR) == (lit.s[j] & CASE_CLEAR);
+                } else {
+                    found = curr.c == lit.s[j];
+                }
+
+                /* no break here: every occurrence inside the window is
+                 * taken into account, not just the first */
+                if (found) {
+                    assert(j + mask_overhang(lit) <= MAX_ACCEL_OFFSET);
+                    ENSURE_AT_LEAST(&curr.max_offset, j + mask_overhang(lit));
+                }
+            }
+        }
+
+        if (curr > best) {
+            best = curr;
+        }
+
+    next_candidate:;
+    }
+
+    /* no byte is shared by every literal */
+    if (!best.valid) {
+        return false;
+    }
+
+    if (!best.b5insens) {
+        aux->verm.accel_type = ACCEL_VERM;
+        aux->verm.c = best.c;
+        DEBUG_PRINTF("built verm for %02hhx\n", aux->verm.c);
+    } else {
+        /* the caseless variant stores the byte with CASE_CLEAR applied */
+        aux->verm.accel_type = ACCEL_VERM_NOCASE;
+        aux->verm.c = best.c & CASE_CLEAR;
+        DEBUG_PRINTF("built verm nc for %02hhx\n", aux->verm.c);
+    }
+    aux->verm.offset = verify_u8(best.max_offset);
+
+    return true;
+}
+
+/**
+ * \brief Select the literals that belong to any of the expected groups.
+ *
+ * \param lits            All literals; the pointers appended to
+ *                        \a filtered_lits point into this vector.
+ * \param expected_groups Group mask; a literal is selected when its groups
+ *                        share at least one bit with it.
+ * \param filtered_lits   Output: pointers to the selected literals are
+ *                        appended in input order.
+ * \param min_len         Output: the shortest selected literal length, capped
+ *                        at MAX_ACCEL_OFFSET.
+ */
+static
+void filterLits(const vector<AccelString> &lits, hwlm_group_t expected_groups,
+                vector<const AccelString *> *filtered_lits, u32 *min_len) {
+    *min_len = MAX_ACCEL_OFFSET;
+
+    for (const auto &lit : lits) {
+        /* literal is in none of the requested groups */
+        if (!(lit.groups & expected_groups)) {
+            continue;
+        }
+
+        const size_t lit_len = lit.s.length();
+        if (lit_len < *min_len) {
+            *min_len = verify_u32(lit_len);
+        }
+
+        filtered_lits->push_back(&lit);
+
+#ifdef DEBUG
+        /* hex dump of the selected literal */
+        DEBUG_PRINTF("lit:");
+        for (u32 i = 0; i < lit.s.length(); i++) {
+            printf("%02hhx", lit.s[i]);
+        }
+        printf("\n");
+#endif
+    }
+}
+
+/**
+ * \brief Check whether \a cr already contains a character of the literal at
+ * any position up to and including \a max_offset.
+ *
+ * Positions past the end of the literal string are not examined. For a nocase
+ * literal, a position only counts when both the upper-case and lower-case
+ * forms of its character are in \a cr.
+ *
+ * \return true if such a position exists, false otherwise.
+ */
+static
+bool litGuardedByCharReach(const CharReach &cr, const AccelString &lit,
+                           u32 max_offset) {
+    for (u32 i = 0; i <= max_offset && i < lit.s.length(); i++) {
+         unsigned char c = lit.s[i];
+         if (lit.nocase) {
+             if (cr.test(mytoupper(c)) && cr.test(mytolower(c))) {
+                 return true;
+             }
+         } else {
+             if (cr.test(c)) {
+                 return true;
+             }
+         }
+    }
+
+    return false;
+}
+
+/**
+ * \brief Choose an acceleration scheme for the literals in the given groups.
+ *
+ * Tries, in order: double-byte vermicelli (findDVerm), single-byte vermicelli
+ * (findSVerm), then a character-class scheme (shufti, falling back to
+ * truffle) built from the offset with the smallest reach.
+ *
+ * \param lits            All literals.
+ * \param expected_groups Only literals sharing a bit with this mask are
+ *                        considered.
+ * \param aux             Written when a scheme is chosen. It is left untouched
+ *                        when no literal is selected or when the smallest
+ *                        reach exceeds MAX_SHUFTI_WIDTH.
+ */
+static
+void findForwardAccelScheme(const vector<AccelString> &lits,
+                            hwlm_group_t expected_groups, AccelAux *aux) {
+    DEBUG_PRINTF("building accel expected=%016llx\n", expected_groups);
+    /* NOTE(review): min_len is filled in by filterLits but is not read again
+     * in this function */
+    u32 min_len = MAX_ACCEL_OFFSET;
+    vector<const AccelString *> filtered_lits;
+
+    filterLits(lits, expected_groups, &filtered_lits, &min_len);
+    if (filtered_lits.empty()) {
+        return;
+    }
+
+    /* dverm is tried first; sverm is only attempted if it fails */
+    if (findDVerm(filtered_lits, aux)
+        || findSVerm(filtered_lits, aux)) {
+        return;
+    }
+
+    /* look for shufti/truffle */
+
+    /* reach[i]: set of characters accumulated for offset i */
+    vector<CharReach> reach(MAX_ACCEL_OFFSET, CharReach());
+    for (const auto &lit : lits) {
+        if (!(lit.groups & expected_groups)) {
+            continue;
+        }
+
+        u32 overhang = mask_overhang(lit);
+        for (u32 i = 0; i < overhang; i++) {
+            /* this offset overhangs the start of the real literal; look at the
+             * msk/cmp */
+            for (u32 j = 0; j < N_CHARS; j++) {
+                if ((j & lit.msk[i]) == lit.cmp[i]) {
+                    reach[i].set(j);
+                }
+            }
+        }
+        for (u32 i = overhang; i < MAX_ACCEL_OFFSET; i++) {
+            CharReach &reach_i = reach[i];
+            u32 i_effective = i - overhang; /* index into lit.s */
+
+            /* an earlier character of this literal is already in the reach
+             * for this offset, so nothing needs to be added */
+            if (litGuardedByCharReach(reach_i, lit, i_effective)) {
+                continue;
+            }
+            /* past the end of the literal the last character is reused
+             * (assumes a non-empty literal string -- TODO confirm) */
+            unsigned char c = i_effective < lit.s.length() ? lit.s[i_effective]
+                                                           : lit.s.back();
+            if (lit.nocase) {
+                reach_i.set(mytoupper(c));
+                reach_i.set(mytolower(c));
+            } else {
+                reach_i.set(c);
+            }
+        }
+    }
+
+    /* pick the offset with the smallest reach; the strict '<' keeps the
+     * earliest offset among equal counts */
+    u32 min_count = ~0U;
+    u32 min_offset = ~0U;
+    for (u32 i = 0; i < MAX_ACCEL_OFFSET; i++) {
+        size_t count = reach[i].count();
+        DEBUG_PRINTF("offset %u is %s (reach %zu)\n", i,
+                     describeClass(reach[i]).c_str(), count);
+        if (count < min_count) {
+            min_count = (u32)count;
+            min_offset = i;
+        }
+    }
+
+    if (min_count > MAX_SHUFTI_WIDTH) {
+        DEBUG_PRINTF("FAIL: min shufti with %u chars is too wide\n", min_count);
+        return;
+    }
+
+    const CharReach &cr = reach[min_offset];
+    /* a return value of -1 from shuftiBuildMasks is treated as failure */
+    if (-1 !=
+        shuftiBuildMasks(cr, (u8 *)&aux->shufti.lo, (u8 *)&aux->shufti.hi)) {
+        DEBUG_PRINTF("built shufti for %s (%zu chars, offset %u)\n",
+                     describeClass(cr).c_str(), cr.count(), min_offset);
+        aux->shufti.accel_type = ACCEL_SHUFTI;
+        aux->shufti.offset = verify_u8(min_offset);
+        return;
+    }
+
+    /* shufti masks could not be built: use truffle for the same reach */
+    truffleBuildMasks(cr, (u8 *)&aux->truffle.mask1, (u8 *)&aux->truffle.mask2);
+    DEBUG_PRINTF("built truffle for %s (%zu chars, offset %u)\n",
+                 describeClass(cr).c_str(), cr.count(), min_offset);
+    aux->truffle.accel_type = ACCEL_TRUFFLE;
+    aux->truffle.offset = verify_u8(min_offset);
+}
+
+/**
+ * \brief Fill in both forward acceleration schemes of an HWLM structure.
+ *
+ * \a h->accel1 is built from the literals in \a expected_groups and
+ * \a h->accel0 from the literals in any group (HWLM_ALL_GROUPS);
+ * \a h->accel1_groups records \a expected_groups.
+ */
+void buildForwardAccel(HWLM *h, const vector<AccelString> &lits,
+                       hwlm_group_t expected_groups) {
+    findForwardAccelScheme(lits, expected_groups, &h->accel1);
+    findForwardAccelScheme(lits, HWLM_ALL_GROUPS, &h->accel0);
+
+    h->accel1_groups = expected_groups;
+}
+
+} // namespace ue2
diff --git a/src/rose/rose_build_lit_accel.h b/src/rose/rose_build_lit_accel.h
new file mode 100644 (file)
index 0000000..f0c0143
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_LIT_ACCEL_H
+#define ROSE_BUILD_LIT_ACCEL_H
+
+#include "hwlm/hwlm.h"
+
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+struct HWLM;
+
+namespace ue2 {
+
+/** \brief Description of one literal, as consumed by buildForwardAccel(). */
+struct AccelString {
+    /** \brief The string and vector arguments are taken by value and moved
+     * into the members. */
+    AccelString(std::string s_in, bool nocase_in, std::vector<u8> msk_in,
+                std::vector<u8> cmp_in, hwlm_group_t groups_in)
+        : s(std::move(s_in)), nocase(nocase_in), msk(std::move(msk_in)),
+          cmp(std::move(cmp_in)), groups(groups_in) {}
+
+    std::string s; /* literal bytes */
+    bool nocase; /* literal is handled case-insensitively */
+    std::vector<u8> msk; /* per-byte mask, paired with cmp */
+    std::vector<u8> cmp; /* per-byte value expected after applying msk */
+    hwlm_group_t groups; /* group bits this literal belongs to */
+
+    /** \brief Equal only when all five members are equal. */
+    bool operator==(const AccelString &a) const {
+        return s == a.s && nocase == a.nocase && msk == a.msk && cmp == a.cmp &&
+               groups == a.groups;
+    }
+
+    /** \brief Lexicographic ordering over (s, nocase, msk, cmp, groups). */
+    bool operator<(const AccelString &a) const {
+        return std::tie(s, nocase, msk, cmp, groups) <
+               std::tie(a.s, a.nocase, a.msk, a.cmp, a.groups);
+    }
+};
+
+/**
+ * \brief Build the forward acceleration schemes for \a h from \a lits.
+ *
+ * \param h               HWLM structure to update.
+ * \param lits            Literals to derive the schemes from.
+ * \param expected_groups Group mask selecting the literals used for one of
+ *                        the schemes.
+ */
+void buildForwardAccel(HWLM *h, const std::vector<AccelString> &lits,
+                       hwlm_group_t expected_groups);
+
+} // namespace ue2
+
+#endif // ROSE_BUILD_LIT_ACCEL_H
index c51905cac1f5e2c83e6e02bcc9fda5008b303df0..8d6f68df4217063437485653038b0a08dca6b475 100644 (file)
@@ -34,6 +34,7 @@
 #include "rose_build_matchers.h"
 
 #include "rose_build_impl.h"
+#include "rose_build_lit_accel.h"
 #include "rose_build_width.h"
 #include "hwlm/hwlm_build.h"
 #include "hwlm/hwlm_literal.h"
@@ -801,6 +802,8 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
         throw CompileError("Unable to generate bytecode.");
     }
 
+    buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups());
+
     if (build.cc.streaming) {
         DEBUG_PRINTF("history_required=%zu\n", mp.history_required);
         assert(mp.history_required <= build.cc.grey.maxHistoryAvailable);
@@ -866,6 +869,8 @@ buildSmallBlockMatcher(const RoseBuildImpl &build,
         throw CompileError("Unable to generate bytecode.");
     }
 
+    buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups());
+
     *sbsize = hwlmSize(hwlm.get());
     assert(*sbsize);
     DEBUG_PRINTF("built small block literal table size %zu bytes\n", *sbsize);
@@ -893,6 +898,8 @@ buildEodAnchoredMatcher(const RoseBuildImpl &build,
         throw CompileError("Unable to generate bytecode.");
     }
 
+    buildForwardAccel(hwlm.get(), mp.lits, build.getInitialGroups());
+
     *esize = hwlmSize(hwlm.get());
     assert(*esize);
     DEBUG_PRINTF("built eod-anchored literal table size %zu bytes\n", *esize);