]> git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
raise the limit of strings in double shufti
author Alex Coyte <a.coyte@intel.com>
Mon, 4 Apr 2016 05:54:09 +0000 (15:54 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 20 Apr 2016 03:34:56 +0000 (13:34 +1000)
src/nfa/accelcompile.cpp
src/nfa/mcclellancompile.cpp
src/nfa/mcclellancompile_accel.cpp
src/nfagraph/ng_limex_accel.cpp
src/nfagraph/ng_limex_accel.h

index a9281c132a914218bbd9746e78934ac6cfdd3403..75960dda0edbacc1c5c16ec0158ff60fdfdc1a28 100644 (file)
@@ -207,17 +207,14 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
         }
     }
 
-    if (outs1 + outs2 <= 8) {
-        if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
-            DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu"
-                         " two-byte literals\n", outs1, outs2);
-            aux->accel_type = ACCEL_DSHUFTI;
-            aux->dshufti.offset = offset;
-            shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
-                                   &aux->dshufti.lo1,
-                                   &aux->dshufti.hi1,
-                                   &aux->dshufti.lo2,
-                                   &aux->dshufti.hi2);
+    if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
+        DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu"
+                     " two-byte literals\n", outs1, outs2);
+        aux->accel_type = ACCEL_DSHUFTI;
+        aux->dshufti.offset = offset;
+        if (shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
+                                   &aux->dshufti.lo1, &aux->dshufti.hi1,
+                                   &aux->dshufti.lo2, &aux->dshufti.hi2)) {
             return;
         }
     }
index 87eed25012d91b951798558a9b2f4180bc98d759..b441873094a228fc66ca01a20d2fc739cfdae83b 100644 (file)
@@ -130,7 +130,6 @@ mstate_aux *getAux(NFA *n, dstate_id_t i) {
 static
 bool double_byte_ok(const escape_info &info) {
     return !info.outs2_broken
-        && info.outs2_single.count() + info.outs2.size() <= 8
         && info.outs2_single.count() < info.outs2.size()
         && info.outs2_single.count() <= 2 && !info.outs2.empty();
 }
@@ -256,14 +255,12 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
         }
     }
 
-    if (double_byte_ok(info)) {
+    if (double_byte_ok(info)
+        && shuftiBuildDoubleMasks(info.outs2_single, info.outs2,
+                                  &accel->dshufti.lo1, &accel->dshufti.hi1,
+                                  &accel->dshufti.lo2, &accel->dshufti.hi2)) {
         accel->accel_type = ACCEL_DSHUFTI;
         accel->dshufti.offset = verify_u8(info.outs2_offset);
-        shuftiBuildDoubleMasks(info.outs2_single, info.outs2,
-                               &accel->dshufti.lo1,
-                               &accel->dshufti.hi1,
-                               &accel->dshufti.lo2,
-                               &accel->dshufti.hi2);
         DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
         return;
     }
index 471d0d53b7ec60761e6339edd9beef932388e4ef..3e73d31d40dd844c4055ae9d170ed5df4222f8cf 100644 (file)
@@ -334,7 +334,6 @@ map<dstate_id_t, escape_info> populateAccelerationInfo(const raw_dfa &rdfa,
 static
 bool double_byte_ok(const escape_info &info) {
     return !info.outs2_broken
-        && info.outs2_single.count() + info.outs2.size() <= 8
         && info.outs2_single.count() < info.outs2.size()
         && info.outs2_single.count() <= 2 && !info.outs2.empty();
 }
index 63ec546ba2f060ac3989d12d67edc505aa55a1bd..8509b36ff3932ec99e928a7b025cdf45eebfff15 100644 (file)
@@ -284,7 +284,7 @@ AccelScheme make_double_accel(AccelScheme as, CharReach cr_1,
         return as;
     }
 
-    if (two_count > 8) {
+    if (two_count > DOUBLE_SHUFTI_LIMIT) {
         if (cr_2.count() < cr_1.count()) {
             as.double_cr |= cr_2;
             offset = offset_in + 1;
@@ -513,7 +513,7 @@ AccelScheme findBestAccelScheme(vector<vector<CharReach> > paths,
     best.offset = offset;
 
     /* merge best single and best double */
-    if (!da.double_byte.empty() && da.double_byte.size() <= 8
+    if (!da.double_byte.empty() && da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT
         && da.double_cr.count() < best.cr.count()) {
         best.double_byte = da.double_byte;
         best.double_cr = da.double_cr;
@@ -857,7 +857,8 @@ depth_done:
     // literals)
     if (depth > 1) {
         for (unsigned int i = 0; i < (depth - 1); i++) {
-            if (depthReach[i].count()*depthReach[i+1].count() <= 8) {
+            if (depthReach[i].count() * depthReach[i+1].count()
+                <= DOUBLE_SHUFTI_LIMIT) {
                 DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
                 *as = AccelScheme(CharReach::dot(), i);
                 return true;
index 9c77dc67a3d6eb7555946b7c3cbcbf3dda564a98..16a6b770b0156def9279fc10f02173a5a3ef1b55 100644 (file)
@@ -63,6 +63,8 @@ void findAccelFriends(const NGHolder &g, NFAVertex v,
                       u32 offset,
                       ue2::flat_set<NFAVertex> *friends);
 
+#define DOUBLE_SHUFTI_LIMIT 20
+
 struct AccelScheme {
     AccelScheme(const CharReach &cr_in, u32 offset_in)
         : cr(cr_in), offset(offset_in) {
@@ -78,10 +80,10 @@ struct AccelScheme {
         size_t a_dcount = double_cr.count();
         size_t b_dcount = b.double_cr.count();
 
-        bool feasible_double_a
-            = !a.double_byte.empty() && a.double_byte.size() <= 8;
-        bool feasible_double_b
-            = !b.double_byte.empty() && b.double_byte.size() <= 8;
+        bool feasible_double_a = !a.double_byte.empty()
+            && a.double_byte.size() <= DOUBLE_SHUFTI_LIMIT;
+        bool feasible_double_b = !b.double_byte.empty()
+            && b.double_byte.size() <= DOUBLE_SHUFTI_LIMIT;
 
         if (feasible_double_a != feasible_double_b) {
             return feasible_double_a > feasible_double_b;