git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
masked version of dverm
author Alex Coyte <a.coyte@intel.com>
Mon, 21 Mar 2016 05:19:46 +0000 (16:19 +1100)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 20 Apr 2016 03:34:56 +0000 (13:34 +1000)
src/nfa/accel.c
src/nfa/accel.h
src/nfa/accel_dump.cpp
src/nfa/accelcompile.cpp
src/nfa/accelcompile.h
src/nfa/limex_accel.c
src/nfa/mcclellancompile.cpp
src/nfa/vermicelli.h
src/nfa/vermicelli_sse.h
src/nfagraph/ng_limex_accel.h
unit/internal/vermicelli.cpp

index a8fc4e36aa5d6ce9880a8ad347aa4ed411c5c55b..8a8694a87d56a05a0a0ec571696dba4cef9f2eea 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -84,6 +84,18 @@ const u8 *run_accel(const union AccelAux *accel, const u8 *c, const u8 *c_end) {
                                   c_end - 1);
         break;
 
+    case ACCEL_DVERM_MASKED:
+        DEBUG_PRINTF("accel dverm masked %p %p\n", c, c_end);
+        if (c + 16 + 1 >= c_end) {
+            return c;
+        }
+
+        /* need to stop one early to get an accurate end state */
+        rv = vermicelliDoubleMaskedExec(accel->dverm.c1, accel->dverm.c2,
+                                        accel->dverm.m1, accel->dverm.m2,
+                                        c, c_end - 1);
+        break;
+
     case ACCEL_SHUFTI:
         DEBUG_PRINTF("accel shufti %p %p\n", c, c_end);
         if (c + 15 >= c_end) {
index af0295665d251271e502294f00a0323be8be1ecf..a13563b681e409a50e5ce4afe736c15eb4b21986 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -87,7 +87,10 @@ enum AccelType {
     ACCEL_MSTRUFFLE,
     ACCEL_MSGTRUFFLE,
     ACCEL_MDSTRUFFLE,
-    ACCEL_MDSGTRUFFLE
+    ACCEL_MDSGTRUFFLE,
+    /* masked dverm */
+    ACCEL_DVERM_MASKED,
+
 };
 
 /** \brief Structure for accel framework. */
@@ -107,6 +110,8 @@ union AccelAux {
         u8 offset;
         u8 c1; // uppercase if nocase
         u8 c2; // uppercase if nocase
+        u8 m1; // masked variant
+        u8 m2; // masked variant
     } dverm;
     struct {
         u8 accel_type;
index 9e4fb7e97fbe8f0d5caaa3d048f039e85760ce7d..40abd12c3da3e607efec030165ff67ac5566bee9 100644 (file)
@@ -66,6 +66,8 @@ const char *accelName(u8 accel_type) {
         return "double-vermicelli";
     case ACCEL_DVERM_NOCASE:
         return "double-vermicelli nocase";
+    case ACCEL_DVERM_MASKED:
+        return "double-vermicelli masked";
     case ACCEL_RVERM:
         return "reverse vermicelli";
     case ACCEL_RVERM_NOCASE:
@@ -247,6 +249,10 @@ void dumpAccelInfo(FILE *f, const AccelAux &accel) {
     case ACCEL_RDVERM_NOCASE:
         fprintf(f, " [\\x%02hhx\\x%02hhx]\n", accel.dverm.c1, accel.dverm.c2);
         break;
+    case ACCEL_DVERM_MASKED:
+        fprintf(f, " [\\x%02hhx\\x%02hhx] & [\\x%02hhx\\x%02hhx]\n",
+                accel.dverm.c1, accel.dverm.c2, accel.dverm.m1, accel.dverm.m2);
+        break;
     case ACCEL_SHUFTI: {
         fprintf(f, "\n");
         dumpShuftiMasks(f, accel.shufti.lo, accel.shufti.hi);
index 6f3b6e8a98c8c43b94c3bb25d4bc227bcb7fd022..a9281c132a914218bbd9746e78934ac6cfdd3403 100644 (file)
@@ -94,6 +94,48 @@ void buildAccelSingle(const AccelInfo &info, AccelAux *aux) {
     DEBUG_PRINTF("unable to accelerate case with %zu outs\n", outs);
 }
 
+bool buildDvermMask(const flat_set<pair<u8, u8>> &escape_set, u8 *m1_out,
+                    u8 *m2_out) {
+    u8 a1 = 0xff;
+    u8 a2 = 0xff;
+    u8 b1 = 0xff;
+    u8 b2 = 0xff;
+
+    for (const auto &e : escape_set) {
+        DEBUG_PRINTF("%0hhx %0hhx\n", e.first, e.second);
+        a1 &= e.first;
+        b1 &= ~e.first;
+        a2 &= e.second;
+        b2 &= ~e.second;
+    }
+
+    u8 m1 = a1 | b1;
+    u8 m2 = a2 | b2;
+
+    u32 holes1 = 8 - popcount32(m1);
+    u32 holes2 = 8 - popcount32(m2);
+
+    DEBUG_PRINTF("aaaa %0hhx %0hhx\n", a1, a2);
+    DEBUG_PRINTF("bbbb %0hhx %0hhx\n", b1, b2);
+    DEBUG_PRINTF("mask %0hhx %0hhx\n", m1, m2);
+
+    assert(holes1 <= 8 && holes2 <= 8);
+    assert(escape_set.size() <= 1U << (holes1 + holes2));
+    if (escape_set.size() != 1U << (holes1 + holes2)) {
+        return false;
+    }
+
+    if (m1_out) {
+        *m1_out = m1;
+    }
+    if (m2_out) {
+        *m2_out = m2;
+    }
+
+    return true;
+}
+
+static
 bool isCaselessDouble(const flat_set<pair<u8, u8>> &stop) {
     // test for vector containing <A,Z> <A,z> <a,Z> <a,z>
     if (stop.size() != 4) {
@@ -148,6 +190,23 @@ void buildAccelDouble(const AccelInfo &info, AccelAux *aux) {
         return;
     }
 
+    if (outs1 == 0) {
+        u8 m1;
+        u8 m2;
+
+        if (buildDvermMask(info.double_stop2, &m1, &m2)) {
+            aux->accel_type = ACCEL_DVERM_MASKED;
+            aux->dverm.offset = offset;
+            aux->dverm.c1 = info.double_stop2.begin()->first & m1;
+            aux->dverm.c2 = info.double_stop2.begin()->second & m2;
+            aux->dverm.m1 = m1;
+            aux->dverm.m2 = m2;
+            DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
+                         aux->dverm.c1, aux->dverm.c2);
+            return;
+        }
+    }
+
     if (outs1 + outs2 <= 8) {
         if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
             DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu"
index d479a54579f9dbc72f92f7b8180b2af52875ca49..9b30146cde1e12385682d6838582d366a1b3b84f 100644 (file)
@@ -56,8 +56,6 @@ struct MultibyteAccelInfo {
     multiaccel_type type = MAT_NONE;
 };
 
-bool isCaselessDouble(const flat_set<std::pair<u8, u8>> &stop);
-
 struct AccelInfo {
     AccelInfo() : single_offset(0U), double_offset(0U),
                   single_stops(CharReach::dot()),
@@ -79,6 +77,10 @@ struct AccelInfo {
 
 bool buildAccelAux(const AccelInfo &info, AccelAux *aux);
 
+/* returns true if the escape set can be handled with a masked double_verm */
+bool buildDvermMask(const flat_set<std::pair<u8, u8>> &escape_set,
+                    u8 *m1_out = nullptr, u8 *m2_out = nullptr);
+
 } // namespace ue2
 
 #endif
index 77ed5ac0723b6949a00f9f8344a92c80c8d54e71..0bfc9d85b4b802720348ee292de4283e2c8430c7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -81,6 +81,15 @@ const u8 *accelScan(const union AccelAux *aux, const u8 *ptr, const u8 *end) {
         ptr = vermicelliDoubleExec(aux->dverm.c1, aux->dverm.c2,
                                    1, ptr, end);
         break;
+    case ACCEL_DVERM_MASKED:
+        DEBUG_PRINTF("double vermicelli masked for "
+                     "0x%02hhx%02hhx/0x%02hhx%02hhx\n",
+                     aux->dverm.c1, aux->dverm.c2,
+                     aux->dverm.m1, aux->dverm.m2);
+        offset = aux->dverm.offset;
+        ptr = vermicelliDoubleMaskedExec(aux->dverm.c1, aux->dverm.c2,
+                                         aux->dverm.m1, aux->dverm.m2, ptr, end);
+        break;
     case ACCEL_MLVERM:
         DEBUG_PRINTF("long vermicelli for 0x%02hhx\n", aux->mverm.c);
         offset = aux->mverm.offset;
index 279f454e7bbd5d550617bf441bdc3d0759275f46..87eed25012d91b951798558a9b2f4180bc98d759 100644 (file)
@@ -29,6 +29,7 @@
 #include "mcclellancompile.h"
 
 #include "accel.h"
+#include "accelcompile.h"
 #include "grey.h"
 #include "mcclellan_internal.h"
 #include "mcclellancompile_accel.h"
@@ -239,6 +240,20 @@ void mcclellan_build_strat::buildAccel(UNUSED dstate_id_t this_idx,
             DEBUG_PRINTF("state %hu is nc double vermicelli\n", this_idx);
             return;
         }
+
+        u8 m1;
+        u8 m2;
+        if (buildDvermMask(info.outs2, &m1, &m2)) {
+            accel->accel_type = ACCEL_DVERM_MASKED;
+            accel->dverm.offset = verify_u8(info.outs2_offset);
+            accel->dverm.c1 = info.outs2.begin()->first & m1;
+            accel->dverm.c2 = info.outs2.begin()->second & m2;
+            accel->dverm.m1 = m1;
+            accel->dverm.m2 = m2;
+            DEBUG_PRINTF("building maskeddouble-vermicelli for 0x%02hhx%02hhx\n",
+                         accel->dverm.c1, accel->dverm.c2);
+            return;
+        }
     }
 
     if (double_byte_ok(info)) {
index e6957f9f6008f3466aa720ca294611eb637692a0..36d7fb5fd19ed3f3a376637343d8b6874840f0c5 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -185,6 +185,41 @@ const u8 *vermicelliDoubleExec(char c1, char c2, char nocase, const u8 *buf,
     }
 }
 
+static really_inline
+const u8 *vermicelliDoubleMaskedExec(char c1, char c2, char m1, char m2,
+                                     const u8 *buf, const u8 *buf_end) {
+    DEBUG_PRINTF("double verm scan (\\x%02hhx&\\x%02hhx)(\\x%02hhx&\\x%02hhx) "
+                 "over %zu bytes\n", c1, m1, c2, m2, (size_t)(buf_end - buf));
+    assert(buf < buf_end);
+    assert((buf_end - buf) >= VERM_BOUNDARY);
+
+    uintptr_t min = (uintptr_t)buf % VERM_BOUNDARY;
+    VERM_TYPE chars1 = VERM_SET_FN(c1);
+    VERM_TYPE chars2 = VERM_SET_FN(c2);
+    VERM_TYPE mask1 = VERM_SET_FN(m1);
+    VERM_TYPE mask2 = VERM_SET_FN(m2);
+
+    if (min) {
+        // Input isn't aligned, so we need to run one iteration with an
+        // unaligned load, then skip buf forward to the next aligned address.
+        // There's some small overlap here, but we don't mind scanning it twice
+        // if we can do it quickly, do we?
+        const u8 *p = dvermPreconditionMasked(chars1, chars2, mask1, mask2, buf);
+        if (p) {
+            return p;
+        }
+
+        buf += VERM_BOUNDARY - min;
+        if (buf >= buf_end) {
+            return buf_end - 1;
+        }
+    }
+
+    // Aligned loops from here on in
+    return dvermSearchAlignedMasked(chars1, chars2, mask1, mask2, c1, c2, m1, m2,
+                                    buf, buf_end);
+}
+
 // Reverse vermicelli scan. Provides exact semantics and returns (buf - 1) if
 // character not found.
 static really_inline
index 1a041505269866c05a8e12cec8dfdda691052723..0a30306f608a38175d03190bd2aed2454f536ed0 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -172,6 +172,27 @@ const u8 *dvermSearchAlignedNocase(m128 chars1, m128 chars2, u8 c1, u8 c2,
     return buf;
 }
 
+static really_inline
+const u8 *dvermSearchAlignedMasked(m128 chars1, m128 chars2,
+                                   m128 mask1, m128 mask2, u8 c1, u8 c2, u8 m1,
+                                   u8 m2, const u8 *buf, const u8 *buf_end) {
+    assert((size_t)buf % 16 == 0);
+
+    for (; buf + 16 < buf_end; buf += 16) {
+        m128 data = load128(buf);
+        u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)),
+                   shiftRight8Bits(eq128(chars2, and128(data, mask2)))));
+        if ((buf[15] & m1) == c1 && (buf[16] & m2) == c2) {
+            z |= (1 << 15);
+        }
+        if (unlikely(z)) {
+            u32 pos = ctz32(z);
+            return buf + pos;
+        }
+    }
+    return buf;
+}
+
 // returns NULL if not found
 static really_inline
 const u8 *dvermPrecondition(m128 chars1, m128 chars2, const u8 *buf) {
@@ -205,6 +226,22 @@ const u8 *dvermPreconditionNocase(m128 chars1, m128 chars2, const u8 *buf) {
     return NULL;
 }
 
+// returns NULL if not found
+static really_inline
+const u8 *dvermPreconditionMasked(m128 chars1, m128 chars2,
+                                  m128 mask1, m128 mask2, const u8 *buf) {
+    m128 data = loadu128(buf); // unaligned
+    u32 z = movemask128(and128(eq128(chars1, and128(data, mask1)),
+               shiftRight8Bits(eq128(chars2, and128(data, mask2)))));
+
+    /* no fixup of the boundary required - the aligned run will pick it up */
+    if (unlikely(z)) {
+        u32 pos = ctz32(z);
+        return buf + pos;
+    }
+    return NULL;
+}
+
 static really_inline
 const u8 *lastMatchOffset(const u8 *buf_end, u32 z) {
     assert(z);
index 80b3f0ecb84d512c1034ca1a63c45d7662de5b68..9c77dc67a3d6eb7555946b7c3cbcbf3dda564a98 100644 (file)
@@ -96,10 +96,12 @@ struct AccelScheme {
                 return a.double_byte.size() < b.double_byte.size();
             }
 
-            bool cd_a = isCaselessDouble(a.double_byte);
-            bool cd_b = isCaselessDouble(b.double_byte);
-            if (cd_a != cd_b) {
-                return cd_a > cd_b;
+            if (!a_dcount) {
+                bool cd_a = buildDvermMask(a.double_byte);
+                bool cd_b = buildDvermMask(b.double_byte);
+                if (cd_a != cd_b) {
+                    return cd_a > cd_b;
+                }
             }
             ORDER_CHECK(double_byte.size());
             ORDER_CHECK(double_offset);
index 4442754e22f1953e38485ae28ec09e214316e31b..6866b7c8405baf3b0e8dfa1926ba26537e3f90d7 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -345,3 +345,179 @@ TEST(NVermicelli, Exec4) {
     }
 }
 
+TEST(DoubleVermicelliMasked, ExecNoMatch1) {
+    std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
+    const u8 *t1_raw = (const u8 *)t1.c_str();
+
+    for (size_t i = 0; i < 16; i++) {
+        for (size_t j = 0; j < 16; j++) {
+            const u8 *rv = vermicelliDoubleMaskedExec('a', 'b', 0xff, 0xff,
+                                                  t1_raw + i,
+                                                  t1_raw + t1.length() - i - j);
+
+            ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
+
+            rv = vermicelliDoubleMaskedExec('B', 'b', 0xff, CASE_CLEAR,
+                                            t1_raw + i,
+                                            t1_raw + t1.length() - i - j);
+
+            ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j  - 1) & BOUND, (size_t)rv);
+
+            rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR,
+                                            t1_raw + i,
+                                            t1_raw + t1.length() -i - j);
+
+            ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j  - 1) & BOUND, (size_t)rv);
+
+            rv = vermicelliDoubleMaskedExec('b', 'B', CASE_CLEAR, 0xff,
+                                            t1_raw + i,
+                                            t1_raw + t1.length() - i - j);
+
+            ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j  - 1) & BOUND, (size_t)rv);
+
+            rv = vermicelliDoubleMaskedExec('B', 'A', 0xff, 0xff,
+                                            t1_raw + i,
+                                            t1_raw + t1.length() - i - j);
+
+            ASSERT_EQ(((size_t)t1_raw + t1.length() - i - j - 1) & BOUND, (size_t)rv);
+        }
+    }
+}
+
+TEST(DoubleVermicelliMasked, Exec1) {
+    std::string t1("bbbbbbbbbbbbbbbbbbabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbabbbbbbbbbbb");
+    const u8 *t1_raw = (const u8 *)t1.c_str();
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = vermicelliDoubleMaskedExec('a', 'b', 0xff, 0xff,
+                                                  t1_raw + i,
+                                                  t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('A', 'B', CASE_CLEAR, CASE_CLEAR,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('a', 'B', 0xff, CASE_CLEAR,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('A', 'b', CASE_CLEAR, 0xff,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('b', 'a', 0xff, 0xff,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('B', 'A', CASE_CLEAR, CASE_CLEAR,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv);
+    }
+}
+
+TEST(DoubleVermicelliMasked,  Exec2) {
+    std::string t1("bbbbbbbbbbbbbbbbbaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb");
+    const u8 *t1_raw = (const u8 *)t1.c_str();
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = vermicelliDoubleMaskedExec('a', 'a', 0xff, 0xff,
+                                                  t1_raw + i,
+                                                  t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('A', 'A', CASE_CLEAR, CASE_CLEAR,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('a', 'A', 0xff, CASE_CLEAR,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('A', 'a', CASE_CLEAR, 0xff,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv);
+}
+}
+
+TEST(DoubleVermicelliMasked,  Exec3) {
+    /*              012345678901234567890123 */
+    std::string t1("bbbbbbbbbbbbbbbbbaAaaAAaaaaaaaaaaaaaaaaaabbbbbbbaaaaabbbbbbbb");
+    const u8 *t1_raw = (const u8 *)t1.c_str();
+
+    for (size_t i = 0; i < 16; i++) {
+        const u8 *rv = vermicelliDoubleMaskedExec('A', 'a', 0xff, 0xff,
+                                                  t1_raw + i,
+                                                  t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('A', 'A', CASE_CLEAR, CASE_CLEAR,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('A', 'A', 0xff, 0xff,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 21, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('a', 'A', 0xff, 0xff,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('a', 'A', 0xff, CASE_CLEAR,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 17, (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('A', 'a', CASE_CLEAR, 0xff,
+                                        t1_raw + i,
+                                        t1_raw + t1.length() - i);
+
+        ASSERT_EQ((size_t)t1_raw + 18, (size_t)rv);
+}
+}
+
+TEST(DoubleVermicelliMasked, Exec4) {
+    std::string t1("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb");
+    const u8 *t1_raw = (const u8 *)t1.c_str();
+
+    for (size_t i = 0; i < 31; i++) {
+        t1[48 - i] = 'a';
+        t1[48 - i + 1] = 'a';
+        const u8 *rv = vermicelliDoubleMaskedExec('a', 'a', 0xff, 0xff, t1_raw,
+                                                  t1_raw + t1.length());
+
+        ASSERT_EQ((size_t)&t1_raw[48 - i], (size_t)rv);
+
+        rv = vermicelliDoubleMaskedExec('A', 'A', CASE_CLEAR, CASE_CLEAR, t1_raw,
+                                        t1_raw + t1.length());
+
+        ASSERT_EQ((size_t)&t1_raw[48 - i], (size_t)rv);
+    }
+}
+