]> git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
ue-3145: make parents of included literals exclusive
author: Wang, Xiang W <xiang.w.wang@intel.com>
Thu, 20 Jul 2017 20:40:54 +0000 (16:40 -0400)
committer: Matthew Barr <matthew.barr@intel.com>
Mon, 21 Aug 2017 01:12:36 +0000 (11:12 +1000)
src/fdr/fdr_compile.cpp
src/fdr/teddy_compile.h
src/rose/program_runtime.h
src/rose/rose_build_dump.cpp

index 181f95126d69e6319734f64c88583de1ac4a27c0..dc91010e2a795bf74b6f4c6d9576f64f2ade1eec 100644 (file)
@@ -609,16 +609,18 @@ bool includedCheck(const hwlmLiteral &lit1, const hwlmLiteral &lit2) {
 }
 
 /*
- * if lit2 is an included literal of both lit1 and lit0, and lit1 is an
- * exceptional literal of lit0 - lit1 sometimes matches when lit0 matches,
- * then we give up squashing for lit1. e.g. lit0:AAA(no case), lit1:aa,
- * lit2:A(no case). We can have duplicate matches for input "aaa" if lit0
- * and lit1 both squash lit2.
+ * if lit2 is an included literal of both lit0 and lit1, then lit0 and lit1
+ * shouldn't match at the same offset, otherwise we give up squashing for lit1.
+ * e.g. lit0:AAA(no case), lit1:aa, lit2:A(no case). We can have duplicate
+ * matches for input "aaa" if lit0 and lit1 both squash lit2.
  */
 static
 bool checkParentLit(
-            u32 pos1, const unordered_set<u32> &parent_map,
+            const vector<hwlmLiteral> &lits, u32 pos1,
+            const unordered_set<u32> &parent_map,
             const unordered_map<u32, unordered_set<u32>> &exception_map) {
+    assert(pos1 < lits.size());
+    const auto &lit1 = lits[pos1];
     for (const auto pos2 : parent_map) {
         if (contains(exception_map, pos2)) {
             const auto &exception_pos = exception_map.at(pos2);
@@ -626,6 +628,16 @@ bool checkParentLit(
                 return false;
             }
         }
+
+        /* if lit1 isn't an exception of lit2, then we have to do further
+         * exclusive check.
+         * TODO: More mask checks. Note if two literals are group exclusive,
+         * it is possible that they match at the same offset. */
+        assert(pos2 < lits.size());
+        const auto &lit2 = lits[pos2];
+        if (isSuffix(lit2, lit1)) {
+            return false;
+        }
     }
 
     return true;
@@ -652,30 +664,26 @@ void buildSquashMask(vector<hwlmLiteral> &lits, u32 id1, u32 bucket1,
         // check if lit2 is a suffix of lit1
         if (isSuffix(lit1, lit2)) {
             /* if we have a included literal in the same bucket,
-             * quit and let the included literal to do possible squashing
-             */
+             * quit and let the included literal do any possible squashing */
             if (bucket1 == bucket2) {
                 DEBUG_PRINTF("same bucket\n");
                 return;
             }
-            /*
-             * if lit2 is a suffix but doesn't pass included checks for
-             * extra info, we give up sqaushing
-             */
+            /* if lit2 is a suffix but doesn't pass included checks for
+             * extra info, we give up squashing */
             if (includedCheck(lit1, lit2)) {
                 DEBUG_PRINTF("find exceptional suffix %u\n", lit2.id);
                 exception_map[id1].insert(id2);
                 exception = true;
-            } else if (checkParentLit(id1, parent_map[id2], exception_map)) {
+            } else if (checkParentLit(lits, id1, parent_map[id2],
+                       exception_map)) {
                 if (lit1.included_id == INVALID_LIT_ID) {
                     DEBUG_PRINTF("find suffix lit1 %u lit2 %u\n",
                                  lit1.id, lit2.id);
                     lit1.included_id = lit2.id;
                 } else {
-                    /*
-                     * if we have multiple included literals in one bucket,
-                     * give up squashing.
-                     */
+                    /* if we have multiple included literals in one bucket,
+                     * give up squashing. */
                     DEBUG_PRINTF("multiple included literals\n");
                     lit1.included_id = INVALID_LIT_ID;
                     return;
@@ -690,10 +698,8 @@ void buildSquashMask(vector<hwlmLiteral> &lits, u32 id1, u32 bucket1,
         if (bucket2 != nextBucket) {
             if (included) {
                 if (exception) {
-                    /*
-                     * give up if we have exception literals
-                     * in the same bucket as the included literal
-                     */
+                    /* give up if we have exception literals
+                     * in the same bucket as the included literal. */
                     lit1.included_id = INVALID_LIT_ID;
                 } else {
                     parent_map[child_id].insert(id1);
@@ -714,14 +720,12 @@ static constexpr u32 INCLUDED_LIMIT = 1000;
 static
 void findIncludedLits(vector<hwlmLiteral> &lits,
                       const vector<vector<pair<u32, u32>>> &lastCharMap) {
-    /** Map for finding the positions of literal which includes a literal
-     * in FDR hwlm literal vector.
-     */
+    /* Map for finding the positions of literal which includes a literal
+     * in FDR hwlm literal vector. */
     unordered_map<u32, unordered_set<u32>> parent_map;
 
-    /** Map for finding the positions of exception literals which could
-     * sometimes match if a literal matches in FDR hwlm literal vector.
-     */
+    /* Map for finding the positions of exception literals which could
+     * sometimes match if a literal matches in FDR hwlm literal vector. */
     unordered_map<u32, unordered_set<u32>> exception_map;
     for (const auto &group : lastCharMap) {
         size_t cnt = group.size();
index ec251310263e177f519847d1f19d8d31a803bb03..a2b4a13ca364d729815b9cf67cbe86e6f2f6f40e 100644 (file)
@@ -44,10 +44,10 @@ struct FDR;
 
 namespace ue2 {
 
+class TeddyEngineDescription;
 struct Grey;
 struct hwlmLiteral;
 struct target_t;
-struct TeddyEngineDescription;
 
 bytecode_ptr<FDR> teddyBuildTable(const HWLMProto &proto, const Grey &grey);
 
index ab0934de55239d1bc6791b7c2cddbc651e4a0d31..e6ce9bdbe073971a81cf31dfd338b4953a68f966 100644 (file)
@@ -2581,8 +2581,9 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
                     pc = getByOffset(t, ri->child_offset);
                     pc_base = pc;
                     programOffset = (const u8 *)pc_base -(const u8 *)t;
-                    DEBUG_PRINTF("pc_base %p pc %p child_offset %u\n",
-                                 pc_base, pc, ri->child_offset);
+                    DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n",
+                                 pc_base, pc, ri->child_offset, ri->squash);
+                    work_done = 0;
                     continue;
                 }
             }
index e98308acfc87b146b60f7bffd48f0509ffa542aa..5ab9fc99ec773c020f053f5cd9c3f405398bf174 100644 (file)
@@ -1465,7 +1465,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
 
             PROGRAM_CASE(INCLUDED_JUMP) {
                 os << "    child_offset " << ri->child_offset << endl;
-                os << "    squash " << ri->squash << endl;
+                os << "    squash " << (u32)ri->squash << endl;
             }
             PROGRAM_NEXT_INSTRUCTION