git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
rose: rework storage of extra lookaround information
author Alex Coyte <a.coyte@intel.com>
Mon, 1 May 2017 06:09:10 +0000 (16:09 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Tue, 30 May 2017 03:59:00 +0000 (13:59 +1000)
- remove explicit lookaround table from bytecode
- make the RoseInstr responsible for adding required info to blob

12 files changed:
CMakeLists.txt
src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_dump.cpp
src/rose/rose_build_engine_blob.cpp [new file with mode: 0644]
src/rose/rose_build_engine_blob.h
src/rose/rose_build_instructions.cpp
src/rose/rose_build_instructions.h
src/rose/rose_build_program.cpp
src/rose/rose_build_program.h
src/rose/rose_internal.h
src/rose/rose_program.h

index bc42c659406c2bb329de76191a3b402e35711de2..4f5d661f1e0806311df0583e1b29ec78fd7976ce 100644 (file)
@@ -948,6 +948,7 @@ SET (hs_SRCS
     src/rose/rose_build_convert.cpp
     src/rose/rose_build_convert.h
     src/rose/rose_build_dedupe.cpp
+    src/rose/rose_build_engine_blob.cpp
     src/rose/rose_build_engine_blob.h
     src/rose/rose_build_exclusive.cpp
     src/rose/rose_build_exclusive.h
index dac8345ea6cad86b5059df824c4940e7c7b34970..c67a4acbf51f74b6f08c4cdea73a4b05f71ccc6e 100644 (file)
@@ -1031,8 +1031,7 @@ int roseCheckSingleLookaround(const struct RoseEngine *t,
         return 0;
     }
 
-    const u8 *reach_base = (const u8 *)t + t->lookaroundReachOffset;
-    const u8 *reach = reach_base + lookaroundReachIndex;
+    const u8 *reach = getByOffset(t, lookaroundReachIndex);
 
     u8 c;
     if (offset >= 0 && offset < (s64a)ci->len) {
@@ -1069,14 +1068,11 @@ int roseCheckLookaround(const struct RoseEngine *t,
     DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
                  ci->buf_offset, ci->buf_offset + ci->len);
 
-    const u8 *base = (const u8 *)t;
-    const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
-    const s8 *look = look_base + lookaroundLookIndex;
+    const s8 *look = getByOffset(t, lookaroundLookIndex);
     const s8 *look_end = look + lookaroundCount;
     assert(look < look_end);
 
-    const u8 *reach_base = base + t->lookaroundReachOffset;
-    const u8 *reach = reach_base + lookaroundReachIndex;
+    const u8 *reach = getByOffset(t, lookaroundReachIndex);
 
     // The following code assumes that the lookaround structures are ordered by
     // increasing offset.
@@ -1166,13 +1162,11 @@ int roseMultipathLookaround(const struct RoseEngine *t,
     DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
                  ci->buf_offset, ci->buf_offset + ci->len);
 
-    const s8 *look_base = getByOffset(t, t->lookaroundTableOffset);
-    const s8 *look = look_base + multipathLookaroundLookIndex;
+    const s8 *look = getByOffset(t, multipathLookaroundLookIndex);
     const s8 *look_end = look + multipathLookaroundCount;
     assert(look < look_end);
 
-    const u8 *reach_base = getByOffset(t, t->lookaroundReachOffset);
-    const u8 *reach = reach_base + multipathLookaroundReachIndex;
+    const u8 *reach = getByOffset(t, multipathLookaroundReachIndex);
 
     const s64a base_offset = (s64a)end - ci->buf_offset;
     DEBUG_PRINTF("base_offset=%lld\n", base_offset);
index dfe4ff635ca0cd957b94c20e01ffa26d58981650..02304ae22877ddc7e58ce30a11d47bf40c51507f 100644 (file)
@@ -147,8 +147,6 @@ struct build_context : noncopyable {
     ue2::unordered_map<RoseProgram, u32, RoseProgramHash,
                        RoseProgramEquivalence> program_cache;
 
-    lookaround_info lookarounds;
-
     /** \brief State indices, for those roles that have them.
      * Each vertex present has a unique state index in the range
      * [0, roleStateIndices.size()). */
@@ -2428,70 +2426,6 @@ bool hasEodAnchors(const RoseBuildImpl &build, const build_context &bc,
     return false;
 }
 
-static
-void writeLookaround(const vector<LookEntry> &look_vec, s8 *&look, u8 *&reach) {
-    for (const auto &le : look_vec) {
-        *look = verify_s8(le.offset);
-        const CharReach &cr = le.reach;
-
-        assert(cr.any()); // Should be at least one character!
-        fill_bitvector(cr, reach);
-
-        ++look;
-        reach += REACH_BITVECTOR_LEN;
-    }
-}
-
-static
-void writeMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
-                              s8 *&look, u8 *&reach) {
-    for (const auto &m : multi_look) {
-        u8 u = 0;
-        assert(m.size() == MAX_LOOKAROUND_PATHS);
-        for (size_t i = 0; i < m.size(); i++) {
-            if (m[i].reach.none()) {
-                u |= (u8)1U << i;
-            }
-        }
-        std::fill_n(reach, MULTI_REACH_BITVECTOR_LEN, u);
-
-        for (size_t i = 0; i < m.size(); i++) {
-            const CharReach &cr = m[i].reach;
-            if (cr.none()) {
-                continue;
-            }
-            *look = m[i].offset;
-
-            for (size_t c = cr.find_first(); c != cr.npos;
-                 c = cr.find_next(c)) {
-                reach[c] |= (u8)1U << i;
-            }
-        }
-
-        ++look;
-        reach += MULTI_REACH_BITVECTOR_LEN;
-    }
-}
-
-static
-void writeLookaroundTables(const lookaround_info &lookarounds,
-                           RoseEngineBlob &engine_blob, RoseEngine &proto) {
-    vector<s8> look_table(lookarounds.lookTableSize, 0);
-    vector<u8> reach_table(lookarounds.reachTableSize, 0);
-    s8 *look = look_table.data();
-    u8 *reach = reach_table.data();
-    for (const auto &la : lookarounds.table) {
-        if (la.size() == 1) {
-            writeLookaround(la.front(), look, reach);
-        } else {
-            writeMultipathLookaround(la, look, reach);
-        }
-    }
-
-    proto.lookaroundTableOffset = engine_blob.add_range(look_table);
-    proto.lookaroundReachOffset = engine_blob.add_range(reach_table);
-}
-
 static
 void writeDkeyInfo(const ReportManager &rm, RoseEngineBlob &engine_blob,
                    RoseEngine &proto) {
@@ -2752,7 +2686,7 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build, build_context &bc,
     }
 
     return makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
-                              bc.engine_info_by_queue, bc.lookarounds,
+                              bc.engine_info_by_queue,
                               bc.roleStateIndices, prog_build, lit_id,
                               *edges_ptr, is_anchored_replay_program);
 }
@@ -2917,8 +2851,7 @@ void buildLiteralPrograms(const RoseBuildImpl &build,
             continue;
         }
 
-        auto rebuild_prog = makeDelayRebuildProgram(build,
-                                                    bc.lookarounds, prog_build,
+        auto rebuild_prog = makeDelayRebuildProgram(build, prog_build,
                                                     frag.lit_ids);
         frag.delay_program_offset = writeProgram(bc, move(rebuild_prog));
     }
@@ -3181,7 +3114,7 @@ void addEodEventProgram(const RoseBuildImpl &build, build_context &bc,
          });
 
     auto block = makeLiteralProgram(build, bc.leftfix_info, bc.suffixes,
-                                    bc.engine_info_by_queue, bc.lookarounds,
+                                    bc.engine_info_by_queue,
                                     bc.roleStateIndices, prog_build,
                                     build.eod_event_literal_id, edge_list,
                                     false);
@@ -3555,7 +3488,6 @@ bytecode_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
 
     addSomRevNfas(bc, proto, ssm);
 
-    writeLookaroundTables(bc.lookarounds, bc.engine_blob, proto);
     writeDkeyInfo(rm, bc.engine_blob, proto);
     writeLeftInfo(bc.engine_blob, proto, leftInfoTable);
 
index 7fd19d43684eb75e90975e88b329f2f6020af0b6..b527db6c8a18d4e0b1f265b8fe7fca29b871b074 100644 (file)
@@ -625,12 +625,10 @@ void dumpLookaround(ofstream &os, const RoseEngine *t,
     assert(ri);
 
     const u8 *base = (const u8 *)t;
-    const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
-    const u8 *reach_base = base + t->lookaroundReachOffset;
 
-    const s8 *look = look_base + ri->look_index;
+    const s8 *look = (const s8 *)base + ri->look_index;
     const s8 *look_end = look + ri->count;
-    const u8 *reach = reach_base + ri->reach_index;
+    const u8 *reach = base + ri->reach_index;
 
     os << "    contents:" << endl;
 
@@ -648,12 +646,10 @@ void dumpMultipathLookaround(ofstream &os, const RoseEngine *t,
     assert(ri);
 
     const u8 *base = (const u8 *)t;
-    const s8 *look_base = (const s8 *)(base + t->lookaroundTableOffset);
-    const u8 *reach_base = base + t->lookaroundReachOffset;
 
-    const s8 *look_begin = look_base + ri->look_index;
+    const s8 *look_begin = (const s8 *)base + ri->look_index;
     const s8 *look_end = look_begin + ri->count;
-    const u8 *reach_begin = reach_base + ri->reach_index;
+    const u8 *reach_begin = base + ri->reach_index;
 
     os << "    contents:" << endl;
 
@@ -926,10 +922,7 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
                 os << "    offset " << int{ri->offset} << endl;
                 os << "    reach_index " << ri->reach_index << endl;
                 os << "    fail_jump " << offset + ri->fail_jump << endl;
-                const u8 *base = (const u8 *)t;
-                const u8 *reach_base = base + t->lookaroundReachOffset;
-                const u8 *reach = reach_base +
-                                  ri->reach_index * REACH_BITVECTOR_LEN;
+                const u8 *reach = (const u8 *)t + ri->reach_index;
                 os << "    contents ";
                 describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
                 os << endl;
@@ -2146,8 +2139,6 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
     DUMP_U32(t, handledKeyFatbitSize);
     DUMP_U32(t, leftOffset);
     DUMP_U32(t, roseCount);
-    DUMP_U32(t, lookaroundTableOffset);
-    DUMP_U32(t, lookaroundReachOffset);
     DUMP_U32(t, eodProgramOffset);
     DUMP_U32(t, lastByteHistoryIterOffset);
     DUMP_U32(t, minWidth);
diff --git a/src/rose/rose_build_engine_blob.cpp b/src/rose/rose_build_engine_blob.cpp
new file mode 100644 (file)
index 0000000..d395720
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_engine_blob.h"
+
+#include "rose_build_lookaround.h"
+#include "util/charreach_util.h"
+
+using namespace std;
+
+namespace ue2 {
+
+u32 lookaround_info::get_offset_of(const vector<vector<CharReach>> &reaches,
+                                   RoseEngineBlob &blob) {
+    assert(reaches.size() != 1);
+
+    // Check the cache.
+    auto it = multi_cache.find(reaches);
+    if (it != multi_cache.end()) {
+        DEBUG_PRINTF("reusing reach at idx %u\n", it->second);
+        return it->second;
+    }
+
+    vector<u8> raw_reach(reaches.size() * MULTI_REACH_BITVECTOR_LEN);
+    size_t off = 0;
+    for (const auto &m : reaches) {
+        u8 u = 0;
+        assert(m.size() == MAX_LOOKAROUND_PATHS);
+        for (size_t i = 0; i < m.size(); i++) {
+            if (m[i].none()) {
+                u |= (u8)1U << i;
+            }
+        }
+        fill_n(raw_reach.data() + off, MULTI_REACH_BITVECTOR_LEN, u);
+
+        for (size_t i = 0; i < m.size(); i++) {
+            const CharReach &cr = m[i];
+            if (cr.none()) {
+                continue;
+            }
+
+            for (size_t c = cr.find_first(); c != cr.npos;
+                 c = cr.find_next(c)) {
+                raw_reach[c + off] |= (u8)1U << i;
+            }
+        }
+
+        off += MULTI_REACH_BITVECTOR_LEN;
+    }
+
+    u32 reach_idx = blob.add_range(raw_reach);
+    DEBUG_PRINTF("adding reach at idx %u\n", reach_idx);
+    multi_cache.emplace(reaches, reach_idx);
+
+    return reach_idx;
+}
+
+u32 lookaround_info::get_offset_of(const vector<CharReach> &reach,
+                                   RoseEngineBlob &blob) {
+    if (contains(rcache, reach)) {
+        u32 offset = rcache[reach];
+        DEBUG_PRINTF("reusing reach at idx %u\n", offset);
+        return offset;
+    }
+
+    vector<u8> raw_reach(reach.size() * REACH_BITVECTOR_LEN);
+    size_t off = 0;
+    for (const auto &cr : reach) {
+        assert(cr.any()); // Should be at least one character!
+        fill_bitvector(cr, raw_reach.data() + off);
+        off += REACH_BITVECTOR_LEN;
+    }
+
+    u32 offset = blob.add_range(raw_reach);
+    rcache.emplace(reach, offset);
+    return offset;
+}
+
+u32 lookaround_info::get_offset_of(const vector<s8> &look,
+                                   RoseEngineBlob &blob) {
+    if (contains(lcache, look)) {
+        u32 offset = lcache[look];
+        DEBUG_PRINTF("reusing look at idx %u\n", offset);
+        return offset;
+    }
+
+    u32 offset = blob.add_range(look);
+    lcache.emplace(look, offset);
+    return offset;
+}
+
+} // namespace ue2
index 69e8201ec7a96ebf3c5a9608810c48b6237afa14..a22f2dff77a8122feb5dbb043ea14d641a7bb974 100644 (file)
@@ -34,6 +34,7 @@
 #include "ue2common.h"
 #include "util/alloc.h"
 #include "util/bytecode_ptr.h"
+#include "util/charreach.h"
 #include "util/container.h"
 #include "util/multibit_build.h"
 #include "util/noncopyable.h"
 
 namespace ue2 {
 
+class RoseEngineBlob;
+
+struct lookaround_info : noncopyable {
+    u32 get_offset_of(const std::vector<std::vector<CharReach>> &look,
+                      RoseEngineBlob &blob);
+    u32 get_offset_of(const std::vector<CharReach> &reach,
+                      RoseEngineBlob &blob);
+    u32 get_offset_of(const std::vector<s8> &look, RoseEngineBlob &blob);
+
+private:
+    unordered_map<std::vector<std::vector<CharReach>>, u32> multi_cache;
+    unordered_map<std::vector<s8>, u32> lcache;
+    unordered_map<std::vector<CharReach>, u32> rcache;
+};
+
 class RoseEngineBlob : noncopyable {
 public:
     /** \brief Base offset of engine_blob in the Rose engine bytecode. */
@@ -133,6 +149,8 @@ public:
         copy_bytes((char *)engine + base_offset, blob);
     }
 
+    lookaround_info lookaround_cache;
+
 private:
     void pad(size_t align) {
         assert(ISALIGNED_N(base_offset, align));
index f39fbe986dc2cc0a24418c822fa86d4461d18ecc..b00c36be685b537541d7bc1a5ccf28fafe1af1c1 100644 (file)
@@ -118,7 +118,7 @@ void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob,
     RoseInstrBase::write(dest, blob, offset_map);
     auto *inst = static_cast<impl_type *>(dest);
     inst->offset = offset;
-    inst->reach_index = reach_index;
+    inst->reach_index = blob.lookaround_cache.get_offset_of({reach}, blob);
     inst->fail_jump = calc_jump(offset_map, this, target);
 }
 
@@ -126,9 +126,15 @@ void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob,
                                      const OffsetMap &offset_map) const {
     RoseInstrBase::write(dest, blob, offset_map);
     auto *inst = static_cast<impl_type *>(dest);
-    inst->look_index = look_index;
-    inst->reach_index = reach_index;
-    inst->count = count;
+    vector<s8> look_offsets;
+    vector<CharReach> reaches;
+    for (const auto &le : look) {
+        look_offsets.push_back(le.offset);
+        reaches.push_back(le.reach);
+    }
+    inst->look_index = blob.lookaround_cache.get_offset_of(look_offsets, blob);
+    inst->reach_index = blob.lookaround_cache.get_offset_of(reaches, blob);
+    inst->count = verify_u32(look.size());
     inst->fail_jump = calc_jump(offset_map, this, target);
 }
 
@@ -532,9 +538,26 @@ void RoseInstrMultipathLookaround::write(void *dest, RoseEngineBlob &blob,
                                          const OffsetMap &offset_map) const {
     RoseInstrBase::write(dest, blob, offset_map);
     auto *inst = static_cast<impl_type *>(dest);
-    inst->look_index = look_index;
-    inst->reach_index = reach_index;
-    inst->count = count;
+    auto &cache = blob.lookaround_cache;
+    vector<s8> look_offsets;
+    vector<vector<CharReach>> reaches;
+    for (const auto &vle : multi_look) {
+        reaches.push_back({});
+        bool done_offset = false;
+
+        for (const auto &le : vle) {
+            reaches.back().push_back(le.reach);
+
+            /* empty reaches don't have valid offsets */
+            if (!done_offset && le.reach.any()) {
+                look_offsets.push_back(le.offset);
+                done_offset = true;
+            }
+        }
+    }
+    inst->look_index = cache.get_offset_of(look_offsets, blob);
+    inst->reach_index = cache.get_offset_of(reaches, blob);
+    inst->count = verify_u32(multi_look.size());
     inst->last_start = last_start;
     copy(begin(start_mask), end(start_mask), inst->start_mask);
     inst->fail_jump = calc_jump(offset_map, this, target);
index 06d146a5d9538e8e8a42d9cbf77e3b4f961b21c1..025f6a671861959625c8090c7f42a667e79f6588 100644 (file)
@@ -37,6 +37,7 @@
 #ifndef ROSE_BUILD_INSTRUCTIONS_H
 #define ROSE_BUILD_INSTRUCTIONS_H
 
+#include "rose_build_lookaround.h"
 #include "rose_build_program.h"
 #include "util/verify_types.h"
 
@@ -382,20 +383,19 @@ class RoseInstrCheckSingleLookaround
                                     RoseInstrCheckSingleLookaround> {
 public:
     s8 offset;
-    u32 reach_index;
+    CharReach reach;
     const RoseInstruction *target;
 
-    RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in,
+    RoseInstrCheckSingleLookaround(s8 offset_in, CharReach reach_in,
                                    const RoseInstruction *target_in)
-        : offset(offset_in), reach_index(reach_index_in), target(target_in) {}
+        : offset(offset_in), reach(std::move(reach_in)), target(target_in) {}
 
     bool operator==(const RoseInstrCheckSingleLookaround &ri) const {
-        return offset == ri.offset && reach_index == ri.reach_index &&
-               target == ri.target;
+        return offset == ri.offset && reach == ri.reach && target == ri.target;
     }
 
     size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), offset, reach_index);
+        return hash_all(static_cast<int>(opcode), offset, reach);
     }
 
     void write(void *dest, RoseEngineBlob &blob,
@@ -404,7 +404,7 @@ public:
     bool equiv_to(const RoseInstrCheckSingleLookaround &ri,
                   const OffsetMap &offsets,
                   const OffsetMap &other_offsets) const {
-        return offset == ri.offset && reach_index == ri.reach_index &&
+        return offset == ri.offset && reach == ri.reach &&
                offsets.at(target) == other_offsets.at(ri.target);
     }
 };
@@ -414,24 +414,19 @@ class RoseInstrCheckLookaround
                                     ROSE_STRUCT_CHECK_LOOKAROUND,
                                     RoseInstrCheckLookaround> {
 public:
-    u32 look_index;
-    u32 reach_index;
-    u32 count;
+    std::vector<LookEntry> look;
     const RoseInstruction *target;
 
-    RoseInstrCheckLookaround(u32 look_index_in, u32 reach_index_in,
-                             u32 count_in, const RoseInstruction *target_in)
-        : look_index(look_index_in), reach_index(reach_index_in),
-          count(count_in), target(target_in) {}
+    RoseInstrCheckLookaround(std::vector<LookEntry> look_in,
+                             const RoseInstruction *target_in)
+        : look(std::move(look_in)), target(target_in) {}
 
     bool operator==(const RoseInstrCheckLookaround &ri) const {
-        return look_index == ri.look_index && reach_index == ri.reach_index &&
-               count == ri.count && target == ri.target;
+        return look == ri.look && target == ri.target;
     }
 
     size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), look_index, reach_index,
-                        count);
+        return hash_all(static_cast<int>(opcode), look);
     }
 
     void write(void *dest, RoseEngineBlob &blob,
@@ -439,9 +434,8 @@ public:
 
     bool equiv_to(const RoseInstrCheckLookaround &ri, const OffsetMap &offsets,
                   const OffsetMap &other_offsets) const {
-        return look_index == ri.look_index && reach_index == ri.reach_index &&
-               count == ri.count &&
-               offsets.at(target) == other_offsets.at(ri.target);
+        return look == ri.look
+            && offsets.at(target) == other_offsets.at(ri.target);
     }
 };
 
@@ -1837,30 +1831,26 @@ class RoseInstrMultipathLookaround
                                     ROSE_STRUCT_MULTIPATH_LOOKAROUND,
                                     RoseInstrMultipathLookaround> {
 public:
-    u32 look_index;
-    u32 reach_index;
-    u32 count;
+    std::vector<std::vector<LookEntry>> multi_look;
     s32 last_start;
     std::array<u8, 16> start_mask;
     const RoseInstruction *target;
 
-    RoseInstrMultipathLookaround(u32 look_index_in, u32 reach_index_in,
-                                 u32 count_in, s32 last_start_in,
+    RoseInstrMultipathLookaround(std::vector<std::vector<LookEntry>> ml,
+                                 s32 last_start_in,
                                  std::array<u8, 16> start_mask_in,
                                  const RoseInstruction *target_in)
-        : look_index(look_index_in), reach_index(reach_index_in),
-          count(count_in), last_start(last_start_in),
+        : multi_look(std::move(ml)), last_start(last_start_in),
           start_mask(std::move(start_mask_in)), target(target_in) {}
 
     bool operator==(const RoseInstrMultipathLookaround &ri) const {
-        return look_index == ri.look_index && reach_index == ri.reach_index &&
-               count == ri.count && last_start == ri.last_start &&
-               start_mask == ri.start_mask && target == ri.target;
+        return multi_look == ri.multi_look && last_start == ri.last_start
+        && start_mask == ri.start_mask && target == ri.target;
     }
 
     size_t hash() const override {
-        return hash_all(static_cast<int>(opcode), look_index, reach_index,
-                        count, last_start, start_mask);
+        return hash_all(static_cast<int>(opcode), multi_look, last_start,
+                        start_mask);
     }
 
     void write(void *dest, RoseEngineBlob &blob,
@@ -1869,10 +1859,9 @@ public:
     bool equiv_to(const RoseInstrMultipathLookaround &ri,
                   const OffsetMap &offsets,
                   const OffsetMap &other_offsets) const {
-        return look_index == ri.look_index && reach_index == ri.reach_index &&
-               count == ri.count && last_start == ri.last_start &&
-               start_mask == ri.start_mask &&
-               offsets.at(target) == other_offsets.at(ri.target);
+        return multi_look == ri.multi_look && last_start == ri.last_start
+            && start_mask == ri.start_mask
+            && offsets.at(target) == other_offsets.at(ri.target);
     }
 };
 
index 92eeff63ce37b8871d4db9d09bec120b887e7816..eb9db5a6a351f666496f80cd9a420846e2bf8c87 100644 (file)
@@ -28,6 +28,7 @@
 
 #include "rose_build_program.h"
 
+#include "rose_build_engine_blob.h"
 #include "rose_build_instructions.h"
 #include "rose_build_lookaround.h"
 #include "rose_build_resources.h"
@@ -39,7 +40,6 @@
 #include "util/container.h"
 #include "util/compile_context.h"
 #include "util/compile_error.h"
-#include "util/dump_charclass.h"
 #include "util/report_manager.h"
 #include "util/verify_types.h"
 
@@ -851,40 +851,6 @@ void makeRoleGroups(const RoseGraph &g, ProgramBuild &prog_build,
     program.add_before_end(make_unique<RoseInstrSetGroups>(groups));
 }
 
-static
-void addLookaround(lookaround_info &lookarounds,
-                   const vector<vector<LookEntry>> &look,
-                   u32 &look_index, u32 &reach_index) {
-    // Check the cache.
-    auto it = lookarounds.cache.find(look);
-    if (it != lookarounds.cache.end()) {
-        look_index = verify_u32(it->second.first);
-        reach_index = verify_u32(it->second.second);
-        DEBUG_PRINTF("reusing look at idx %u\n", look_index);
-        DEBUG_PRINTF("reusing reach at idx %u\n", reach_index);
-        return;
-    }
-
-    size_t look_idx = lookarounds.lookTableSize;
-    size_t reach_idx = lookarounds.reachTableSize;
-
-    if (look.size() == 1) {
-        lookarounds.lookTableSize += look.front().size();
-        lookarounds.reachTableSize += look.front().size() * REACH_BITVECTOR_LEN;
-    } else {
-        lookarounds.lookTableSize += look.size();
-        lookarounds.reachTableSize += look.size() * MULTI_REACH_BITVECTOR_LEN;
-    }
-
-    lookarounds.cache.emplace(look, make_pair(look_idx, reach_idx));
-    lookarounds.table.emplace_back(look);
-
-    DEBUG_PRINTF("adding look at idx %zu\n", look_idx);
-    DEBUG_PRINTF("adding reach at idx %zu\n", reach_idx);
-    look_index =  verify_u32(look_idx);
-    reach_index = verify_u32(reach_idx);
-}
-
 static
 bool checkReachMask(const CharReach &cr, u8 &andmask, u8 &cmpmask) {
     size_t reach_size = cr.count();
@@ -1278,8 +1244,7 @@ bool makeRoleShufti(const vector<LookEntry> &look, RoseProgram &program) {
  * available.
  */
 static
-void makeLookaroundInstruction(lookaround_info &lookarounds,
-                               const vector<LookEntry> &look,
+void makeLookaroundInstruction(const vector<LookEntry> &look,
                                RoseProgram &program) {
     assert(!look.empty());
 
@@ -1289,12 +1254,8 @@ void makeLookaroundInstruction(lookaround_info &lookarounds,
 
     if (look.size() == 1) {
         s8 offset = look.begin()->offset;
-        u32 look_idx, reach_idx;
-        vector<vector<LookEntry>> lookaround;
-        lookaround.emplace_back(look);
-        addLookaround(lookarounds, lookaround, look_idx, reach_idx);
-        // We don't need look_idx here.
-        auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, reach_idx,
+        const CharReach &reach = look.begin()->reach;
+        auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, reach,
                                                      program.end_instruction());
         program.add_before_end(move(ri));
         return;
@@ -1312,21 +1273,13 @@ void makeLookaroundInstruction(lookaround_info &lookarounds,
         return;
     }
 
-    u32 look_idx, reach_idx;
-    vector<vector<LookEntry>> lookaround;
-    lookaround.emplace_back(look);
-    addLookaround(lookarounds, lookaround, look_idx, reach_idx);
-    u32 look_count = verify_u32(look.size());
-
-    auto ri = make_unique<RoseInstrCheckLookaround>(look_idx, reach_idx,
-                                                    look_count,
+    auto ri = make_unique<RoseInstrCheckLookaround>(look,
                                                     program.end_instruction());
     program.add_before_end(move(ri));
 }
 
 static
-void makeCheckLitMaskInstruction(const RoseBuildImpl &build,
-                                 lookaround_info &lookarounds, u32 lit_id,
+void makeCheckLitMaskInstruction(const RoseBuildImpl &build, u32 lit_id,
                                  RoseProgram &program) {
     const auto &info = build.literal_info.at(lit_id);
     if (!info.requires_benefits) {
@@ -1348,7 +1301,7 @@ void makeCheckLitMaskInstruction(const RoseBuildImpl &build,
     }
 
     assert(!look.empty());
-    makeLookaroundInstruction(lookarounds, look, program);
+    makeLookaroundInstruction(look, program);
 }
 
 static
@@ -1417,7 +1370,6 @@ bool hasDelayedLiteral(const RoseBuildImpl &build,
 
 static
 RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
-                                  lookaround_info &lookarounds,
                                   ProgramBuild &prog_build, u32 lit_id,
                                   const vector<RoseEdge> &lit_edges,
                                   bool is_anchored_replay_program) {
@@ -1431,7 +1383,7 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
     }
 
     // Check lit mask.
-    makeCheckLitMaskInstruction(build, lookarounds, lit_id, program);
+    makeCheckLitMaskInstruction(build, lit_id, program);
 
     // Check literal groups. This is an optimisation that we only perform for
     // delayed literals, as their groups may be switched off; ordinarily, we
@@ -1458,20 +1410,6 @@ RoseProgram makeLitInitialProgram(const RoseBuildImpl &build,
     return program;
 }
 
-#if defined(DEBUG) || defined(DUMP_SUPPORT)
-static UNUSED
-string dumpMultiLook(const vector<LookEntry> &looks) {
-    ostringstream oss;
-    for (auto it = looks.begin(); it != looks.end(); ++it) {
-        if (it != looks.begin()) {
-            oss << ", ";
-        }
-        oss << "{" << int(it->offset) << ": " << describeClass(it->reach) << "}";
-    }
-    return oss.str();
-}
-#endif
-
 static
 bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
                              RoseProgram &program) {
@@ -1612,8 +1550,7 @@ bool makeRoleMultipathShufti(const vector<vector<LookEntry>> &multi_look,
 }
 
 static
-void makeRoleMultipathLookaround(lookaround_info &lookarounds,
-                                 const vector<vector<LookEntry>> &multi_look,
+void makeRoleMultipathLookaround(const vector<vector<LookEntry>> &multi_look,
                                  RoseProgram &program) {
     assert(!multi_look.empty());
     assert(multi_look.size() <= MAX_LOOKAROUND_PATHS);
@@ -1675,13 +1612,8 @@ void makeRoleMultipathLookaround(lookaround_info &lookarounds,
         ordered_look.emplace_back(multi_entry);
     }
 
-    u32 look_idx, reach_idx;
-    addLookaround(lookarounds, ordered_look, look_idx, reach_idx);
-    u32 look_count = verify_u32(ordered_look.size());
-
-    auto ri = make_unique<RoseInstrMultipathLookaround>(look_idx, reach_idx,
-                                                        look_count, last_start,
-                                                        start_mask,
+    auto ri = make_unique<RoseInstrMultipathLookaround>(move(ordered_look),
+                                                        last_start, start_mask,
                                                     program.end_instruction());
     program.add_before_end(move(ri));
 }
@@ -1689,8 +1621,7 @@ void makeRoleMultipathLookaround(lookaround_info &lookarounds,
 static
 void makeRoleLookaround(const RoseBuildImpl &build,
                         const map<RoseVertex, left_build_info> &leftfix_info,
-                        lookaround_info &lookarounds, RoseVertex v,
-                        RoseProgram &program) {
+                        RoseVertex v, RoseProgram &program) {
     if (!build.cc.grey.roseLookaroundMasks) {
         return;
     }
@@ -1714,14 +1645,14 @@ void makeRoleLookaround(const RoseBuildImpl &build,
         findLookaroundMasks(build, v, look_more);
         mergeLookaround(look, look_more);
         if (!look.empty()) {
-            makeLookaroundInstruction(lookarounds, look, program);
+            makeLookaroundInstruction(look, program);
         }
         return;
     }
 
     if (!makeRoleMultipathShufti(looks, program)) {
         assert(looks.size() <= 8);
-        makeRoleMultipathLookaround(lookarounds, looks, program);
+        makeRoleMultipathLookaround(looks, program);
     }
 }
 
@@ -1902,7 +1833,6 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build,
                         const map<RoseVertex, left_build_info> &leftfix_info,
                         const map<suffix_id, u32> &suffixes,
                         const map<u32, engine_info> &engine_info_by_queue,
-                        lookaround_info &lookarounds,
                         const unordered_map<RoseVertex, u32> &roleStateIndices,
                         ProgramBuild &prog_build, const RoseEdge &e) {
     const RoseGraph &g = build.g;
@@ -1929,7 +1859,7 @@ RoseProgram makeRoleProgram(const RoseBuildImpl &build,
         makeRoleCheckNotHandled(prog_build, v, program);
     }
 
-    makeRoleLookaround(build, leftfix_info, lookarounds, v, program);
+    makeRoleLookaround(build, leftfix_info, v, program);
     makeRoleCheckLeftfix(build, leftfix_info, v, program);
 
     // Next, we can add program instructions that have effects. This must be
@@ -2029,7 +1959,6 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
                          const map<RoseVertex, left_build_info> &leftfix_info,
                          const map<suffix_id, u32> &suffixes,
                          const map<u32, engine_info> &engine_info_by_queue,
-                         lookaround_info &lookarounds,
                          const unordered_map<RoseVertex, u32> &roleStateIndices,
                          ProgramBuild &prog_build, u32 lit_id,
                          const vector<RoseEdge> &lit_edges,
@@ -2040,8 +1969,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
 
     // Construct initial program up front, as its early checks must be able
     // to jump to end and terminate processing for this literal.
-    auto lit_program = makeLitInitialProgram(build, lookarounds, prog_build,
-                                             lit_id, lit_edges,
+    auto lit_program = makeLitInitialProgram(build, prog_build, lit_id,
+                                             lit_edges,
                                              is_anchored_replay_program);
 
     RoseProgram role_programs;
@@ -2060,8 +1989,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
         assert(contains(roleStateIndices, u));
         u32 pred_state = roleStateIndices.at(u);
         auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
-                                         engine_info_by_queue, lookarounds,
-                                         roleStateIndices, prog_build, e);
+                                         engine_info_by_queue, roleStateIndices,
+                                         prog_build, e);
         if (!role_prog.empty()) {
             pred_blocks[pred_state].add_block(move(role_prog));
         }
@@ -2080,8 +2009,8 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
         DEBUG_PRINTF("root edge (%zu,%zu)\n", g[u].index,
                      g[target(e, g)].index);
         auto role_prog = makeRoleProgram(build, leftfix_info, suffixes,
-                                         engine_info_by_queue, lookarounds,
-                                         roleStateIndices, prog_build, e);
+                                         engine_info_by_queue, roleStateIndices,
+                                         prog_build, e);
         role_programs.add_block(move(role_prog));
     }
 
@@ -2104,7 +2033,6 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
 }
 
 RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
-                                    lookaround_info &lookarounds,
                                     ProgramBuild &prog_build,
                                     const vector<u32> &lit_ids) {
     assert(!lit_ids.empty());
@@ -2126,7 +2054,7 @@ RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
                                         build.cc);
         }
 
-        makeCheckLitMaskInstruction(build, lookarounds, lit_id, prog);
+        makeCheckLitMaskInstruction(build, lit_id, prog);
         makePushDelayedInstructions(build.literals, prog_build,
                                     build.literal_info.at(lit_id).delayed_ids,
                                     prog);
index d8e542b81dd5d3343b2439e90e829c69362e9225..8758ef64a02f38dd3ae2c65c40cc64baec5c53c2 100644 (file)
@@ -214,25 +214,6 @@ struct left_build_info {
     std::vector<std::vector<LookEntry>> lookaround;
 };
 
-struct lookaround_info : noncopyable {
-    /** \brief LookEntry list cache, so that we can reuse the look index and
-     * reach index for the same lookaround. */
-    ue2::unordered_map<std::vector<std::vector<LookEntry>>,
-        std::pair<size_t, size_t>> cache;
-
-    /** \brief Lookaround table for Rose roles. */
-    std::vector<std::vector<std::vector<LookEntry>>> table;
-
-    /** \brief Lookaround look table size. */
-    size_t lookTableSize = 0;
-
-    /** \brief Lookaround reach table size.
-     * since single path lookaround and multi-path lookaround have different
-     * bitvectors range (32 and 256), we need to maintain both look table size
-     * and reach table size. */
-    size_t reachTableSize = 0;
-};
-
 /**
  * \brief Provides a brief summary of properties of an NFA that has already been
  * finalised and stored in the blob.
@@ -261,14 +242,12 @@ RoseProgram makeLiteralProgram(const RoseBuildImpl &build,
                       const std::map<RoseVertex, left_build_info> &leftfix_info,
                       const std::map<suffix_id, u32> &suffixes,
                       const std::map<u32, engine_info> &engine_info_by_queue,
-                      lookaround_info &lookarounds,
                       const unordered_map<RoseVertex, u32> &roleStateIndices,
                       ProgramBuild &prog_build, u32 lit_id,
                       const std::vector<RoseEdge> &lit_edges,
                       bool is_anchored_replay_program);
 
 RoseProgram makeDelayRebuildProgram(const RoseBuildImpl &build,
-                                    lookaround_info &lookarounds,
                                     ProgramBuild &prog_build,
                                     const std::vector<u32> &lit_ids);
 
index 777e7234c2ffa21773bf5c752ef3b0754c62f5d6..57395c9dc107fa2c39376440197d9a0480d7d082 100644 (file)
@@ -383,10 +383,6 @@ struct RoseEngine {
 
     u32 leftOffset;
     u32 roseCount;
-    u32 lookaroundTableOffset; //!< base of lookaround offset list (of s8 values)
-    u32 lookaroundReachOffset; /**< base of lookaround reach bitvectors (32
-                                * bytes for single-path lookaround and 256 bytes
-                                * for multi-path lookaround) */
 
     u32 eodProgramOffset; //!< EOD program, otherwise 0.
 
index cdfe96acee9315c389579870268a649c8d766dba..78b123d5c30f2230f76a94d75fafbef92bafeeb9 100644 (file)
@@ -231,8 +231,8 @@ struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND {
 
 struct ROSE_STRUCT_CHECK_LOOKAROUND {
     u8 code; //!< From enum RoseInstructionCode.
-    u32 look_index; //!< Index for lookaround offset list.
-    u32 reach_index; //!< Index for lookaround reach bitvectors.
+    u32 look_index; //!< Offset in bytecode of lookaround offset list.
+    u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors.
     u32 count; //!< The count of lookaround entries in one instruction.
     u32 fail_jump; //!< Jump forward this many bytes on failure.
 };
@@ -561,8 +561,8 @@ struct ROSE_STRUCT_CLEAR_WORK_DONE {
 
 struct ROSE_STRUCT_MULTIPATH_LOOKAROUND {
     u8 code; //!< From enum RoseInstructionCode.
-    u32 look_index; //!< Index for lookaround offset list.
-    u32 reach_index; //!< Index for lookaround reach bitvectors.
+    u32 look_index; //!< Offset in bytecode of lookaround offset list.
+    u32 reach_index; //!< Offset in bytecode of lookaround reach bitvectors.
     u32 count; //!< The lookaround byte numbers for each path.
     s32 last_start; //!< The latest start offset among 8 paths.
     u8 start_mask[MULTIPATH_MAX_LEN]; /*!< Used to initialize path if left-most