git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
rose: add CHECK_SINGLE_LOOKAROUND instruction
author: Xu, Chi <chi.xu@intel.com>
Mon, 5 Sep 2016 04:19:15 +0000 (12:19 +0800)
committer: Matthew Barr <matthew.barr@intel.com>
Fri, 28 Oct 2016 03:47:04 +0000 (14:47 +1100)
This specialisation is cheaper than the shufti-based variants, so we
prefer it for single character class tests.

src/rose/program_runtime.h
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_program.cpp
src/rose/rose_build_program.h
src/rose/rose_dump.cpp
src/rose/rose_program.h

index 57f39bbeeff68b248f20d64964470434d175137d..735f8cdb7fd080a7991a04b4b53885be39530290 100644 (file)
@@ -1019,6 +1019,46 @@ int roseCheckShufti32x16(const struct core_info *ci, const u8 *hi_mask,
     }
 }
 
+/**
+ * \brief Check the single byte at (end + checkOffset) against one entry of
+ * the lookaround reach table.
+ *
+ * Returns 0 if the byte would fall before offset zero or is not in the reach.
+ * Returns 1 if the byte is in the reach, or if it lies outside both the
+ * current buffer and the history buffer and so cannot be examined here.
+ */
+static rose_inline
+int roseCheckSingleLookaround(const struct RoseEngine *t,
+                              const struct hs_scratch *scratch,
+                              s8 checkOffset, u32 lookaroundIndex, u64a end) {
+    assert(lookaroundIndex != MO_INVALID_IDX);
+    const struct core_info *ci = &scratch->core_info;
+    DEBUG_PRINTF("end=%llu, buf_offset=%llu, buf_end=%llu\n", end,
+                 ci->buf_offset, ci->buf_offset + ci->len);
+
+    /* Position of 'end', then of the byte under test, relative to the start
+     * of ci->buf. Negative values index backwards from the end of ci->hbuf. */
+    const s64a base_offset = end - ci->buf_offset;
+    const s64a offset = base_offset + checkOffset;
+    DEBUG_PRINTF("base_offset=%lld\n", base_offset);
+    DEBUG_PRINTF("checkOffset=%d offset=%lld\n", checkOffset, offset);
+
+    if (unlikely(checkOffset < 0 && (u64a)(0 - checkOffset) > end)) {
+        DEBUG_PRINTF("too early, fail\n");
+        return 0;
+    }
+
+    /* Fetch the byte under test, or pass if neither buffer holds it. */
+    u8 c;
+    if (offset < 0) {
+        if (offset < -(s64a)ci->hlen) {
+            return 1;
+        }
+        c = ci->hbuf[ci->hlen + offset];
+    } else {
+        if (offset >= (s64a)ci->len) {
+            return 1;
+        }
+        c = ci->buf[offset];
+    }
+
+    /* Each reach entry occupies REACH_BITVECTOR_LEN bytes. */
+    const u8 *reach = (const u8 *)t + t->lookaroundReachOffset +
+                      lookaroundIndex * REACH_BITVECTOR_LEN;
+    if (reachHasBit(reach, c)) {
+        DEBUG_PRINTF("OK :)\n");
+        return 1;
+    }
+
+    DEBUG_PRINTF("char 0x%02x failed reach check\n", c);
+    return 0;
+}
+
 /**
  * \brief Scan around a literal, checking that the "lookaround" reach masks
  * are satisfied.
@@ -1415,6 +1455,17 @@ hwlmcb_rv_t roseRunProgram_i(const struct RoseEngine *t,
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
+                if (!roseCheckSingleLookaround(t, scratch, ri->offset,
+                                               ri->reach_index, end)) {
+                    DEBUG_PRINTF("failed lookaround check\n");
+                    assert(ri->fail_jump); // must progress
+                    pc += ri->fail_jump;
+                    continue;
+                }
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_LOOKAROUND) {
                 if (!roseCheckLookaround(t, scratch, ri->index, ri->count,
                                          end)) {
index 7061cb3297eb8ef4cd1c8f4edea5574038cf1a02..5421f1cb0440a920b410e5fafa1302fd7813c047 100644 (file)
@@ -3076,6 +3076,15 @@ void makeLookaroundInstruction(build_context &bc, const vector<LookEntry> &look,
         return;
     }
 
+    if (look.size() == 1) {
+        s8 offset = look.begin()->offset;
+        u32 look_idx = addLookaround(bc, look);
+        auto ri = make_unique<RoseInstrCheckSingleLookaround>(offset, look_idx,
+                                                    program.end_instruction());
+        program.add_before_end(move(ri));
+        return;
+    }
+
     if (makeRoleMask(look, program)) {
         return;
     }
index 69ad31a964cb15f98931dd1b706d00e2081a03da..fc157b8895f8542ff33eb8a5983909c12830f716 100644 (file)
@@ -112,6 +112,15 @@ void RoseInstrCheckNotHandled::write(void *dest, RoseEngineBlob &blob,
     inst->fail_jump = calc_jump(offset_map, this, target);
 }
 
+void RoseInstrCheckSingleLookaround::write(void *dest, RoseEngineBlob &blob,
+                                           const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->offset = offset;
+    inst->reach_index = reach_index;
+    inst->fail_jump = calc_jump(offset_map, this, target);
+}
+
 void RoseInstrCheckLookaround::write(void *dest, RoseEngineBlob &blob,
                                      const OffsetMap &offset_map) const {
     RoseInstrBase::write(dest, blob, offset_map);
index 309a1b3e9f0980dbad1da3269de948b0eabc8761..c76456ccc59ae6170163e741d61980b9bffca688 100644 (file)
@@ -378,6 +378,39 @@ public:
     }
 };
 
+class RoseInstrCheckSingleLookaround
+    : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_SINGLE_LOOKAROUND,
+                                    ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND,
+                                    RoseInstrCheckSingleLookaround> {
+public:
+    s8 offset;
+    u32 reach_index;
+    const RoseInstruction *target;
+
+    RoseInstrCheckSingleLookaround(s8 offset_in, u32 reach_index_in,
+                                   const RoseInstruction *target_in)
+        : offset(offset_in), reach_index(reach_index_in), target(target_in) {}
+
+    bool operator==(const RoseInstrCheckSingleLookaround &ri) const {
+        return offset == ri.offset && reach_index == ri.reach_index &&
+               target == ri.target;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), offset, reach_index);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    bool equiv_to(const RoseInstrCheckSingleLookaround &ri,
+                  const OffsetMap &offsets,
+                  const OffsetMap &other_offsets) const {
+        return offset == ri.offset && reach_index == ri.reach_index &&
+               offsets.at(target) == other_offsets.at(ri.target);
+    }
+};
+
 class RoseInstrCheckLookaround
     : public RoseInstrBaseOneTarget<ROSE_INSTR_CHECK_LOOKAROUND,
                                     ROSE_STRUCT_CHECK_LOOKAROUND,
index 7a02ed3a7b9190f69be04ec54ad290e1f40c89de..f0bec701fee908a3be91f537324458a0fcf3ecef 100644 (file)
@@ -273,6 +273,20 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) {
             }
             PROGRAM_NEXT_INSTRUCTION
 
+            PROGRAM_CASE(CHECK_SINGLE_LOOKAROUND) {
+                os << "    offset " << int{ri->offset} << endl;
+                os << "    reach_index " << ri->reach_index << endl;
+                os << "    fail_jump " << offset + ri->fail_jump << endl;
+                const u8 *base = (const u8 *)t;
+                const u8 *reach_base = base + t->lookaroundReachOffset;
+                const u8 *reach = reach_base +
+                                  ri->reach_index * REACH_BITVECTOR_LEN;
+                os << "    contents:" << endl;
+                describeClass(os, bitvectorToReach(reach), 1000, CC_OUT_TEXT);
+                os << endl;
+            }
+            PROGRAM_NEXT_INSTRUCTION
+
             PROGRAM_CASE(CHECK_LOOKAROUND) {
                 os << "    index " << ri->index << endl;
                 os << "    count " << ri->count << endl;
index 44d5d524e22b897568c316200448e236490bc201..370fc826c70cacd69713597160943dadf1ec5d26 100644 (file)
@@ -48,6 +48,7 @@ enum RoseInstructionCode {
     ROSE_INSTR_CHECK_ONLY_EOD,    //!< Role matches only at EOD.
     ROSE_INSTR_CHECK_BOUNDS,      //!< Bounds on distance from offset 0.
     ROSE_INSTR_CHECK_NOT_HANDLED, //!< Test & set role in "handled".
+    ROSE_INSTR_CHECK_SINGLE_LOOKAROUND, //!< Single lookaround check.
     ROSE_INSTR_CHECK_LOOKAROUND,  //!< Lookaround check.
     ROSE_INSTR_CHECK_MASK,        //!< 8-bytes mask check.
     ROSE_INSTR_CHECK_MASK_32,     //!< 32-bytes and/cmp/neg mask check.
@@ -154,6 +155,13 @@ struct ROSE_STRUCT_CHECK_NOT_HANDLED {
     u32 fail_jump; //!< Jump forward this many bytes if we have seen key before.
 };
 
+struct ROSE_STRUCT_CHECK_SINGLE_LOOKAROUND {
+    u8 code; //!< From enum RoseInstructionCode.
+    s8 offset; //!< Signed offset (relative to end) of the byte to examine.
+    u32 reach_index; //!< Index into the table at lookaroundReachOffset.
+    u32 fail_jump; //!< Jump forward this many bytes on failure.
+};
+
 struct ROSE_STRUCT_CHECK_LOOKAROUND {
     u8 code; //!< From enum RoseInstructionCode.
     u32 index;