git.ipfire.org Git - thirdparty/vectorscan.git/commitdiff
make rose responsible for dumping its bytecode
author Alex Coyte <a.coyte@intel.com>
Thu, 16 Mar 2017 04:30:33 +0000 (15:30 +1100)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 26 Apr 2017 05:17:19 +0000 (15:17 +1000)
src/compiler/compiler.cpp
src/rose/rose_build_bytecode.cpp
src/rose/rose_build_dump.cpp
src/rose/rose_build_dump.h
src/rose/rose_build_matchers.cpp
src/rose/rose_build_matchers.h

index d59c5cc6518b860b7a179a25edaf3fb19b2caf0a..56ed5f41c53d46cc4e962f699d9e1522943364c0 100644 (file)
@@ -55,7 +55,6 @@
 #include "parser/unsupported.h"
 #include "parser/utf8_validate.h"
 #include "rose/rose_build.h"
-#include "rose/rose_build_dump.h"
 #include "som/slot_manager_dump.h"
 #include "util/alloc.h"
 #include "util/compile_error.h"
@@ -310,7 +309,6 @@ aligned_unique_ptr<RoseEngine> generateRoseEngine(NG &ng) {
         return nullptr;
     }
 
-    dumpRose(*ng.rose, rose.get(), ng.cc.grey);
     dumpReportManager(ng.rm, ng.cc.grey);
     dumpSomSlotManager(ng.ssm, ng.cc.grey);
     dumpSmallWrite(rose.get(), ng.cc.grey);
index e7859405c155de060e4d17fe1586fcfe044b1827..32a1d07508078f1cba2b0ebb16a5be084d7e70a1 100644 (file)
@@ -33,6 +33,7 @@
 #include "hs_compile.h" // for HS_MODE_*
 #include "rose_build_add_internal.h"
 #include "rose_build_anchored.h"
+#include "rose_build_dump.h"
 #include "rose_build_engine_blob.h"
 #include "rose_build_exclusive.h"
 #include "rose_build_groups.h"
@@ -5582,6 +5583,9 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
     engine = addSmallWriteEngine(*this, move(engine));
 
     DEBUG_PRINTF("rose done %p\n", engine.get());
+
+    dumpRose(*this, engine.get());
+
     return engine;
 }
 
index a13fc9646c7b4a2a4fe096549b4fb10b3d9ec672..0d05e8ac7084293f0169d1c6ee40e24a9f7676bf 100644 (file)
@@ -275,10 +275,8 @@ private:
 
 } // namespace
 
-void dumpRoseGraph(const RoseBuild &build_base, const RoseEngine *t,
+void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t,
                    const char *filename) {
-    const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl &>(build_base);
-
     const Grey &grey = build.cc.grey;
 
     /* "early" rose graphs should only be dumped if we are dumping intermediate
@@ -497,9 +495,13 @@ string toRegex(const string &lit) {
     return os.str();
 }
 
-static
-void dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) {
-    ofstream of(filename.c_str());
+void dumpMatcherLiterals(const vector<hwlmLiteral> &lits, const string &name,
+                         const Grey &grey) {
+    if (!grey.dumpFlags) {
+        return;
+    }
+
+    ofstream of(grey.dumpPath + "rose_" + name + "_test_literals.txt");
 
     // Unique regex index, as literals may share an ID.
     u32 i = 0;
@@ -528,40 +530,6 @@ void dumpTestLiterals(const string &filename, const vector<hwlmLiteral> &lits) {
     of.close();
 }
 
-static
-void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
-    size_t historyRequired = build.calcHistoryRequired();
-    size_t longLitLengthThreshold =
-        calcLongLitThreshold(build, historyRequired);
-
-    auto mp =
-        makeMatcherProto(build, ROSE_ANCHORED, false, longLitLengthThreshold);
-    dumpTestLiterals(base + "rose_anchored_test_literals.txt", mp.lits);
-
-    mp = makeMatcherProto(build, ROSE_FLOATING, false, longLitLengthThreshold);
-    dumpTestLiterals(base + "rose_float_test_literals.txt", mp.lits);
-
-    if (build.cc.streaming) {
-        mp = makeMatcherProto(build, ROSE_FLOATING, true,
-                              longLitLengthThreshold);
-        dumpTestLiterals(base + "rose_delay_rebuild_test_literals.txt",
-                         mp.lits);
-    }
-
-    mp = makeMatcherProto(build, ROSE_EOD_ANCHORED, false,
-                          build.ematcher_region_size);
-    dumpTestLiterals(base + "rose_eod_test_literals.txt", mp.lits);
-
-    if (!build.cc.streaming) {
-        mp = makeMatcherProto(build, ROSE_FLOATING, false, ROSE_SMALL_BLOCK_LEN,
-                              ROSE_SMALL_BLOCK_LEN);
-        auto mp2 = makeMatcherProto(build, ROSE_ANCHORED_SMALL_BLOCK, false,
-                                    ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
-        mp.lits.insert(end(mp.lits), begin(mp2.lits), end(mp2.lits));
-        dumpTestLiterals(base + "rose_smallblock_test_literals.txt", mp.lits);
-    }
-}
-
 static
 const void *loadFromByteCodeOffset(const RoseEngine *t, u32 offset) {
     if (!offset) {
@@ -1894,14 +1862,13 @@ void roseDumpPrograms(const RoseBuildImpl &build, const RoseEngine *t,
     dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt");
 }
 
-void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
-              const Grey &grey) {
+void dumpRose(const RoseBuildImpl &build, const RoseEngine *t) {
+    const Grey &grey = build.cc.grey;
+    
     if (!grey.dumpFlags) {
         return;
     }
 
-    const RoseBuildImpl &build = dynamic_cast<const RoseBuildImpl&>(build_base);
-
     stringstream ss;
     ss << grey.dumpPath << "rose.txt";
 
@@ -1929,7 +1896,6 @@ void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
     ss.clear();
     ss << grey.dumpPath << "rose_literals.txt";
     dumpRoseLiterals(build, ss.str().c_str());
-    dumpRoseTestLiterals(build, grey.dumpPath);
 
     f = fopen((grey.dumpPath + "/rose_struct.txt").c_str(), "w");
     roseDumpStructRaw(t, f);
index 28e9f53ab9f964ddef0fab65a37e7cf4f2e12bf9..601f5914c4642563eb9880f8bbbe7ae696ec76e3 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
 #ifndef ROSE_BUILD_DUMP_H
 #define ROSE_BUILD_DUMP_H
 
+#include <string>
+#include <vector>
+
 struct RoseEngine;
 
 namespace ue2 {
 
-class RoseBuild;
+class RoseBuildImpl;
 struct Grey;
+struct hwlmLiteral;
 
 #ifdef DUMP_SUPPORT
 // Dump the Rose graph in graphviz representation.
-void dumpRoseGraph(const RoseBuild &build, const RoseEngine *t,
+void dumpRoseGraph(const RoseBuildImpl &build, const RoseEngine *t,
                    const char *filename);
 
-void dumpRose(const RoseBuild &build_base, const RoseEngine *t,
-              const Grey &grey);
+void dumpRose(const RoseBuildImpl &build, const RoseEngine *t);
+
+void dumpMatcherLiterals(const std::vector<hwlmLiteral> &lits,
+                         const std::string &name, const Grey &grey);
 #else
 
 static UNUSED
-void dumpRoseGraph(const RoseBuild &, const RoseEngine *, const char *) {
+void dumpRoseGraph(const RoseBuildImpl &, const RoseEngine *, const char *) {
 }
 
 static UNUSED
-void dumpRose(const RoseBuild &, const RoseEngine *, const Grey &) {
+void dumpRose(const RoseBuildImpl &, const RoseEngine *) {
 }
 
+static UNUSED
+void dumpMatcherLiterals(const std::vector<hwlmLiteral> &, const std::string &,
+                         const Grey &) {
+}
 #endif
 
 } // namespace ue2
index 50e48a5b6b4f9843a889e7400d6a810d2a202238..5625437bb83f1a15dc7fe20f566da9f826289cbb 100644 (file)
@@ -33,6 +33,7 @@
 
 #include "rose_build_matchers.h"
 
+#include "rose_build_dump.h"
 #include "rose_build_impl.h"
 #include "rose_build_lit_accel.h"
 #include "rose_build_width.h"
@@ -645,9 +646,35 @@ void trim_to_suffix(Container &c, size_t len) {
     c.erase(c.begin(), c.begin() + suffix_len);
 }
 
+namespace {
+
+/** \brief Prototype for literal matcher construction. */
+struct MatcherProto {
+    /** \brief Literal fragments used to construct the literal matcher. */
+    vector<hwlmLiteral> lits;
+
+    /** \brief Longer literals used for acceleration analysis. */
+    vector<AccelString> accel_lits;
+
+    /** \brief The history required by the literal matcher. */
+    size_t history_required = 0;
+
+    /** \brief Insert the contents of another MatcherProto. */
+    void insert(const MatcherProto &a);
+};
+}
+
+/**
+ * \brief Build up a vector of literals (and associated other data) for the
+ * given table.
+ *
+ * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can
+ * only lead to a pattern match after max_offset may be excluded.
+ */
+static
 MatcherProto makeMatcherProto(const RoseBuildImpl &build,
                               rose_literal_table table, bool delay_rebuild,
-                              size_t max_len, u32 max_offset) {
+                              size_t max_len, u32 max_offset = ROSE_BOUND_INF) {
     MatcherProto mp;
 
     if (delay_rebuild) {
@@ -794,6 +821,7 @@ buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
         DEBUG_PRINTF("empty floating matcher\n");
         return nullptr;
     }
+    dumpMatcherLiterals(mp.lits, "floating", build.cc.grey);
 
     for (const hwlmLiteral &lit : mp.lits) {
         *fgroups |= lit.groups;
@@ -834,6 +862,7 @@ aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(const RoseBuildImpl &build,
         DEBUG_PRINTF("empty delay rebuild matcher\n");
         return nullptr;
     }
+    dumpMatcherLiterals(mp.lits, "delay_rebuild", build.cc.grey);
 
     auto hwlm = hwlmBuild(mp.lits, false, build.cc, build.getInitialGroups());
     if (!hwlm) {
@@ -883,6 +912,7 @@ aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
     }
 
     mp.insert(mp_anchored);
+    dumpMatcherLiterals(mp.lits, "smallblock", build.cc.grey);
 
     // None of our literals should be longer than the small block limit.
     assert(all_of(begin(mp.lits), end(mp.lits), [](const hwlmLiteral &lit) {
@@ -919,6 +949,7 @@ aligned_unique_ptr<HWLM> buildEodAnchoredMatcher(const RoseBuildImpl &build,
         assert(!build.ematcher_region_size);
         return nullptr;
     }
+    dumpMatcherLiterals(mp.lits, "eod", build.cc.grey);
 
     assert(build.ematcher_region_size);
 
index 184c26337048802b0d7c07f029fe4940e506a6bf..cb56037ddbda3e180011c5c7a936052a05ede9a2 100644 (file)
 #define ROSE_BUILD_MATCHERS_H
 
 #include "rose_build_impl.h"
-#include "rose_build_lit_accel.h"
-#include "hwlm/hwlm_literal.h"
-
-#include <map>
-#include <vector>
 
+struct Grey;
 struct HWLM;
 
 namespace ue2 {
 
-/** \brief Prototype for literal matcher construction. */
-struct MatcherProto {
-    /** \brief Literal fragments used to construct the literal matcher. */
-    std::vector<hwlmLiteral> lits;
-
-    /** \brief Longer literals used for acceleration analysis. */
-    std::vector<AccelString> accel_lits;
-
-    /** \brief The history required by the literal matcher. */
-    size_t history_required = 0;
-
-    /** \brief Insert the contents of another MatcherProto. */
-    void insert(const MatcherProto &a);
-};
-
-/**
- * \brief Build up a vector of literals (and associated other data) for the
- * given table.
- *
- * If max_offset is specified (and not ROSE_BOUND_INF), then literals that can
- * only lead to a pattern match after max_offset may be excluded.
- */
-MatcherProto makeMatcherProto(const RoseBuildImpl &build,
-                              rose_literal_table table, bool delay_rebuild,
-                              size_t max_len, u32 max_offset = ROSE_BOUND_INF);
-
 aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
                                               size_t longLitLengthThreshold,
                                               rose_group *fgroups,