DEBUG_PRINTF("STATE groups=0x%016llx\n", tctx->groups);
- const u32 *delayRebuildPrograms =
- getByOffset(t, t->litDelayRebuildProgramOffset);
- assert(id < t->literalCount);
- const u32 program = delayRebuildPrograms[id];
-
- if (program) {
- const u64a som = 0;
- const size_t match_len = end - start + 1;
- const u8 flags = 0;
- UNUSED hwlmcb_rv_t rv = roseRunProgram(t, scratch, program, som,
- real_end, match_len, flags);
- assert(rv != HWLM_TERMINATE_MATCHING);
- }
+ assert(id < t->size); // id is a program offset
+ const u64a som = 0;
+ const size_t match_len = end - start + 1;
+ const u8 flags = 0;
+ UNUSED hwlmcb_rv_t rv =
+ roseRunProgram(t, scratch, id, som, real_end, match_len, flags);
+ assert(rv != HWLM_TERMINATE_MATCHING);
/* we are just repopulating the delay queue; the groups should already
 * be set from the original scan. */
struct hs_scratch *scratch, u64a end,
size_t match_len, u32 id) {
DEBUG_PRINTF("id=%u\n", id);
- const u32 *programs = getByOffset(t, t->litProgramOffset);
- assert(id < t->literalCount);
+ assert(id < t->size); // id is an offset into bytecode
const u64a som = 0;
const u8 flags = 0;
- return roseRunProgram_i(t, scratch, programs[id], som, end, match_len,
- flags);
+ return roseRunProgram_i(t, scratch, id, som, end, match_len, flags);
}
static rose_inline
* raw_dfa with program offsets.
*/
static
-void remapIdsToPrograms(raw_dfa &rdfa, const vector<u32> &litPrograms,
- const map<u32, u32> &final_to_frag_map) {
+void remapIdsToPrograms(raw_dfa &rdfa,
+ const map<u32, LitFragment> &final_to_frag_map) {
for (dstate &ds : rdfa.states) {
assert(ds.reports_eod.empty()); // Not used in anchored matcher.
if (ds.reports.empty()) {
flat_set<ReportID> new_reports;
for (auto final_id : ds.reports) {
assert(contains(final_to_frag_map, final_id));
- auto frag_id = final_to_frag_map.at(final_id);
- assert(frag_id < litPrograms.size());
- new_reports.insert(litPrograms.at(frag_id));
+ auto &frag = final_to_frag_map.at(final_id);
+ new_reports.insert(frag.lit_program_offset);
}
ds.reports = move(new_reports);
}
aligned_unique_ptr<anchored_matcher_info>
buildAnchoredMatcher(RoseBuildImpl &build, vector<raw_dfa> &dfas,
- const vector<u32> &litPrograms,
- const map<u32, u32> &final_to_frag_map, size_t *asize) {
+ const map<u32, LitFragment> &final_to_frag_map,
+ size_t *asize) {
const CompileContext &cc = build.cc;
if (dfas.empty()) {
}
for (auto &rdfa : dfas) {
- remapIdsToPrograms(rdfa, litPrograms, final_to_frag_map);
+ remapIdsToPrograms(rdfa, final_to_frag_map);
}
vector<aligned_unique_ptr<NFA>> nfas;
#define ROSE_BUILD_ANCHORED
#include "ue2common.h"
-#include "rose_build.h"
+#include "rose_build_impl.h"
#include "nfagraph/ng_holder.h"
#include "util/alloc.h"
*/
aligned_unique_ptr<anchored_matcher_info>
buildAnchoredMatcher(RoseBuildImpl &build, std::vector<raw_dfa> &dfas,
- const std::vector<u32> &litPrograms,
- const std::map<u32, u32> &final_to_frag_map,
+ const std::map<u32, LitFragment> &final_to_frag_map,
size_t *asize);
u32 anchoredStateSize(const anchored_matcher_info &atable);
* that have already been pushed into the engine_blob. */
ue2::unordered_map<u32, u32> engineOffsets;
- /** \brief Literal programs, indexed by final_id, after they have been
- * written to the engine_blob. */
- vector<u32> litPrograms;
-
/** \brief List of long literals (ones with CHECK_LONG_LIT instructions)
* that need hash table support. */
vector<ue2_case_string> longLiterals;
static
u32 buildDelayRebuildProgram(RoseBuildImpl &build, build_context &bc,
const flat_set<u32> &final_ids) {
+ if (!build.cc.streaming) {
+ return 0; // Delay rebuild programs are only used in streaming mode.
+ }
+
RoseProgram program;
for (const auto &final_id : final_ids) {
return frag;
}
-map<u32, u32> groupByFragment(const RoseBuildImpl &build) {
+map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build) {
u32 frag_id = 0;
- map<u32, u32> final_to_frag;
+ map<u32, LitFragment> final_to_frag;
map<rose_literal_id, vector<u32>> frag_lits;
for (const auto &m : build.final_id_to_literal) {
assert(!lit_ids.empty());
if (lit_ids.size() > 1) {
- final_to_frag.emplace(final_id, frag_id++);
+ final_to_frag.emplace(final_id, LitFragment(frag_id++));
continue;
}
const auto lit_id = *lit_ids.begin();
const auto &lit = build.literals.right.at(lit_id);
if (lit.s.length() < ROSE_SHORT_LITERAL_LEN_MAX) {
- final_to_frag.emplace(final_id, frag_id++);
+ final_to_frag.emplace(final_id, LitFragment(frag_id++));
continue;
}
// Combining fragments that squash their groups is unsafe.
const auto &info = build.literal_info[lit_id];
if (info.squash_group) {
- final_to_frag.emplace(final_id, frag_id++);
+ final_to_frag.emplace(final_id, LitFragment(frag_id++));
continue;
}
as_string_list(m.second).c_str());
for (const auto final_id : m.second) {
assert(!contains(final_to_frag, final_id));
- final_to_frag.emplace(final_id, frag_id);
+ final_to_frag.emplace(final_id, LitFragment(frag_id));
}
frag_id++;
}
static
tuple<u32, u32, u32>
buildLiteralPrograms(RoseBuildImpl &build, build_context &bc,
- const map<u32, u32> &final_to_frag_map) {
+ map<u32, LitFragment> &final_to_frag_map) {
// Build a reverse mapping from fragment -> final_id.
map<u32, flat_set<u32>> frag_to_final_map;
for (const auto &m : final_to_frag_map) {
- frag_to_final_map[m.second].insert(m.first);
+ frag_to_final_map[m.second.fragment_id].insert(m.first);
}
const u32 num_fragments = verify_u32(frag_to_final_map.size());
auto lit_edge_map = findEdgesByLiteral(build);
- bc.litPrograms.resize(num_fragments);
+ vector<u32> litPrograms(num_fragments);
vector<u32> delayRebuildPrograms(num_fragments);
for (u32 frag_id = 0; frag_id != num_fragments; ++frag_id) {
DEBUG_PRINTF("frag_id=%u, final_ids=[%s]\n", frag_id,
as_string_list(final_ids).c_str());
- bc.litPrograms[frag_id] =
+ litPrograms[frag_id] =
writeLiteralProgram(build, bc, final_ids, lit_edge_map);
delayRebuildPrograms[frag_id] =
buildDelayRebuildProgram(build, bc, final_ids);
}
+ // Record each fragment's literal and delay rebuild program offsets so
+ // that matchers can refer to its programs directly by bytecode offset.
+ for (auto &frag : final_to_frag_map | map_values) {
+ frag.lit_program_offset = litPrograms[frag.fragment_id];
+ frag.delay_program_offset = delayRebuildPrograms[frag.fragment_id];
+ }
+
u32 litProgramsOffset =
- bc.engine_blob.add(begin(bc.litPrograms), end(bc.litPrograms));
+ bc.engine_blob.add(begin(litPrograms), end(litPrograms));
u32 delayRebuildProgramsOffset = bc.engine_blob.add(
begin(delayRebuildPrograms), end(delayRebuildPrograms));
// Build anchored matcher.
size_t asize = 0;
u32 amatcherOffset = 0;
- auto atable = buildAnchoredMatcher(*this, anchored_dfas, bc.litPrograms,
- final_to_frag_map, &asize);
+ auto atable =
+ buildAnchoredMatcher(*this, anchored_dfas, final_to_frag_map, &asize);
if (atable) {
currOffset = ROUNDUP_CL(currOffset);
amatcherOffset = currOffset;
bool canImplementGraphs(const RoseBuildImpl &tbi);
#endif
-std::map<u32, u32> groupByFragment(const RoseBuildImpl &build);
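+/** \brief Details of a literal fragment: its fragment id and the bytecode
+ * offsets of its programs, which are filled in by buildLiteralPrograms. */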
+struct LitFragment {
+ explicit LitFragment(u32 fragment_id_in) : fragment_id(fragment_id_in) {}
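+ /** \brief Id of the fragment, assigned in groupByFragment. */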
+ u32 fragment_id;
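+ /** \brief Offset of the fragment's literal program in the bytecode. */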
+ u32 lit_program_offset = 0;
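+ /** \brief Offset of the fragment's delay rebuild program, or 0 if there
+  * is none (delay rebuild programs are only built in streaming mode). */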
+ u32 delay_program_offset = 0;
+};
+
+std::map<u32, LitFragment> groupByFragment(const RoseBuildImpl &build);
} // namespace ue2
static
map<u32, hwlm_group_t> makeFragGroupMap(const RoseBuildImpl &build,
- const map<u32, u32> &final_to_frag_map) {
+ const map<u32, LitFragment> &final_to_frag_map) {
map<u32, hwlm_group_t> frag_to_group;
for (const auto &m : final_to_frag_map) {
u32 final_id = m.first;
- u32 frag_id = m.second;
+ u32 frag_id = m.second.fragment_id;
hwlm_group_t groups = 0;
const auto &lits = build.final_id_to_literal.at(final_id);
for (auto lit_id : lits) {
}
MatcherProto makeMatcherProto(const RoseBuildImpl &build,
- const map<u32, u32> &final_to_frag_map,
+ const map<u32, LitFragment> &final_to_frag_map,
rose_literal_table table, bool delay_rebuild,
size_t max_len, u32 max_offset) {
MatcherProto mp;
for (auto &lit : mp.lits) {
u32 final_id = lit.id;
assert(contains(final_to_frag_map, final_id));
- lit.id = final_to_frag_map.at(final_id);
- assert(contains(frag_group_map, lit.id));
- lit.groups = frag_group_map.at(lit.id);
+ const auto &frag = final_to_frag_map.at(final_id);
+ lit.id = delay_rebuild ? frag.delay_program_offset
+ : frag.lit_program_offset;
+ assert(contains(frag_group_map, frag.fragment_id));
+ lit.groups = frag_group_map.at(frag.fragment_id);
}
sort_and_unique(mp.lits);
aligned_unique_ptr<HWLM>
buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
- const map<u32, u32> &final_to_frag_map,
+ const map<u32, LitFragment> &final_to_frag_map,
rose_group *fgroups, size_t *fsize,
size_t *historyRequired) {
*fsize = 0;
aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
const RoseBuildImpl &build, size_t longLitLengthThreshold,
- const map<u32, u32> &final_to_frag_map, size_t *drsize) {
+ const map<u32, LitFragment> &final_to_frag_map, size_t *drsize) {
*drsize = 0;
if (!build.cc.streaming) {
aligned_unique_ptr<HWLM>
buildSmallBlockMatcher(const RoseBuildImpl &build,
- const map<u32, u32> &final_to_frag_map, size_t *sbsize) {
+ const map<u32, LitFragment> &final_to_frag_map,
+ size_t *sbsize) {
*sbsize = 0;
if (build.cc.streaming) {
aligned_unique_ptr<HWLM>
buildEodAnchoredMatcher(const RoseBuildImpl &build,
- const map<u32, u32> &final_to_frag_map, size_t *esize) {
+ const map<u32, LitFragment> &final_to_frag_map,
+ size_t *esize) {
*esize = 0;
auto mp = makeMatcherProto(build, final_to_frag_map, ROSE_EOD_ANCHORED,
* If max_offset is specified (and not ROSE_BOUND_INF), then literals that can
* only lead to a pattern match after max_offset may be excluded.
*/
-MatcherProto makeMatcherProto(const RoseBuildImpl &build,
- const std::map<u32, u32> &final_to_frag_map,
- rose_literal_table table, bool delay_rebuild,
- size_t max_len, u32 max_offset = ROSE_BOUND_INF);
-
-aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
- size_t longLitLengthThreshold,
- const std::map<u32, u32> &final_to_frag_map,
- rose_group *fgroups,
- size_t *fsize,
- size_t *historyRequired);
+MatcherProto
+makeMatcherProto(const RoseBuildImpl &build,
+ const std::map<u32, LitFragment> &final_to_frag_map,
+ rose_literal_table table, bool delay_rebuild, size_t max_len,
+ u32 max_offset = ROSE_BOUND_INF);
+
+aligned_unique_ptr<HWLM>
+buildFloatingMatcher(const RoseBuildImpl &build, size_t longLitLengthThreshold,
+ const std::map<u32, LitFragment> &final_to_frag_map,
+ rose_group *fgroups, size_t *fsize,
+ size_t *historyRequired);
aligned_unique_ptr<HWLM> buildDelayRebuildMatcher(
const RoseBuildImpl &build, size_t longLitLengthThreshold,
- const std::map<u32, u32> &final_to_frag_map, size_t *drsize);
+ const std::map<u32, LitFragment> &final_to_frag_map, size_t *drsize);
aligned_unique_ptr<HWLM>
buildSmallBlockMatcher(const RoseBuildImpl &build,
- const std::map<u32, u32> &final_to_frag_map,
+ const std::map<u32, LitFragment> &final_to_frag_map,
size_t *sbsize);
aligned_unique_ptr<HWLM>
buildEodAnchoredMatcher(const RoseBuildImpl &build,
- const std::map<u32, u32> &final_to_frag_map,
+ const std::map<u32, LitFragment> &final_to_frag_map,
size_t *esize);
void findMoreLiteralMasks(RoseBuildImpl &build);