src/fdr/fdr_internal.h
src/fdr/fdr_confirm.h
src/fdr/fdr_confirm_runtime.h
- src/fdr/fdr_streaming_runtime.h
src/fdr/flood_runtime.h
src/fdr/fdr_loadval.h
src/fdr/teddy.c
src/rose/init.h
src/rose/init.c
src/rose/stream.c
+ src/rose/stream_long_lit.h
+ src/rose/stream_long_lit_hash.h
src/rose/match.h
src/rose/match.c
src/rose/miracle.h
src/fdr/fdr_engine_description.cpp
src/fdr/fdr_engine_description.h
src/fdr/fdr_internal.h
- src/fdr/fdr_streaming_compile.cpp
- src/fdr/fdr_streaming_internal.h
src/fdr/flood_compile.cpp
src/fdr/teddy_compile.cpp
src/fdr/teddy_compile.h
src/rose/rose_build_impl.h
src/rose/rose_build_infix.cpp
src/rose/rose_build_infix.h
+ src/rose/rose_build_long_lit.cpp
+ src/rose/rose_build_long_lit.h
src/rose/rose_build_lookaround.cpp
src/rose/rose_build_lookaround.h
src/rose/rose_build_matchers.cpp
#include "fdr_confirm_runtime.h"
#include "fdr_internal.h"
#include "fdr_loadval.h"
-#include "fdr_streaming_runtime.h"
#include "flood_runtime.h"
#include "teddy.h"
#include "teddy_internal.h"
len,
hbuf,
0,
- hbuf, // nocase
- 0,
start,
cb,
ctxt,
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
- hwlm_group_t groups, u8 *stream_state) {
+ hwlm_group_t groups) {
struct FDR_Runtime_Args a = {
buf,
len,
hbuf,
hlen,
- hbuf, // nocase - start same as caseful, override later if needed
- hlen, // nocase
start,
cb,
ctxt,
* the history buffer (they may be garbage). */
hbuf ? unaligned_load_u64a(hbuf + hlen - sizeof(u64a)) : (u64a)0
};
- fdrUnpackState(fdr, &a, stream_state);
hwlm_error_t ret;
if (unlikely(a.start_offset >= a.len)) {
ret = funcs[fdr->engineID](fdr, &a, groups);
}
- fdrPackState(fdr, &a, stream_state);
return ret;
}
struct FDR;
-/** \brief Returns non-zero if the contents of the stream state indicate that
- * there is active FDR history beyond the regularly used history. */
-u32 fdrStreamStateActive(const struct FDR *fdr, const u8 *stream_state);
-
/**
* \brief Block-mode scan.
*
* \param cb Callback to call when a match is found.
* \param ctxt Caller-provided context pointer supplied to callback on match.
* \param groups Initial groups mask.
- * \param stream_state Persistent stream state for use by FDR.
*/
hwlm_error_t fdrExecStreaming(const struct FDR *fdr, const u8 *hbuf,
size_t hlen, const u8 *buf, size_t len,
size_t start, HWLMCallback cb, void *ctxt,
- hwlm_group_t groups, u8 *stream_state);
+ hwlm_group_t groups);
#ifdef __cplusplus
}
#include "teddy_engine_description.h"
#include "grey.h"
#include "ue2common.h"
+#include "hwlm/hwlm_build.h"
#include "util/alloc.h"
#include "util/compare.h"
#include "util/dump_mask.h"
} // namespace
+static
+size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
+ size_t rv = 0;
+ for (const auto &lit : lits) {
+ rv = max(rv, lit.msk.size());
+ }
+ return rv;
+}
+
+static
+void setHistoryRequired(hwlmStreamingControl &stream_ctl,
+ const vector<hwlmLiteral> &lits) {
+ size_t max_mask_len = maxMaskLen(lits);
+
+ // we want enough history to manage the longest literal and the longest
+ // mask.
+ stream_ctl.literal_history_required = max(maxLen(lits), max_mask_len) - 1;
+}
+
static
aligned_unique_ptr<FDR>
fdrBuildTableInternal(const vector<hwlmLiteral> &lits, bool make_small,
const target_t &target, const Grey &grey, u32 hint,
hwlmStreamingControl *stream_control) {
pair<aligned_unique_ptr<u8>, size_t> link(nullptr, 0);
+
if (stream_control) {
- link = fdrBuildTableStreaming(lits, *stream_control);
+ setHistoryRequired(*stream_control, lits);
}
DEBUG_PRINTF("cpu has %s\n", target.has_avx2() ? "avx2" : "no-avx2");
ptr = ROUNDUP_PTR(ptr, alignof(LitInfo));
if (next(i) == e) {
- finalLI.next = 0x0;
+ finalLI.next = 0;
} else {
// our next field represents an adjustment on top of
// current address + the actual size of the literal
if (loc < buf) {
u32 full_overhang = buf - loc;
- const u8 *history = caseless ? a->buf_history_nocase
- : a->buf_history;
- size_t len_history = caseless ? a->len_history_nocase
- : a->len_history;
+ const u8 *history = a->buf_history;
+ size_t len_history = a->len_history;
// can't do a vectored confirm either if we don't have
// the bytes
const u8 *loc2 = buf + i - li->extended_size + 1 - pullBackAmount;
if (loc2 < buf) {
u32 full_overhang = buf - loc2;
- size_t len_history = caseless ? a->len_history_nocase
- : a->len_history;
+ size_t len_history = a->len_history;
if (full_overhang > len_history) {
goto out;
}
size_t len;
const u8 *buf_history;
size_t len_history;
- const u8 *buf_history_nocase;
- size_t len_history_nocase;
size_t start_offset;
HWLMCallback cb;
void *ctxt;
+++ /dev/null
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "fdr_internal.h"
-#include "fdr_streaming_internal.h"
-#include "fdr_compile_internal.h"
-#include "hwlm/hwlm_build.h"
-#include "util/alloc.h"
-#include "util/bitutils.h"
-#include "util/target_info.h"
-#include "util/verify_types.h"
-
-#include <algorithm>
-#include <cassert>
-#include <cstdio>
-#include <cstring>
-#include <deque>
-#include <set>
-
-#include <boost/dynamic_bitset.hpp>
-
-using namespace std;
-using boost::dynamic_bitset;
-
-namespace ue2 {
-
-namespace {
-struct LongLitOrder {
- bool operator()(const hwlmLiteral &i1, const hwlmLiteral &i2) const {
- if (i1.nocase != i2.nocase) {
- return i1.nocase < i2.nocase;
- } else {
- return i1.s < i2.s;
- }
- }
-};
-}
-
-static
-bool hwlmLitEqual(const hwlmLiteral &l1, const hwlmLiteral &l2) {
- return l1.s == l2.s && l1.nocase == l2.nocase;
-}
-
-static
-u32 roundUpToPowerOfTwo(u32 x) {
- x -= 1;
- x |= (x >> 1);
- x |= (x >> 2);
- x |= (x >> 4);
- x |= (x >> 8);
- x |= (x >> 16);
- return x + 1;
-}
-
-/**
- * \brief Creates a long literals vector containing all literals of length > max_len.
- *
- * The last char of each literal is trimmed as we're not interested in full
- * matches, only partial matches.
- *
- * Literals are sorted (by caseful/caseless, then lexicographical order) and
- * made unique.
- *
- * The ID of each literal is set to its position in the vector.
- *
- * \return False if there aren't any long literals.
- */
-static
-bool setupLongLits(const vector<hwlmLiteral> &lits,
- vector<hwlmLiteral> &long_lits, size_t max_len) {
- long_lits.reserve(lits.size());
- for (const auto &lit : lits) {
- if (lit.s.length() > max_len) {
- hwlmLiteral tmp = lit; // copy
- tmp.s.pop_back();
- tmp.id = 0; // recalc later
- tmp.groups = 0; // filled in later by hash bucket(s)
- long_lits.push_back(move(tmp));
- }
- }
-
- if (long_lits.empty()) {
- return false;
- }
-
- // sort long_literals by caseful/caseless and in lexicographical order,
- // remove duplicates
- stable_sort(long_lits.begin(), long_lits.end(), LongLitOrder());
- auto new_end = unique(long_lits.begin(), long_lits.end(), hwlmLitEqual);
- long_lits.erase(new_end, long_lits.end());
-
- // fill in ids; not currently used
- for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
- i->id = distance(long_lits.begin(), i);
- }
- return true;
-}
-
-// boundaries are the 'start' boundaries for each 'mode'
-// so boundary[CASEFUL] is the index one above the largest caseful index
-// positions[CASEFUL] is the # of positions in caseful strings (stream)
-// hashedPositions[CASEFUL] is the # of positions in caseful strings
-// (not returned - a temporary)
-// hashEntries[CASEFUL] is the # of positions hashed for caseful strings
-// (rounded up to the nearest power of two)
-static
-void analyzeLits(const vector<hwlmLiteral> &long_lits, size_t max_len,
- u32 *boundaries, u32 *positions, u32 *hashEntries) {
- u32 hashedPositions[MAX_MODES];
-
- for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
- boundaries[m] = verify_u32(long_lits.size());
- positions[m] = 0;
- hashedPositions[m] = 0;
- }
-
- for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
- if (i->nocase) {
- boundaries[CASEFUL] = verify_u32(distance(long_lits.begin(), i));
- break;
- }
- }
-
- for (const auto &lit : long_lits) {
- Modes m = lit.nocase ? CASELESS : CASEFUL;
- for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
- hashedPositions[m]++;
- }
- positions[m] += lit.s.size();
- }
-
- for (u32 m = CASEFUL; m < MAX_MODES; m++) {
- hashEntries[m] = hashedPositions[m]
- ? roundUpToPowerOfTwo(MAX(4096, hashedPositions[m]))
- : 0;
- }
-
-#ifdef DEBUG_COMPILE
- printf("analyzeLits:\n");
- for (Modes m = CASEFUL; m < MAX_MODES; m++) {
- printf("mode %s boundary %d positions %d hashedPositions %d "
- "hashEntries %d\n",
- (m == CASEFUL) ? "caseful" : "caseless", boundaries[m],
- positions[m], hashedPositions[m], hashEntries[m]);
- }
- printf("\n");
-#endif
-}
-
-static
-u32 hashLit(const hwlmLiteral &l, u32 offset, size_t max_len, Modes m) {
- return streaming_hash((const u8 *)l.s.c_str() + offset, max_len, m);
-}
-
-// sort by 'distance from start'
-namespace {
-struct OffsetIDFromEndOrder {
- const vector<hwlmLiteral> &lits; // not currently used
- explicit OffsetIDFromEndOrder(const vector<hwlmLiteral> &lits_in)
- : lits(lits_in) {}
- bool operator()(const pair<u32, u32> &i1, const pair<u32, u32> &i2) const {
- if (i1.second != i2.second) {
- // longest is 'first', so > not <
- return i1.second > i2.second;
- }
- return i1.first < i2.first;
- }
-};
-}
-
-static
-void fillHashes(const vector<hwlmLiteral> &long_lits, size_t max_len,
- FDRSHashEntry *tab, size_t numEntries, Modes mode,
- map<u32, u32> &litToOffsetVal) {
- const u32 nbits = lg2(numEntries);
- map<u32, deque<pair<u32, u32> > > bucketToLitOffPairs;
- map<u32, u64a> bucketToBitfield;
-
- for (const auto &lit : long_lits) {
- if ((mode == CASELESS) != lit.nocase) {
- continue;
- }
- for (u32 j = 1; j < lit.s.size() - max_len + 1; j++) {
- u32 h = hashLit(lit, j, max_len, mode);
- u32 h_ent = h & ((1U << nbits) - 1);
- u32 h_low = (h >> nbits) & 63;
- bucketToLitOffPairs[h_ent].emplace_back(lit.id, j);
- bucketToBitfield[h_ent] |= (1ULL << h_low);
- }
- }
-
- // this used to be a set<u32>, but a bitset is much much faster given that
- // we're using it only for membership testing.
- dynamic_bitset<> filledBuckets(numEntries); // all bits zero by default.
-
- // sweep out bitfield entries and save the results swapped accordingly
- // also, anything with bitfield entries is put in filledBuckets
- for (const auto &m : bucketToBitfield) {
- const u32 &bucket = m.first;
- const u64a &contents = m.second;
- tab[bucket].bitfield = contents;
- filledBuckets.set(bucket);
- }
-
- // store out all our chains based on free values in our hash table.
- // find nearest free locations that are empty (there will always be more
- // entries than strings, at present)
- for (auto &m : bucketToLitOffPairs) {
- u32 bucket = m.first;
- deque<pair<u32, u32>> &d = m.second;
-
- // sort d by distance of the residual string (len minus our depth into
- // the string). We need to put the 'furthest back' string first...
- stable_sort(d.begin(), d.end(), OffsetIDFromEndOrder(long_lits));
-
- while (1) {
- // first time through is always at bucket, then we fill in links
- filledBuckets.set(bucket);
- FDRSHashEntry *ent = &tab[bucket];
- u32 lit_id = d.front().first;
- u32 offset = d.front().second;
-
- ent->state = verify_u32(litToOffsetVal[lit_id] + offset + max_len);
- ent->link = (u32)LINK_INVALID;
-
- d.pop_front();
- if (d.empty()) {
- break;
- }
- // now, if there is another value
- // find a bucket for it and put in 'bucket' and repeat
- // all we really need to do is find something not in filledBuckets,
- // ideally something close to bucket
- // we search backward and forward from bucket, trying to stay as
- // close as possible.
- UNUSED bool found = false;
- int bucket_candidate = 0;
- for (u32 k = 1; k < numEntries * 2; k++) {
- bucket_candidate = bucket + (((k & 1) == 0)
- ? (-(int)k / 2) : (k / 2));
- if (bucket_candidate < 0 ||
- (size_t)bucket_candidate >= numEntries) {
- continue;
- }
- if (!filledBuckets.test(bucket_candidate)) {
- found = true;
- break;
- }
- }
-
- assert(found);
- bucket = bucket_candidate;
- ent->link = bucket;
- }
- }
-}
-
-static
-size_t maxMaskLen(const vector<hwlmLiteral> &lits) {
- size_t rv = 0;
- for (const auto &lit : lits) {
- rv = max(rv, lit.msk.size());
- }
- return rv;
-}
-
-pair<aligned_unique_ptr<u8>, size_t>
-fdrBuildTableStreaming(const vector<hwlmLiteral> &lits,
- hwlmStreamingControl &stream_control) {
- // refuse to compile if we are forced to have smaller than minimum
- // history required for long-literal support, full stop
- // otherwise, choose the maximum of the preferred history quantity
- // (currently a fairly extravagant 32) or the already used history
- // quantity - subject to the limitation of stream_control.history_max
-
- const size_t MIN_HISTORY_REQUIRED = 32;
-
- if (MIN_HISTORY_REQUIRED > stream_control.history_max) {
- throw std::logic_error("Cannot set history to minimum history required");
- }
-
- size_t max_len =
- MIN(stream_control.history_max,
- MAX(MIN_HISTORY_REQUIRED, stream_control.history_min));
- assert(max_len >= MIN_HISTORY_REQUIRED);
- size_t max_mask_len = maxMaskLen(lits);
-
- vector<hwlmLiteral> long_lits;
- if (!setupLongLits(lits, long_lits, max_len) || false) {
- // "Don't need to do anything" path, not really a fail
- DEBUG_PRINTF("Streaming literal path produces no table\n");
-
- // we want enough history to manage the longest literal and the longest
- // mask.
- stream_control.literal_history_required =
- max(maxLen(lits), max_mask_len) - 1;
- stream_control.literal_stream_state_required = 0;
- return {nullptr, size_t{0}};
- }
-
- // Ensure that we have enough room for the longest mask.
- if (max_mask_len) {
- max_len = max(max_len, max_mask_len - 1);
- }
-
- u32 boundary[MAX_MODES];
- u32 positions[MAX_MODES];
- u32 hashEntries[MAX_MODES];
-
- analyzeLits(long_lits, max_len, boundary, positions, hashEntries);
-
- // first assess the size and find our caseless threshold
- size_t headerSize = ROUNDUP_16(sizeof(FDRSTableHeader));
-
- size_t litTabOffset = headerSize;
-
- size_t litTabNumEntries = long_lits.size() + 1;
- size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(FDRSLiteral));
-
- size_t wholeLitTabOffset = litTabOffset + litTabSize;
- size_t totalWholeLitTabSize = ROUNDUP_16(positions[CASEFUL] +
- positions[CASELESS]);
-
- size_t htOffset[MAX_MODES];
- size_t htSize[MAX_MODES];
-
- htOffset[CASEFUL] = wholeLitTabOffset + totalWholeLitTabSize;
- htSize[CASEFUL] = hashEntries[CASEFUL] * sizeof(FDRSHashEntry);
- htOffset[CASELESS] = htOffset[CASEFUL] + htSize[CASEFUL];
- htSize[CASELESS] = hashEntries[CASELESS] * sizeof(FDRSHashEntry);
-
- size_t tabSize = ROUNDUP_16(htOffset[CASELESS] + htSize[CASELESS]);
-
- // need to add +2 to both of these to allow space for the actual largest
- // value as well as handling the fact that we add one to the space when
- // storing out a position to allow zero to mean "no stream state value"
- u8 streamBits[MAX_MODES];
- streamBits[CASEFUL] = lg2(roundUpToPowerOfTwo(positions[CASEFUL] + 2));
- streamBits[CASELESS] = lg2(roundUpToPowerOfTwo(positions[CASELESS] + 2));
- u32 tot_state_bytes = (streamBits[CASEFUL] + streamBits[CASELESS] + 7) / 8;
-
- auto secondaryTable = aligned_zmalloc_unique<u8>(tabSize);
- assert(secondaryTable); // otherwise would have thrown std::bad_alloc
-
- // then fill it in
- u8 * ptr = secondaryTable.get();
- FDRSTableHeader * header = (FDRSTableHeader *)ptr;
- // fill in header
- header->pseudoEngineID = (u32)0xffffffff;
- header->N = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
- for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
- header->boundary[m] = boundary[m];
- header->hashOffset[m] = verify_u32(htOffset[m]);
- header->hashNBits[m] = lg2(hashEntries[m]);
- header->streamStateBits[m] = streamBits[m];
- }
- assert(tot_state_bytes < sizeof(u64a));
- header->streamStateBytes = verify_u8(tot_state_bytes); // u8
-
- ptr += headerSize;
-
- // now fill in the rest
-
- FDRSLiteral * litTabPtr = (FDRSLiteral *)ptr;
- ptr += litTabSize;
-
- map<u32, u32> litToOffsetVal;
- for (auto i = long_lits.begin(), e = long_lits.end(); i != e; ++i) {
- u32 entry = verify_u32(i - long_lits.begin());
- u32 offset = verify_u32(ptr - secondaryTable.get());
-
- // point the table entry to the string location
- litTabPtr[entry].offset = offset;
-
- litToOffsetVal[entry] = offset;
-
- // copy the string into the string location
- memcpy(ptr, i->s.c_str(), i->s.size());
-
- ptr += i->s.size(); // and the string location
- }
-
- // fill in final lit table entry with current ptr (serves as end value)
- litTabPtr[long_lits.size()].offset = verify_u32(ptr - secondaryTable.get());
-
- // fill hash tables
- ptr = secondaryTable.get() + htOffset[CASEFUL];
- for (u32 m = CASEFUL; m < MAX_MODES; ++m) {
- fillHashes(long_lits, max_len, (FDRSHashEntry *)ptr, hashEntries[m],
- (Modes)m, litToOffsetVal);
- ptr += htSize[m];
- }
-
- // tell the world what we did
- stream_control.literal_history_required = max_len;
- stream_control.literal_stream_state_required = tot_state_bytes;
- return {move(secondaryTable), tabSize};
-}
-
-} // namespace ue2
+++ /dev/null
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FDR_STREAMING_INTERNAL_H
-#define FDR_STREAMING_INTERNAL_H
-
-#include "ue2common.h"
-#include "fdr_internal.h"
-#include "util/unaligned.h"
-
-// tertiary table:
-// a header (FDRSTableHeader)
-// long_lits.size()+1 entries holding an offset to the string in the
-// 'whole literal table' (FDRSLiteral structure)
-// the whole literal table - every string packed in (freeform)
-// hash table (caseful) (FDRSHashEntry)
-// hash table (caseless) (FDRSHashEntry)
-
-enum Modes {
- CASEFUL = 0,
- CASELESS = 1,
- MAX_MODES = 2
-};
-
-// We have one of these structures hanging off the 'link' of our secondary
-// FDR table that handles streaming strings
-struct FDRSTableHeader {
- u32 pseudoEngineID; // set to 0xffffffff to indicate this isn't an FDR
-
- // string id one beyond the maximum entry for this type of literal
- // boundary[CASEFUL] is the end of the caseful literals
- // boundary[CASELESS] is the end of the caseless literals and one beyond
- // the largest literal id (the size of the littab)
- u32 boundary[MAX_MODES];
-
- // offsets are 0 if no such table exists
- // offset from the base of the tertiary structure to the hash table
- u32 hashOffset[MAX_MODES];
- u32 hashNBits[MAX_MODES]; // lg2 of the size of the hash table
-
- u8 streamStateBits[MAX_MODES];
- u8 streamStateBytes; // total size of packed stream state in bytes
- u8 N; // prefix lengths
- u16 pad;
-};
-
-// One of these structures per literal entry in our secondary FDR table.
-struct FDRSLiteral {
- u32 offset;
- // potentially - another u32 to point to the 'next lesser included literal'
- // which would be a literal that overlaps this one in such a way that a
- // failure to match _this_ literal can leave us in a state that we might
- // still match that literal. Offset information might also be called for,
- // in which case we might be wanting to use a FDRSLiteralOffset
-};
-
-typedef u32 FDRSLiteralOffset;
-
-#define LINK_INVALID 0xffffffff
-
-// One of these structures per hash table entry in our secondary FDR table
-struct FDRSHashEntry {
- u64a bitfield;
- FDRSLiteralOffset state;
- u32 link;
-};
-
-static really_inline
-u32 get_start_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
- return m == CASEFUL ? 0 : h->boundary[m-1];
-}
-
-static really_inline
-u32 get_end_lit_idx(const struct FDRSTableHeader * h, enum Modes m) {
- return h->boundary[m];
-}
-
-static really_inline
-const struct FDRSLiteral * getLitTab(const struct FDRSTableHeader * h) {
- return (const struct FDRSLiteral *) (((const u8 *)h) +
- ROUNDUP_16(sizeof(struct FDRSTableHeader)));
-}
-
-static really_inline
-u32 getBaseOffsetOfLits(const struct FDRSTableHeader * h, enum Modes m) {
- return getLitTab(h)[get_start_lit_idx(h, m)].offset;
-}
-
-static really_inline
-u32 packStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
- return v - getBaseOffsetOfLits(h, m) + 1;
-}
-
-static really_inline
-u32 unpackStateVal(const struct FDRSTableHeader * h, enum Modes m, u32 v) {
- return v + getBaseOffsetOfLits(h, m) - 1;
-}
-
-static really_inline
-u32 has_bit(const struct FDRSHashEntry * ent, u32 bit) {
- return (ent->bitfield >> bit) & 0x1;
-}
-
-static really_inline
-u32 streaming_hash(const u8 *ptr, UNUSED size_t len, enum Modes mode) {
- const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
- const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
- assert(len >= 32);
-
- u64a v1 = unaligned_load_u64a(ptr);
- u64a v2 = unaligned_load_u64a(ptr + 8);
- u64a v3 = unaligned_load_u64a(ptr + 16);
- if (mode == CASELESS) {
- v1 &= CASEMASK;
- v2 &= CASEMASK;
- v3 &= CASEMASK;
- }
- v1 *= MULTIPLIER;
- v2 *= (MULTIPLIER*MULTIPLIER);
- v3 *= (MULTIPLIER*MULTIPLIER*MULTIPLIER);
- v1 >>= 32;
- v2 >>= 32;
- v3 >>= 32;
- return v1 ^ v2 ^ v3;
-}
-
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2015-2016, Intel Corporation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef FDR_STREAMING_RUNTIME_H
-#define FDR_STREAMING_RUNTIME_H
-
-#include "fdr_streaming_internal.h"
-#include "util/partial_store.h"
-
-#include <string.h>
-
-static really_inline
-const struct FDRSTableHeader * getSHDR(const struct FDR * fdr) {
- const u8 * linkPtr = ((const u8 *)fdr) + fdr->link;
- // test if it's not really a engineID, but a 'pseudo engine id'
- assert(*(const u32 *)linkPtr == 0xffffffff);
- assert(linkPtr);
- return (const struct FDRSTableHeader *)linkPtr;
-}
-
-// Reads from stream state and unpacks values into stream state table.
-static really_inline
-void getStreamStates(const struct FDRSTableHeader * streamingTable,
- const u8 * stream_state, u32 * table) {
- assert(streamingTable);
- assert(stream_state);
- assert(table);
-
- u8 ss_bytes = streamingTable->streamStateBytes;
- u8 ssb = streamingTable->streamStateBits[CASEFUL];
- UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
- assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
-
-#if defined(ARCH_32_BIT)
- // On 32-bit hosts, we may be able to avoid having to do any u64a
- // manipulation at all.
- if (ss_bytes <= 4) {
- u32 ssb_mask = (1U << ssb) - 1;
- u32 streamVal = partial_load_u32(stream_state, ss_bytes);
- table[CASEFUL] = (u32)(streamVal & ssb_mask);
- table[CASELESS] = (u32)(streamVal >> ssb);
- return;
- }
-#endif
-
- u64a ssb_mask = (1ULL << ssb) - 1;
- u64a streamVal = partial_load_u64a(stream_state, ss_bytes);
- table[CASEFUL] = (u32)(streamVal & ssb_mask);
- table[CASELESS] = (u32)(streamVal >> (u64a)ssb);
-}
-
-#ifndef NDEBUG
-// Defensive checking (used in assert) that these table values don't overflow
-// outside the range available.
-static really_inline UNUSED
-u32 streamingTableOverflow(u32 * table, u8 ssb, u8 ssb_nc) {
- u32 ssb_mask = (1ULL << (ssb)) - 1;
- if (table[CASEFUL] & ~ssb_mask) {
- return 1;
- }
- u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
- if (table[CASELESS] & ~ssb_nc_mask) {
- return 1;
- }
- return 0;
-}
-#endif
-
-// Reads from stream state table and packs values into stream state.
-static really_inline
-void setStreamStates(const struct FDRSTableHeader * streamingTable,
- u8 * stream_state, u32 * table) {
- assert(streamingTable);
- assert(stream_state);
- assert(table);
-
- u8 ss_bytes = streamingTable->streamStateBytes;
- u8 ssb = streamingTable->streamStateBits[CASEFUL];
- UNUSED u8 ssb_nc = streamingTable->streamStateBits[CASELESS];
- assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
- assert(!streamingTableOverflow(table, ssb, ssb_nc));
-
-#if defined(ARCH_32_BIT)
- // On 32-bit hosts, we may be able to avoid having to do any u64a
- // manipulation at all.
- if (ss_bytes <= 4) {
- u32 stagingStreamState = table[CASEFUL];
- stagingStreamState |= (table[CASELESS] << ssb);
-
- partial_store_u32(stream_state, stagingStreamState, ss_bytes);
- return;
- }
-#endif
-
- u64a stagingStreamState = (u64a)table[CASEFUL];
- stagingStreamState |= (u64a)table[CASELESS] << ((u64a)ssb);
- partial_store_u64a(stream_state, stagingStreamState, ss_bytes);
-}
-
-u32 fdrStreamStateActive(const struct FDR * fdr, const u8 * stream_state) {
- if (!stream_state) {
- return 0;
- }
- const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
- u8 ss_bytes = streamingTable->streamStateBytes;
-
- // We just care if there are any bits set, and the test below is faster
- // than a partial_load_u64a (especially on 32-bit hosts).
- for (u32 i = 0; i < ss_bytes; i++) {
- if (*stream_state) {
- return 1;
- }
- ++stream_state;
- }
- return 0;
-}
-
-// binary search for the literal index that contains the current state
-static really_inline
-u32 findLitTabEntry(const struct FDRSTableHeader * streamingTable,
- u32 stateValue, enum Modes m) {
- const struct FDRSLiteral * litTab = getLitTab(streamingTable);
- u32 lo = get_start_lit_idx(streamingTable, m);
- u32 hi = get_end_lit_idx(streamingTable, m);
-
- // Now move stateValue back by one so that we're looking for the
- // litTab entry that includes it the string, not the one 'one past' it
- stateValue -= 1;
- assert(lo != hi);
- assert(litTab[lo].offset <= stateValue);
- assert(litTab[hi].offset > stateValue);
-
- // binary search to find the entry e such that:
- // litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral
- while (lo + 1 < hi) {
- u32 mid = (lo + hi) / 2;
- if (litTab[mid].offset <= stateValue) {
- lo = mid;
- } else { //(litTab[mid].offset > stateValue) {
- hi = mid;
- }
- }
- assert(litTab[lo].offset <= stateValue);
- assert(litTab[hi].offset > stateValue);
- return lo;
-}
-
-static really_inline
-void fdrUnpackStateMode(struct FDR_Runtime_Args *a,
- const struct FDRSTableHeader *streamingTable,
- const struct FDRSLiteral * litTab,
- const u32 *state_table,
- const enum Modes m) {
- if (!state_table[m]) {
- return;
- }
-
- u32 stateValue = unpackStateVal(streamingTable, m, state_table[m]);
- u32 idx = findLitTabEntry(streamingTable, stateValue, m);
- size_t found_offset = litTab[idx].offset;
- const u8 * found_buf = found_offset + (const u8 *)streamingTable;
- size_t found_sz = stateValue - found_offset;
- if (m == CASEFUL) {
- a->buf_history = found_buf;
- a->len_history = found_sz;
- } else {
- a->buf_history_nocase = found_buf;
- a->len_history_nocase = found_sz;
- }
-}
-
-static really_inline
-void fdrUnpackState(const struct FDR * fdr, struct FDR_Runtime_Args * a,
- const u8 * stream_state) {
- // nothing to do if there's no stream state for the case
- if (!stream_state) {
- return;
- }
-
- const struct FDRSTableHeader * streamingTable = getSHDR(fdr);
- const struct FDRSLiteral * litTab = getLitTab(streamingTable);
-
- u32 state_table[MAX_MODES];
- getStreamStates(streamingTable, stream_state, state_table);
-
- fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASEFUL);
- fdrUnpackStateMode(a, streamingTable, litTab, state_table, CASELESS);
-}
-
-static really_inline
-u32 do_single_confirm(const struct FDRSTableHeader *streamingTable,
- const struct FDR_Runtime_Args *a, u32 hashState,
- enum Modes m) {
- const struct FDRSLiteral * litTab = getLitTab(streamingTable);
- u32 idx = findLitTabEntry(streamingTable, hashState, m);
- size_t found_offset = litTab[idx].offset;
- const u8 * s1 = found_offset + (const u8 *)streamingTable;
- assert(hashState > found_offset);
- size_t l1 = hashState - found_offset;
- const u8 * buf = a->buf;
- size_t len = a->len;
- const char nocase = m != CASEFUL;
-
- if (l1 > len) {
- const u8 * hist = nocase ? a->buf_history_nocase : a->buf_history;
- size_t hist_len = nocase ? a->len_history_nocase : a->len_history;
-
- if (l1 > len+hist_len) {
- return 0; // Break out - not enough total history
- }
-
- size_t overhang = l1 - len;
- assert(overhang <= hist_len);
-
- if (cmpForward(hist + hist_len - overhang, s1, overhang, nocase)) {
- return 0;
- }
- s1 += overhang;
- l1 -= overhang;
- }
- // if we got here, we don't need history or we compared ok out of history
- assert(l1 <= len);
-
- if (cmpForward(buf + len - l1, s1, l1, nocase)) {
- return 0;
- }
- return hashState; // our new state
-}
-
-static really_inline
-void fdrFindStreamingHash(const struct FDR_Runtime_Args *a,
- const struct FDRSTableHeader *streamingTable,
- u8 hash_len, u32 *hashes) {
- u8 tempbuf[128];
- const u8 *base;
- if (hash_len > a->len) {
- assert(hash_len <= 128);
- size_t overhang = hash_len - a->len;
- assert(overhang <= a->len_history);
- memcpy(tempbuf, a->buf_history + a->len_history - overhang, overhang);
- memcpy(tempbuf + overhang, a->buf, a->len);
- base = tempbuf;
- } else {
- assert(hash_len <= a->len);
- base = a->buf + a->len - hash_len;
- }
-
- if (streamingTable->hashNBits[CASEFUL]) {
- hashes[CASEFUL] = streaming_hash(base, hash_len, CASEFUL);
- }
- if (streamingTable->hashNBits[CASELESS]) {
- hashes[CASELESS] = streaming_hash(base, hash_len, CASELESS);
- }
-}
-
-static really_inline
-const struct FDRSHashEntry *getEnt(const struct FDRSTableHeader *streamingTable,
- u32 h, const enum Modes m) {
- u32 nbits = streamingTable->hashNBits[m];
- if (!nbits) {
- return NULL;
- }
-
- u32 h_ent = h & ((1 << nbits) - 1);
- u32 h_low = (h >> nbits) & 63;
-
- const struct FDRSHashEntry *tab =
- (const struct FDRSHashEntry *)((const u8 *)streamingTable
- + streamingTable->hashOffset[m]);
- const struct FDRSHashEntry *ent = tab + h_ent;
-
- if (!has_bit(ent, h_low)) {
- return NULL;
- }
-
- return ent;
-}
-
-static really_inline
-void fdrPackStateMode(u32 *state_table, const struct FDR_Runtime_Args *a,
- const struct FDRSTableHeader *streamingTable,
- const struct FDRSHashEntry *ent, const enum Modes m) {
- assert(ent);
- assert(streamingTable->hashNBits[m]);
-
- const struct FDRSHashEntry *tab =
- (const struct FDRSHashEntry *)((const u8 *)streamingTable
- + streamingTable->hashOffset[m]);
-
- while (1) {
- u32 tmp = 0;
- if ((tmp = do_single_confirm(streamingTable, a, ent->state, m))) {
- state_table[m] = packStateVal(streamingTable, m, tmp);
- break;
- }
- if (ent->link == LINK_INVALID) {
- break;
- }
- ent = tab + ent->link;
- }
-}
-
-static really_inline
-void fdrPackState(const struct FDR *fdr, const struct FDR_Runtime_Args *a,
- u8 *stream_state) {
- // nothing to do if there's no stream state for the case
- if (!stream_state) {
- return;
- }
-
- // get pointers to the streamer FDR and the tertiary structure
- const struct FDRSTableHeader *streamingTable = getSHDR(fdr);
-
- assert(streamingTable->N);
-
- u32 state_table[MAX_MODES] = {0, 0};
-
- // if we don't have enough history, we don't need to do anything
- if (streamingTable->N <= a->len + a->len_history) {
- u32 hashes[MAX_MODES] = {0, 0};
-
- fdrFindStreamingHash(a, streamingTable, streamingTable->N, hashes);
-
- const struct FDRSHashEntry *ent_ful = getEnt(streamingTable,
- hashes[CASEFUL], CASEFUL);
- const struct FDRSHashEntry *ent_less = getEnt(streamingTable,
- hashes[CASELESS], CASELESS);
-
- if (ent_ful) {
- fdrPackStateMode(state_table, a, streamingTable, ent_ful,
- CASEFUL);
- }
-
- if (ent_less) {
- fdrPackStateMode(state_table, a, streamingTable, ent_less,
- CASELESS);
- }
- }
-
- setStreamStates(streamingTable, stream_state, state_table);
-}
-
-#endif
hwlm_error_t hwlmExecStreaming(const struct HWLM *t, struct hs_scratch *scratch,
size_t len, size_t start, HWLMCallback cb,
- void *ctxt, hwlm_group_t groups,
- u8 *stream_state) {
+ void *ctxt, hwlm_group_t groups) {
const u8 *hbuf = scratch->core_info.hbuf;
const size_t hlen = scratch->core_info.hlen;
const u8 *buf = scratch->core_info.buf;
DEBUG_PRINTF("using hq accel %hhu\n", t->accel1.accel_type);
aa = &t->accel1;
}
- // if no active stream state, use acceleration
- if (!fdrStreamStateActive(HWLM_C_DATA(t), stream_state)) {
- do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
- }
+ do_accel_streaming(aa, hbuf, hlen, buf, len, &start);
DEBUG_PRINTF("calling frankie (groups=%08llx, start=%zu)\n", groups,
start);
return fdrExecStreaming(HWLM_C_DATA(t), hbuf, hlen, buf, len,
- start, cb, ctxt, groups, stream_state);
+ start, cb, ctxt, groups);
}
}
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
hwlm_error_t hwlmExecStreaming(const struct HWLM *tab,
struct hs_scratch *scratch, size_t len,
size_t start, HWLMCallback callback,
- void *context, hwlm_group_t groups,
- u8 *stream_state);
+ void *context, hwlm_group_t groups);
#ifdef __cplusplus
} /* extern "C" */
if (stream_control) {
assert(stream_control->history_min <= stream_control->history_max);
+
+ // We should not have been passed any literals that are too long to
+ // match with a maximally-sized history buffer.
+ assert(all_of(begin(lits), end(lits), [&](const hwlmLiteral &lit) {
+ return lit.s.length() <= stream_control->history_max + 1;
+ }));
}
// Check that we haven't exceeded the maximum number of literals.
stream_control->literal_history_required = lit.s.length() - 1;
assert(stream_control->literal_history_required
<= stream_control->history_max);
- stream_control->literal_stream_state_required = 0;
}
eng = move(noodle);
} else {
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
/** \brief OUT parameter: History required by the literal matcher to
* correctly match all literals. */
size_t literal_history_required;
-
- /** OUT parameter: Stream state required by literal matcher in bytes. Can
- * be zero, and generally will be small (0-8 bytes). */
- size_t literal_stream_state_required;
};
/** \brief Build an \ref HWLM literal matcher runtime structure for a group of
const vector<u8> &msk_in, const vector<u8> &cmp_in)
: s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in),
groups(groups_in), msk(msk_in), cmp(cmp_in) {
+ assert(s.size() <= HWLM_LITERAL_MAX_LEN);
assert(msk.size() <= HWLM_MASKLEN);
assert(msk.size() == cmp.size());
namespace ue2 {
+/** \brief Max length of the literal passed to HWLM. */
+#define HWLM_LITERAL_MAX_LEN 255
+
/** \brief Max length of the hwlmLiteral::msk and hwlmLiteral::cmp vectors. */
#define HWLM_MASKLEN 8
init_state(t, state);
init_outfixes(t, state);
-
- // Clear the floating matcher state, if any.
- DEBUG_PRINTF("clearing %u bytes of floating matcher state\n",
- t->floatingStreamState);
- memset(getFloatingMatcherState(t, state), 0, t->floatingStreamState);
}
return HWLM_CONTINUE_MATCHING;
}
+/**
+ * \brief Confirm a long literal match ending at \p end.
+ *
+ * The literal (length \p lit_length, stored in the bytecode at byte offset
+ * \p lit_offset) is compared against the current scan buffer and, if its
+ * start precedes the buffer, against the long literal history buffer
+ * (scratch->tctxt.ll_buf / ll_buf_nocase) prepared by the long literal
+ * table. The history check only happens in streaming mode.
+ *
+ * Returns 1 if the literal is confirmed, 0 otherwise.
+ */
+static rose_inline
+int roseCheckLongLiteral(const struct RoseEngine *t,
+                         const struct hs_scratch *scratch, u64a end,
+                         u32 lit_offset, u32 lit_length, char nocase) {
+    const struct core_info *ci = &scratch->core_info;
+    const u8 *lit = getByOffset(t, lit_offset);
+
+    DEBUG_PRINTF("check lit at %llu, length %u\n", end, lit_length);
+    DEBUG_PRINTF("base buf_offset=%llu\n", ci->buf_offset);
+
+    // A literal longer than the entire stream to date cannot match.
+    if (end < lit_length) {
+        DEBUG_PRINTF("too short!\n");
+        return 0;
+    }
+
+    // If any portion of the literal matched in the current buffer, check it.
+    if (end > ci->buf_offset) {
+        u32 scan_len = MIN(end - ci->buf_offset, lit_length);
+        u64a scan_start = end - ci->buf_offset - scan_len;
+        DEBUG_PRINTF("checking suffix (%u bytes) in buf[%llu:%llu]\n", scan_len,
+                     scan_start, end);
+        if (cmpForward(ci->buf + scan_start, lit + lit_length - scan_len,
+                       scan_len, nocase)) {
+            DEBUG_PRINTF("cmp of suffix failed\n");
+            return 0;
+        }
+    }
+
+    // If the entirety of the literal was in the current block, we are done.
+    if (end - lit_length >= ci->buf_offset) {
+        DEBUG_PRINTF("literal confirmed in current block\n");
+        return 1;
+    }
+
+    // We still have a prefix which we must test against the buffer prepared by
+    // the long literal table. This is only done in streaming mode.
+
+    assert(t->mode != HS_MODE_BLOCK);
+
+    const u8 *ll_buf;
+    size_t ll_len;
+    if (nocase) {
+        ll_buf = scratch->tctxt.ll_buf_nocase;
+        ll_len = scratch->tctxt.ll_len_nocase;
+    } else {
+        ll_buf = scratch->tctxt.ll_buf;
+        ll_len = scratch->tctxt.ll_len;
+    }
+
+    assert(ll_buf);
+
+    u64a lit_start_offset = end - lit_length;
+    u32 prefix_len = MIN(lit_length, ci->buf_offset - lit_start_offset);
+    u32 hist_rewind = ci->buf_offset - lit_start_offset;
+    DEBUG_PRINTF("ll_len=%zu, hist_rewind=%u\n", ll_len, hist_rewind);
+    // Not enough long literal history retained to confirm the prefix.
+    if (hist_rewind > ll_len) {
+        DEBUG_PRINTF("not enough history\n");
+        return 0;
+    }
+
+    DEBUG_PRINTF("check prefix len=%u from hist (len %zu, rewind %u)\n",
+                 prefix_len, ll_len, hist_rewind);
+    assert(hist_rewind <= ll_len);
+    if (cmpForward(ll_buf + ll_len - hist_rewind, lit, prefix_len, nocase)) {
+        DEBUG_PRINTF("cmp of prefix failed\n");
+        return 0;
+    }
+
+    DEBUG_PRINTF("cmp succeeded\n");
+    return 1;
+}
+
static
void updateSeqPoint(struct RoseContext *tctxt, u64a offset,
const char from_mpv) {
}
}
PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_LONG_LIT) {
+ const char nocase = 0;
+ if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
+ ri->lit_length, nocase)) {
+ DEBUG_PRINTF("halt: failed long lit check\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
+ const char nocase = 1;
+ if (!roseCheckLongLiteral(t, scratch, end, ri->lit_offset,
+ ri->lit_length, nocase)) {
+ DEBUG_PRINTF("halt: failed nocase long lit check\n");
+ return HWLM_CONTINUE_MATCHING;
+ }
+ }
+ PROGRAM_NEXT_INSTRUCTION
}
}
#include "rose_build_exclusive.h"
#include "rose_build_groups.h"
#include "rose_build_infix.h"
+#include "rose_build_long_lit.h"
#include "rose_build_lookaround.h"
#include "rose_build_matchers.h"
#include "rose_build_program.h"
#include "rose_build_scatter.h"
#include "rose_build_util.h"
#include "rose_build_width.h"
+#include "rose_internal.h"
#include "rose_program.h"
#include "hwlm/hwlm.h" /* engine types */
+#include "hwlm/hwlm_literal.h"
#include "nfa/castlecompile.h"
#include "nfa/goughcompile.h"
#include "nfa/mcclellancompile.h"
bool has_states = false;
bool checks_groups = false;
bool has_lit_delay = false;
+ bool has_lit_check = false; // long literal support
bool has_anchored = false;
bool has_eod = false;
};
* written to the engine_blob. */
vector<u32> litPrograms;
+    /** \brief List of long literals (ones with CHECK_LONG_LIT instructions)
+     * that need hash table support. */
+ vector<ue2_case_string> longLiterals;
+
/** \brief Minimum offset of a match from the floating table. */
u32 floatingMinLiteralMatchOffset = 0;
+ /** \brief Long literal length threshold, used in streaming mode. */
+ size_t longLitLengthThreshold = 0;
+
/** \brief Contents of the Rose bytecode immediately following the
* RoseEngine. */
RoseEngineBlob engine_blob;
}
static
-bool isPureFloating(const RoseResources &resources) {
+bool isPureFloating(const RoseResources &resources, const CompileContext &cc) {
if (resources.has_outfixes || resources.has_suffixes ||
resources.has_leftfixes) {
DEBUG_PRINTF("has engines\n");
return false;
}
+ if (cc.streaming && resources.has_lit_check) {
+ DEBUG_PRINTF("has long literals in streaming mode, which needs "
+ "long literal table support\n");
+ return false;
+ }
+
if (resources.checks_groups) {
DEBUG_PRINTF("has group checks\n");
return false;
DEBUG_PRINTF("has_states=%d\n", bc.resources.has_states);
DEBUG_PRINTF("checks_groups=%d\n", bc.resources.checks_groups);
DEBUG_PRINTF("has_lit_delay=%d\n", bc.resources.has_lit_delay);
+ DEBUG_PRINTF("has_lit_check=%d\n", bc.resources.has_lit_check);
DEBUG_PRINTF("has_anchored=%d\n", bc.resources.has_anchored);
DEBUG_PRINTF("has_eod=%d\n", bc.resources.has_eod);
- if (isPureFloating(bc.resources)) {
+ if (isPureFloating(bc.resources, build.cc)) {
return ROSE_RUNTIME_PURE_LITERAL;
}
void fillStateOffsets(const RoseBuildImpl &tbi, u32 rolesWithStateCount,
u32 anchorStateSize, u32 activeArrayCount,
u32 activeLeftCount, u32 laggedRoseCount,
- u32 floatingStreamStateRequired, u32 historyRequired,
+ u32 longLitStreamStateRequired, u32 historyRequired,
RoseStateOffsets *so) {
u32 curr_offset = 0;
so->activeLeftArray_size = mmbit_size(activeLeftCount);
curr_offset += so->activeLeftArray_size;
- so->floatingMatcherState = curr_offset;
- curr_offset += floatingStreamStateRequired;
+ so->longLitState = curr_offset;
+ curr_offset += longLitStreamStateRequired;
// ONE WHOLE BYTE for each active leftfix with lag.
so->leftfixLagTable = curr_offset;
case ROSE_INSTR_PUSH_DELAYED:
resources.has_lit_delay = true;
break;
+ case ROSE_INSTR_CHECK_LONG_LIT:
+ case ROSE_INSTR_CHECK_LONG_LIT_NOCASE:
+ resources.has_lit_check = true;
+ break;
default:
break;
}
}
}
+/**
+ * \brief Record the literals used by CHECK_LONG_LIT and
+ * CHECK_LONG_LIT_NOCASE instructions in \p program into bc.longLiterals,
+ * tagged with their case-sensitivity.
+ */
+static
+void recordLongLiterals(build_context &bc, const RoseProgram &program) {
+    for (const auto &ri : program) {
+        if (const auto *ri_check =
+                dynamic_cast<const RoseInstrCheckLongLit *>(ri.get())) {
+            DEBUG_PRINTF("found CHECK_LITERAL for string '%s'\n",
+                         escapeString(ri_check->literal).c_str());
+            bc.longLiterals.emplace_back(ri_check->literal, false);
+            continue;
+        }
+        if (const auto *ri_check =
+                dynamic_cast<const RoseInstrCheckLongLitNocase *>(ri.get())) {
+            DEBUG_PRINTF("found CHECK_LITERAL_NOCASE for string '%s'\n",
+                         escapeString(ri_check->literal).c_str());
+            bc.longLiterals.emplace_back(ri_check->literal, true);
+        }
+    }
+}
+
static
u32 writeProgram(build_context &bc, RoseProgram &&program) {
if (program.empty()) {
}
recordResources(bc.resources, program);
+ recordLongLiterals(bc, program);
u32 len = 0;
auto prog_bytecode = writeProgram(bc.engine_blob, program, &len);
program.add_before_end(make_unique<RoseInstrCheckLitEarly>(min_offset));
}
+/**
+ * \brief Add a CHECK_LONG_LIT (or CHECK_LONG_LIT_NOCASE, for mixed-case
+ * literals) confirm instruction to \p program for a floating literal longer
+ * than bc.longLitLengthThreshold.
+ *
+ * Does nothing for delayed literals, non-floating literals, or literals
+ * short enough to be confirmed by the literal matcher itself. Throws
+ * ResourceLimitError if the literal exceeds the grey-box length limit.
+ */
+static
+void makeCheckLiteralInstruction(const RoseBuildImpl &build,
+                                 const build_context &bc, u32 final_id,
+                                 RoseProgram &program) {
+    const auto &lits = build.final_id_to_literal.at(final_id);
+    if (lits.size() != 1) {
+        // Long literals should not share a final_id.
+        assert(all_of(begin(lits), end(lits), [&](u32 lit_id) {
+            const rose_literal_id &lit = build.literals.right.at(lit_id);
+            return lit.table != ROSE_FLOATING ||
+                   lit.s.length() <= bc.longLitLengthThreshold;
+        }));
+        return;
+    }
+
+    u32 lit_id = *lits.begin();
+    if (build.isDelayed(lit_id)) {
+        return;
+    }
+
+    const rose_literal_id &lit = build.literals.right.at(lit_id);
+    if (lit.table != ROSE_FLOATING) {
+        return;
+    }
+    if (lit.s.length() <= bc.longLitLengthThreshold) {
+        return;
+    }
+
+    // Check resource limits as well.
+    if (lit.s.length() > build.cc.grey.limitLiteralLength) {
+        throw ResourceLimitError();
+    }
+
+    unique_ptr<RoseInstruction> ri;
+    if (lit.s.any_nocase()) {
+        ri = make_unique<RoseInstrCheckLongLitNocase>(lit.s.get_string());
+    } else {
+        ri = make_unique<RoseInstrCheckLongLit>(lit.s.get_string());
+    }
+    program.add_before_end(move(ri));
+}
+
static
bool hasDelayedLiteral(RoseBuildImpl &build,
const vector<RoseEdge> &lit_edges) {
DEBUG_PRINTF("final_id %u\n", final_id);
+ // Check long literal info.
+ makeCheckLiteralInstruction(build, bc, final_id, program);
+
// Check lit mask.
makeCheckLitMaskInstruction(build, bc, final_id, program);
return bc.engine_blob.add_iterator(iter);
}
+/**
+ * \brief Allocate final ids for the literals in \p lits, sharing a final id
+ * between equivalent literals where possible.
+ */
+static
+void allocateFinalIdToSet(RoseBuildImpl &build, const set<u32> &lits,
+                          size_t longLitLengthThreshold, u32 *next_final_id) {
+    const auto &g = build.g;
+    auto &literal_info = build.literal_info;
+    auto &final_id_to_literal = build.final_id_to_literal;
+
+    /* We can allocate the same final id to multiple literals of the same type
+     * if they share the same vertex set and trigger the same delayed literal
+     * ids and squash the same roles and have the same group squashing
+     * behaviour. Benefits literals cannot be merged. */
+
+    for (u32 int_id : lits) {
+        rose_literal_info &curr_info = literal_info[int_id];
+        const rose_literal_id &lit = build.literals.right.at(int_id);
+        const auto &verts = curr_info.vertices;
+
+        // Literals with benefits cannot be merged.
+        if (curr_info.requires_benefits) {
+            DEBUG_PRINTF("id %u has benefits\n", int_id);
+            goto assign_new_id;
+        }
+
+        // Long literals (that require CHECK_LONG_LIT instructions) cannot be
+        // merged.
+        if (lit.s.length() > longLitLengthThreshold) {
+            DEBUG_PRINTF("id %u is a long literal\n", int_id);
+            goto assign_new_id;
+        }
+
+        if (!verts.empty() && curr_info.delayed_ids.empty()) {
+            // Candidate set: literals shared by every vertex this literal
+            // is attached to.
+            vector<u32> cand;
+            insert(&cand, cand.end(), g[*verts.begin()].literals);
+            for (auto v : verts) {
+                vector<u32> temp;
+                set_intersection(cand.begin(), cand.end(),
+                                 g[v].literals.begin(),
+                                 g[v].literals.end(),
+                                 inserter(temp, temp.end()));
+                cand.swap(temp);
+            }
+
+            for (u32 cand_id : cand) {
+                if (cand_id >= int_id) {
+                    break;
+                }
+
+                const auto &cand_info = literal_info[cand_id];
+                const auto &cand_lit = build.literals.right.at(cand_id);
+
+                if (cand_lit.s.length() > longLitLengthThreshold) {
+                    continue;
+                }
+
+                if (cand_info.requires_benefits) {
+                    continue;
+                }
+
+                if (!cand_info.delayed_ids.empty()) {
+                    /* TODO: allow cases where delayed ids are equivalent.
+                     * This is awkward currently as they have not had their
+                     * final ids allocated yet */
+                    continue;
+                }
+
+                if (lits.find(cand_id) == lits.end()
+                    || cand_info.vertices.size() != verts.size()
+                    || cand_info.squash_group != curr_info.squash_group) {
+                    continue;
+                }
+
+                /* if we are squashing groups we need to check if they are the
+                 * same group */
+                if (cand_info.squash_group
+                    && cand_info.group_mask != curr_info.group_mask) {
+                    continue;
+                }
+
+                u32 final_id = cand_info.final_id;
+                assert(final_id != MO_INVALID_IDX);
+                assert(curr_info.final_id == MO_INVALID_IDX);
+                curr_info.final_id = final_id;
+                final_id_to_literal[final_id].insert(int_id);
+                goto next_lit;
+            }
+        }
+
+    assign_new_id:
+        /* oh well, have to give it a fresh one, hang the expense */
+        DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id);
+        assert(curr_info.final_id == MO_INVALID_IDX);
+        curr_info.final_id = *next_final_id;
+        final_id_to_literal[*next_final_id].insert(int_id);
+        (*next_final_id)++;
+    next_lit:;
+    }
+}
+
+/**
+ * \brief True if literal \p lit_id is attached to at least one vertex,
+ * either directly or via one of its delayed variants.
+ */
+static
+bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
+    assert(lit_id < build.literal_info.size());
+    const auto &info = build.literal_info[lit_id];
+    if (!info.vertices.empty()) {
+        return true;
+    }
+
+    for (const u32 &delayed_id : info.delayed_ids) {
+        assert(delayed_id < build.literal_info.size());
+        const rose_literal_info &delayed_info = build.literal_info[delayed_id];
+        if (!delayed_info.vertices.empty()) {
+            return true;
+        }
+    }
+
+    DEBUG_PRINTF("literal %u has no refs\n", lit_id);
+    return false;
+}
+
+/** \brief Allocate final literal IDs for all literals.
+ *
+ * Ids are handed out in three contiguous ranges: normal (undelayed,
+ * unanchored) literals first, then anchored literals, then delayed
+ * literals. build.anchored_base_id and build.delay_base_id record the
+ * boundaries between the ranges. Unused literals and the special EOD event
+ * literal receive no id. */
+static
+void allocateFinalLiteralId(RoseBuildImpl &build,
+                            size_t longLitLengthThreshold) {
+    set<u32> anch;
+    set<u32> norm;
+    set<u32> delay;
+
+    /* undelayed ids come first */
+    assert(build.final_id_to_literal.empty());
+    u32 next_final_id = 0;
+    for (u32 i = 0; i < build.literal_info.size(); i++) {
+        assert(!build.hasFinalId(i));
+
+        if (!isUsedLiteral(build, i)) {
+            /* what is this literal good for? absolutely nothing */
+            continue;
+        }
+
+        // The special EOD event literal has its own program and does not need
+        // a real literal ID.
+        if (i == build.eod_event_literal_id) {
+            assert(build.eod_event_literal_id != MO_INVALID_IDX);
+            continue;
+        }
+
+        if (build.isDelayed(i)) {
+            assert(!build.literal_info[i].requires_benefits);
+            delay.insert(i);
+        } else if (build.literals.right.at(i).table == ROSE_ANCHORED) {
+            anch.insert(i);
+        } else {
+            norm.insert(i);
+        }
+    }
+
+    /* normal lits */
+    allocateFinalIdToSet(build, norm, longLitLengthThreshold, &next_final_id);
+
+    /* next anchored stuff */
+    build.anchored_base_id = next_final_id;
+    allocateFinalIdToSet(build, anch, longLitLengthThreshold, &next_final_id);
+
+    /* delayed ids come last */
+    build.delay_base_id = next_final_id;
+    allocateFinalIdToSet(build, delay, longLitLengthThreshold, &next_final_id);
+}
+
static
aligned_unique_ptr<RoseEngine> addSmallWriteEngine(RoseBuildImpl &build,
aligned_unique_ptr<RoseEngine> rose) {
return rose2;
}
+/**
+ * \brief Returns the pair (number of literals, max length) for all real
+ * literals in the floating table that are in-use.
+ *
+ * Used by calcLongLitThreshold to detect the single-literal case.
+ */
+static
+pair<size_t, size_t> floatingCountAndMaxLen(const RoseBuildImpl &build) {
+    size_t num = 0;
+    size_t max_len = 0;
+
+    for (const auto &e : build.literals.right) {
+        const u32 id = e.first;
+        const rose_literal_id &lit = e.second;
+
+        if (lit.table != ROSE_FLOATING) {
+            continue;
+        }
+        if (lit.delay) {
+            // Skip delayed literals, so that we only count the undelayed
+            // version that ends up in the HWLM table.
+            continue;
+        }
+        if (!isUsedLiteral(build, id)) {
+            continue;
+        }
+
+        num++;
+        max_len = max(max_len, lit.s.length());
+    }
+    DEBUG_PRINTF("%zu floating literals with max_len=%zu\n", num, max_len);
+    return {num, max_len};
+}
+
+/**
+ * \brief Compute the long literal length threshold: floating literals
+ * longer than this require long literal table (CHECK_LONG_LIT) support.
+ *
+ * In block mode, only literals beyond the HWLM literal length limit need
+ * such support. In streaming mode, the threshold starts at
+ * ROSE_LONG_LITERAL_THRESHOLD_MIN, is expanded to cover the history already
+ * required (and the full length of a sole floating literal), then clamped
+ * to the maximum history available.
+ */
+size_t calcLongLitThreshold(const RoseBuildImpl &build,
+                            const size_t historyRequired) {
+    const auto &cc = build.cc;
+
+    // In block mode, we should only use the long literal support for literals
+    // that cannot be handled by HWLM.
+    if (!cc.streaming) {
+        return HWLM_LITERAL_MAX_LEN;
+    }
+
+    size_t longLitLengthThreshold = ROSE_LONG_LITERAL_THRESHOLD_MIN;
+
+    // Expand to size of history we've already allocated. Note that we need N-1
+    // bytes of history to match a literal of length N.
+    longLitLengthThreshold = max(longLitLengthThreshold, historyRequired + 1);
+
+    // If we only have one literal, allow for a larger value in order to avoid
+    // building a long literal table for a trivial Noodle case that we could
+    // fit in history.
+    const auto num_len = floatingCountAndMaxLen(build);
+    if (num_len.first == 1) {
+        if (num_len.second > longLitLengthThreshold) {
+            DEBUG_PRINTF("expanding for single literal of length %zu\n",
+                         num_len.second);
+            longLitLengthThreshold = num_len.second;
+        }
+    }
+
+    // Clamp to max history available.
+    longLitLengthThreshold =
+        min(longLitLengthThreshold, size_t{cc.grey.maxHistoryAvailable} + 1);
+
+    return longLitLengthThreshold;
+}
+
aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
DerivedBoundaryReports dboundary(boundary);
size_t historyRequired = calcHistoryRequired(); // Updated by HWLM.
+ size_t longLitLengthThreshold = calcLongLitThreshold(*this,
+ historyRequired);
+ DEBUG_PRINTF("longLitLengthThreshold=%zu\n", longLitLengthThreshold);
+
+ allocateFinalLiteralId(*this, longLitLengthThreshold);
auto anchored_dfas = buildAnchoredDfas(*this);
build_context bc;
bc.floatingMinLiteralMatchOffset =
findMinFloatingLiteralMatch(*this, anchored_dfas);
+ bc.longLitLengthThreshold = longLitLengthThreshold;
bc.needs_catchup = needsCatchup(*this, anchored_dfas);
recordResources(bc.resources, *this);
if (!anchored_dfas.empty()) {
u32 eodProgramOffset = writeEodProgram(*this, bc, eodNfaIterOffset);
+ size_t longLitStreamStateRequired = 0;
+ u32 longLitTableOffset = buildLongLiteralTable(*this, bc.engine_blob,
+ bc.longLiterals, longLitLengthThreshold, &historyRequired,
+ &longLitStreamStateRequired);
+
vector<mmbit_sparse_iter> activeLeftIter;
buildActiveLeftIter(leftInfoTable, activeLeftIter);
// Build floating HWLM matcher.
rose_group fgroups = 0;
size_t fsize = 0;
- size_t floatingStreamStateRequired = 0;
- auto ftable = buildFloatingMatcher(*this, &fgroups, &fsize, &historyRequired,
- &floatingStreamStateRequired);
+ auto ftable = buildFloatingMatcher(*this, bc.longLitLengthThreshold,
+ &fgroups, &fsize, &historyRequired);
u32 fmatcherOffset = 0;
if (ftable) {
currOffset = ROUNDUP_CL(currOffset);
memset(&stateOffsets, 0, sizeof(stateOffsets));
fillStateOffsets(*this, bc.numStates, anchorStateSize,
activeArrayCount, activeLeftCount, laggedRoseCount,
- floatingStreamStateRequired, historyRequired,
+ longLitStreamStateRequired, historyRequired,
&stateOffsets);
scatter_plan_raw state_scatter;
engine->ematcherOffset = ematcherOffset;
engine->sbmatcherOffset = sbmatcherOffset;
engine->fmatcherOffset = fmatcherOffset;
+ engine->longLitTableOffset = longLitTableOffset;
engine->amatcherMinWidth = findMinWidth(*this, ROSE_ANCHORED);
engine->fmatcherMinWidth = findMinWidth(*this, ROSE_FLOATING);
engine->eodmatcherMinWidth = findMinWidth(*this, ROSE_EOD_ANCHORED);
engine->totalNumLiterals = verify_u32(literal_info.size());
engine->asize = verify_u32(asize);
engine->ematcherRegionSize = ematcher_region_size;
- engine->floatingStreamState = verify_u32(floatingStreamStateRequired);
+ engine->longLitStreamState = verify_u32(longLitStreamStateRequired);
engine->boundary.reportEodOffset = boundary_out.reportEodOffset;
engine->boundary.reportZeroOffset = boundary_out.reportZeroOffset;
#define ANCHORED_REHOME_DEEP 25
#define ANCHORED_REHOME_SHORT_LEN 3
-#ifdef DEBUG
-static UNUSED
-void printLitInfo(const rose_literal_info &li, u32 id) {
- DEBUG_PRINTF("lit_info %u\n", id);
- DEBUG_PRINTF(" parent %u%s", li.undelayed_id,
- li.delayed_ids.empty() ? "":", children:");
- for (u32 d_id : li.delayed_ids) {
- printf(" %u", d_id);
- }
- printf("\n");
- DEBUG_PRINTF(" group %llu %s\n", li.group_mask, li.squash_group ? "s":"");
-}
-#endif
-
-static
-void allocateFinalIdToSet(const RoseGraph &g, const set<u32> &lits,
- deque<rose_literal_info> *literal_info,
- map<u32, set<u32> > *final_id_to_literal,
- u32 *next_final_id) {
- /* We can allocate the same final id to multiple literals of the same type
- * if they share the same vertex set and trigger the same delayed literal
- * ids and squash the same roles and have the same group squashing
- * behaviour. Benefits literals cannot be merged. */
-
- for (u32 int_id : lits) {
- rose_literal_info &curr_info = (*literal_info)[int_id];
- const auto &verts = curr_info.vertices;
-
- if (!verts.empty() && !curr_info.requires_benefits
- && curr_info.delayed_ids.empty()) {
- vector<u32> cand;
- insert(&cand, cand.end(), g[*verts.begin()].literals);
- for (auto v : verts) {
- vector<u32> temp;
- set_intersection(cand.begin(), cand.end(),
- g[v].literals.begin(),
- g[v].literals.end(),
- inserter(temp, temp.end()));
- cand.swap(temp);
- }
-
- for (u32 cand_id : cand) {
- if (cand_id >= int_id) {
- break;
- }
-
- const rose_literal_info &cand_info = (*literal_info)[cand_id];
-
- if (cand_info.requires_benefits) {
- continue;
- }
-
- if (!cand_info.delayed_ids.empty()) {
- /* TODO: allow cases where delayed ids are equivalent.
- * This is awkward currently as the have not had their
- * final ids allocated yet */
- continue;
- }
-
- if (lits.find(cand_id) == lits.end()
- || cand_info.vertices.size() != verts.size()
- || cand_info.squash_group != curr_info.squash_group) {
- continue;
- }
-
- /* if we are squashing groups we need to check if they are the
- * same group */
- if (cand_info.squash_group
- && cand_info.group_mask != curr_info.group_mask) {
- continue;
- }
-
- u32 final_id = cand_info.final_id;
- assert(final_id != MO_INVALID_IDX);
- assert(curr_info.final_id == MO_INVALID_IDX);
- curr_info.final_id = final_id;
- (*final_id_to_literal)[final_id].insert(int_id);
- goto next_lit;
- }
- }
-
- /* oh well, have to give it a fresh one, hang the expense */
- DEBUG_PRINTF("allocating final id %u to %u\n", *next_final_id, int_id);
- assert(curr_info.final_id == MO_INVALID_IDX);
- curr_info.final_id = *next_final_id;
- (*final_id_to_literal)[*next_final_id].insert(int_id);
- (*next_final_id)++;
- next_lit:;
- }
-}
-
-static
-bool isUsedLiteral(const RoseBuildImpl &build, u32 lit_id) {
- assert(lit_id < build.literal_info.size());
- const auto &info = build.literal_info[lit_id];
- if (!info.vertices.empty()) {
- return true;
- }
-
- for (const u32 &delayed_id : info.delayed_ids) {
- assert(delayed_id < build.literal_info.size());
- const rose_literal_info &delayed_info = build.literal_info[delayed_id];
- if (!delayed_info.vertices.empty()) {
- return true;
- }
- }
-
- DEBUG_PRINTF("literal %u has no refs\n", lit_id);
- return false;
-}
-
-/** \brief Allocate final literal IDs for all literals.
- *
- * These are the literal ids used in the bytecode.
- */
-static
-void allocateFinalLiteralId(RoseBuildImpl &tbi) {
- RoseGraph &g = tbi.g;
-
- set<u32> anch;
- set<u32> norm;
- set<u32> delay;
-
- /* undelayed ids come first */
- assert(tbi.final_id_to_literal.empty());
- u32 next_final_id = 0;
- for (u32 i = 0; i < tbi.literal_info.size(); i++) {
- assert(!tbi.hasFinalId(i));
-
- if (!isUsedLiteral(tbi, i)) {
- /* what is this literal good for? absolutely nothing */
- continue;
- }
-
- // The special EOD event literal has its own program and does not need
- // a real literal ID.
- if (i == tbi.eod_event_literal_id) {
- assert(tbi.eod_event_literal_id != MO_INVALID_IDX);
- continue;
- }
-
- if (tbi.isDelayed(i)) {
- assert(!tbi.literal_info[i].requires_benefits);
- delay.insert(i);
- } else if (tbi.literals.right.at(i).table == ROSE_ANCHORED) {
- anch.insert(i);
- } else {
- norm.insert(i);
- }
- }
-
- /* normal lits */
- allocateFinalIdToSet(g, norm, &tbi.literal_info, &tbi.final_id_to_literal,
- &next_final_id);
-
- /* next anchored stuff */
- tbi.anchored_base_id = next_final_id;
- allocateFinalIdToSet(g, anch, &tbi.literal_info, &tbi.final_id_to_literal,
- &next_final_id);
-
- /* delayed ids come last */
- tbi.delay_base_id = next_final_id;
- allocateFinalIdToSet(g, delay, &tbi.literal_info, &tbi.final_id_to_literal,
- &next_final_id);
-}
-
#define MAX_EXPLOSION_NC 3
static
bool limited_explosion(const ue2_literal &s) {
continue;
}
- if (limited_explosion(lit.s)) {
+        // We don't want to explode long literals, as they require confirmation
+        // with a CHECK_LONG_LIT instruction and need unique final_ids.
+        // TODO: we could allow explosion for literals where the prefixes
+        // covered by CHECK_LONG_LIT are identical.
+ if (lit.s.length() <= ROSE_LONG_LITERAL_THRESHOLD_MIN &&
+ limited_explosion(lit.s)) {
DEBUG_PRINTF("need to explode existing string '%s'\n",
dumpString(lit.s).c_str());
literal_info[id].requires_explode = true;
/* final prep work */
remapCastleTops(*this);
- allocateFinalLiteralId(*this);
inspectRoseTops(*this);
buildRoseSquashMasks(*this);
static
void dumpRoseTestLiterals(const RoseBuildImpl &build, const string &base) {
- auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED);
+ size_t historyRequired = build.calcHistoryRequired();
+ size_t longLitLengthThreshold =
+ calcLongLitThreshold(build, historyRequired);
+
+ auto lits = fillHamsterLiteralList(build, ROSE_ANCHORED,
+ longLitLengthThreshold);
dumpTestLiterals(base + "rose_anchored_test_literals.txt", lits);
- lits = fillHamsterLiteralList(build, ROSE_FLOATING);
+ lits = fillHamsterLiteralList(build, ROSE_FLOATING, longLitLengthThreshold);
dumpTestLiterals(base + "rose_float_test_literals.txt", lits);
- lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED);
+ lits = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED,
+ build.ematcher_region_size);
dumpTestLiterals(base + "rose_eod_test_literals.txt", lits);
if (!build.cc.streaming) {
lits = fillHamsterLiteralList(build, ROSE_FLOATING,
- ROSE_SMALL_BLOCK_LEN);
+ ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
auto lits2 = fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK,
- ROSE_SMALL_BLOCK_LEN);
+ ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
lits.insert(end(lits), begin(lits2), end(lits2));
dumpTestLiterals(base + "rose_smallblock_test_literals.txt", lits);
}
#define ROSE_GROUPS_MAX 64
+#define ROSE_LONG_LITERAL_THRESHOLD_MIN 33
+
struct BoundaryReports;
struct CastleProto;
struct CompileContext;
ReportID next_nfa_report;
};
+size_t calcLongLitThreshold(const RoseBuildImpl &build,
+ const size_t historyRequired);
+
// Free functions, in rose_build_misc.cpp
bool hasAnchHistorySucc(const RoseGraph &g, RoseVertex v);
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "rose_build_long_lit.h"
+
+#include "rose_build_engine_blob.h"
+#include "rose_build_impl.h"
+#include "stream_long_lit_hash.h"
+#include "util/alloc.h"
+#include "util/bitutils.h"
+#include "util/verify_types.h"
+#include "util/compile_context.h"
+
+using namespace std;
+
+namespace ue2 {
+
+/** \brief Minimum size for a non-empty hash table. */
+static constexpr u32 MIN_HASH_TABLE_SIZE = 4096;
+
+/** \brief Accounting for one case-mode (caseful or nocase) of the long
+ * literal table. */
+struct LongLitModeInfo {
+    u32 boundary = 0; //!< One above the largest index for this mode.
+    u32 positions = 0; //!< Total number of string positions.
+    u32 hashEntries = 0; //!< Number of hash table entries.
+};
+
+/** \brief Accounting for the long literal table as a whole: one
+ * \ref LongLitModeInfo for each case-mode. */
+struct LongLitInfo {
+    LongLitModeInfo caseful;
+    LongLitModeInfo nocase;
+};
+
+static
+u32 roundUpToPowerOfTwo(u32 x) {
+    // Smallest power of two that is >= x, found from the highest set bit
+    // of x - 1.
+    assert(x != 0);
+    const u32 shift = lg2(x - 1) + 1;
+    assert(shift < 32); // shifting by >= 32 would be undefined behaviour
+    return 1U << shift;
+}
+
+/**
+ * \brief Sizes up the long literal table for a set of literals: per-mode
+ * (caseful/nocase) boundaries, total string bytes and hash table entry
+ * counts.
+ *
+ * Requires that lits is ordered with all caseful literals before all nocase
+ * literals. Each literal contributes (len - max_len) hashed positions; a
+ * mode's hash table is sized to a power of two of at least
+ * MIN_HASH_TABLE_SIZE entries, or zero if the mode has no hashed positions.
+ */
+static
+LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits,
+                            size_t max_len) {
+    LongLitInfo info;
+    u32 hashedPositionsCase = 0;
+    u32 hashedPositionsNocase = 0;
+
+    // Caseful boundary is the index of the first nocase literal, as we're
+    // ordered (caseful, nocase).
+    auto first_nocase = find_if(begin(lits), end(lits),
+                        [](const ue2_case_string &lit) { return lit.nocase; });
+    info.caseful.boundary = verify_u32(distance(lits.begin(), first_nocase));
+
+    // Nocase boundary is the size of the literal set.
+    info.nocase.boundary = verify_u32(lits.size());
+
+    for (const auto &lit : lits) {
+        // NOTE(review): assumes every literal is at least max_len long after
+        // trimming, otherwise lit.s.size() - max_len underflows -- confirm
+        // against the caller's long-literal selection.
+        if (lit.nocase) {
+            hashedPositionsNocase += lit.s.size() - max_len;
+            info.nocase.positions += lit.s.size();
+        } else {
+            hashedPositionsCase += lit.s.size() - max_len;
+            info.caseful.positions += lit.s.size();
+        }
+    }
+
+    info.caseful.hashEntries = hashedPositionsCase
+        ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsCase))
+        : 0;
+    info.nocase.hashEntries = hashedPositionsNocase
+        ? roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE, hashedPositionsNocase))
+        : 0;
+
+    DEBUG_PRINTF("caseful: boundary=%u, positions=%u, hashedPositions=%u, "
+                 "hashEntries=%u\n",
+                 info.caseful.boundary, info.caseful.positions,
+                 hashedPositionsCase, info.caseful.hashEntries);
+    DEBUG_PRINTF("nocase: boundary=%u, positions=%u, hashedPositions=%u, "
+                 "hashEntries=%u\n",
+                 info.nocase.boundary, info.nocase.positions,
+                 hashedPositionsNocase, info.nocase.hashEntries);
+
+    return info;
+}
+
+/**
+ * \brief Fills the hash table for one case-mode of the long literal table.
+ *
+ * Every max_len-byte substring of each literal (for offsets >= 1) is hashed
+ * into a bucket; collisions are chained through nearby free slots via the
+ * link field, and each slot carries a 64-bit guard bitfield keyed on six
+ * extra hash bits.
+ *
+ * \param lits full literal set; entries of the other case-mode are skipped.
+ * \param max_len length of each hashed substring.
+ * \param tab hash table to fill; zero-initialised by the caller.
+ * \param numEntries number of slots in tab; must be a power of two.
+ * \param nocase which case-mode to process.
+ * \param litToOffsetVal maps literal index to its string's offset within the
+ * long literal table blob.
+ */
+static
+void fillHashes(const vector<ue2_case_string> &lits, size_t max_len,
+                RoseLongLitHashEntry *tab, size_t numEntries, bool nocase,
+                const map<u32, u32> &litToOffsetVal) {
+    const u32 nbits = lg2(numEntries);
+    map<u32, deque<pair<u32, u32>>> bucketToLitOffPairs;
+    map<u32, u64a> bucketToBitfield;
+
+    for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) {
+        const ue2_case_string &lit = lits[lit_id];
+        if (nocase != lit.nocase) {
+            continue;
+        }
+        // NOTE(review): offsets start at 1; presumably the offset-0 prefix
+        // needs no hash entry -- confirm against the runtime lookup.
+        for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
+            const u8 *substr = (const u8 *)lit.s.c_str() + offset;
+            u32 h = hashLongLiteral(substr, max_len, lit.nocase);
+            // Low nbits of the hash pick the bucket; the next six bits pick
+            // the guard bit within that bucket's bitfield.
+            u32 h_ent = h & ((1U << nbits) - 1);
+            u32 h_low = (h >> nbits) & 63;
+            bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset);
+            bucketToBitfield[h_ent] |= (1ULL << h_low);
+        }
+    }
+
+    // this used to be a set<u32>, but a bitset is much much faster given that
+    // we're using it only for membership testing.
+    boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default.
+
+    // sweep out bitfield entries and save the results swapped accordingly
+    // also, anything with bitfield entries is put in filledBuckets
+    for (const auto &m : bucketToBitfield) {
+        const u32 &bucket = m.first;
+        const u64a &contents = m.second;
+        tab[bucket].bitfield = contents;
+        filledBuckets.set(bucket);
+    }
+
+    // store out all our chains based on free values in our hash table.
+    // find nearest free locations that are empty (there will always be more
+    // entries than strings, at present)
+    for (auto &m : bucketToLitOffPairs) {
+        u32 bucket = m.first;
+        deque<pair<u32, u32>> &d = m.second;
+
+        // sort d by distance of the residual string (len minus our depth into
+        // the string). We need to put the 'furthest back' string first...
+        stable_sort(d.begin(), d.end(),
+                    [](const pair<u32, u32> &a, const pair<u32, u32> &b) {
+                        if (a.second != b.second) {
+                            return a.second > b.second; /* longest is first */
+                        }
+                        return a.first < b.first;
+                    });
+
+        while (1) {
+            // first time through is always at bucket, then we fill in links
+            filledBuckets.set(bucket);
+            RoseLongLitHashEntry *ent = &tab[bucket];
+            u32 lit_id = d.front().first;
+            u32 offset = d.front().second;
+
+            // Stream state value: offset one past the end of the hashed
+            // substring within the long literal table.
+            ent->state = verify_u32(litToOffsetVal.at(lit_id) +
+                                    offset + max_len);
+            ent->link = (u32)LINK_INVALID;
+
+            d.pop_front();
+            if (d.empty()) {
+                break;
+            }
+            // now, if there is another value
+            // find a bucket for it and put in 'bucket' and repeat
+            // all we really need to do is find something not in filledBuckets,
+            // ideally something close to bucket
+            // we search backward and forward from bucket, trying to stay as
+            // close as possible.
+            UNUSED bool found = false;
+            int bucket_candidate = 0;
+            for (u32 k = 1; k < numEntries * 2; k++) {
+                bucket_candidate = bucket + (((k & 1) == 0)
+                        ? (-(int)k / 2) : (k / 2));
+                if (bucket_candidate < 0 ||
+                    (size_t)bucket_candidate >= numEntries) {
+                    continue;
+                }
+                if (!filledBuckets.test(bucket_candidate)) {
+                    found = true;
+                    break;
+                }
+            }
+
+            // NOTE(review): relies on the table always having more slots than
+            // hashed positions (see analyzeLongLits sizing); in release
+            // builds an exhausted table would silently reuse a filled slot.
+            assert(found);
+            bucket = bucket_candidate;
+            ent->link = bucket;
+        }
+    }
+}
+
+/**
+ * \brief Builds the streaming long literal table and writes it into the
+ * engine blob.
+ *
+ * Blob layout: RoseLongLitTable header, literal offset table (with one
+ * sentinel end entry), the trimmed literal strings themselves, then the
+ * caseful and nocase hash tables.
+ *
+ * \param lits long literals; modified in place (trimmed, sorted, deduped).
+ * \param longLitLengthThreshold used to derive max_len (threshold - 1), the
+ * length of the stored prefixes.
+ * \param historyRequired raised (if needed) to cover max_len bytes.
+ * \param longLitStreamStateRequired set to the packed stream state size in
+ * bytes.
+ * \return offset of the table in the blob, or 0 if no table was built.
+ */
+u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
+                          vector<ue2_case_string> &lits,
+                          size_t longLitLengthThreshold,
+                          size_t *historyRequired,
+                          size_t *longLitStreamStateRequired) {
+    // Work in terms of history requirement (i.e. literal len - 1).
+    const size_t max_len = longLitLengthThreshold - 1;
+
+    // We should only be building the long literal hash table in streaming mode.
+    if (!build.cc.streaming) {
+        return 0;
+    }
+
+    if (lits.empty()) {
+        DEBUG_PRINTF("no long literals\n");
+        return 0;
+    }
+
+    // The last char of each literal is trimmed as we're not interested in full
+    // matches, only partial matches.
+    for (auto &lit : lits) {
+        assert(!lit.s.empty());
+        lit.s.pop_back();
+    }
+
+    // Sort by caseful/caseless and in lexicographical order.
+    stable_sort(begin(lits), end(lits), [](const ue2_case_string &a,
+                                           const ue2_case_string &b) {
+        if (a.nocase != b.nocase) {
+            return a.nocase < b.nocase;
+        }
+        return a.s < b.s;
+    });
+
+    // Find literals that are prefixes of other literals (including
+    // duplicates). Note that we iterate in reverse, since we want to retain
+    // only the longest string from a set of prefixes.
+    auto it = unique(lits.rbegin(), lits.rend(), [](const ue2_case_string &a,
+                                                    const ue2_case_string &b) {
+        return a.nocase == b.nocase && a.s.size() >= b.s.size() &&
+               equal(b.s.begin(), b.s.end(), a.s.begin());
+    });
+
+    // Erase dupes found by unique().
+    lits.erase(lits.begin(), it.base());
+
+    LongLitInfo info = analyzeLongLits(lits, max_len);
+
+    // first assess the size and find our caseless threshold
+    size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable));
+
+    size_t litTabOffset = headerSize;
+
+    size_t litTabNumEntries = lits.size() + 1;
+    size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral));
+
+    size_t wholeLitTabOffset = litTabOffset + litTabSize;
+    size_t totalWholeLitTabSize =
+        ROUNDUP_16(info.caseful.positions + info.nocase.positions);
+
+    size_t htOffsetCase = wholeLitTabOffset + totalWholeLitTabSize;
+    size_t htSizeCase = info.caseful.hashEntries * sizeof(RoseLongLitHashEntry);
+    size_t htOffsetNocase = htOffsetCase + htSizeCase;
+    size_t htSizeNocase =
+        info.nocase.hashEntries * sizeof(RoseLongLitHashEntry);
+
+    size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase);
+
+    // need to add +2 to both of these to allow space for the actual largest
+    // value as well as handling the fact that we add one to the space when
+    // storing out a position to allow zero to mean "no stream state value"
+    u8 streamBitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2));
+    u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2));
+    u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8;
+
+    auto table = aligned_zmalloc_unique<char>(tabSize);
+    assert(table); // otherwise would have thrown std::bad_alloc
+
+    // then fill it in
+    char *ptr = table.get();
+    RoseLongLitTable *header = (RoseLongLitTable *)ptr;
+    // fill in header
+    header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
+    header->boundaryCase = info.caseful.boundary;
+    header->hashOffsetCase = verify_u32(htOffsetCase);
+    // NOTE(review): hashEntries may be zero for a mode with no hashed
+    // positions; this relies on lg2(0) being well-defined (0) -- confirm.
+    header->hashNBitsCase = lg2(info.caseful.hashEntries);
+    header->streamStateBitsCase = streamBitsCase;
+    header->boundaryNocase = info.nocase.boundary;
+    header->hashOffsetNocase = verify_u32(htOffsetNocase);
+    header->hashNBitsNocase = lg2(info.nocase.hashEntries);
+    header->streamStateBitsNocase = streamBitsNocase;
+    assert(tot_state_bytes < sizeof(u64a));
+    header->streamStateBytes = verify_u8(tot_state_bytes); // u8
+
+    ptr += headerSize;
+
+    // now fill in the rest
+
+    RoseLongLiteral *litTabPtr = (RoseLongLiteral *)ptr;
+    ptr += litTabSize;
+
+    map<u32, u32> litToOffsetVal;
+    for (auto i = lits.begin(), e = lits.end(); i != e; ++i) {
+        u32 entry = verify_u32(i - lits.begin());
+        u32 offset = verify_u32(ptr - table.get());
+
+        // point the table entry to the string location
+        litTabPtr[entry].offset = offset;
+
+        litToOffsetVal[entry] = offset;
+
+        // copy the string into the string location
+        const auto &s = i->s;
+        memcpy(ptr, s.c_str(), s.size());
+
+        ptr += s.size(); // and the string location
+    }
+
+    // fill in final lit table entry with current ptr (serves as end value)
+    litTabPtr[lits.size()].offset = verify_u32(ptr - table.get());
+
+    // fill hash tables
+    ptr = table.get() + htOffsetCase;
+    fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
+               info.caseful.hashEntries, false, litToOffsetVal);
+    ptr += htSizeCase;
+    fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
+               info.nocase.hashEntries, true, litToOffsetVal);
+    ptr += htSizeNocase;
+
+    assert(ptr <= table.get() + tabSize);
+
+    DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize);
+    DEBUG_PRINTF("requires %zu bytes of history\n", max_len);
+    DEBUG_PRINTF("requires %u bytes of stream state\n", tot_state_bytes);
+
+    *historyRequired = max(*historyRequired, max_len);
+    *longLitStreamStateRequired = tot_state_bytes;
+
+    return blob.add(table.get(), tabSize, 16);
+}
+
+} // namespace ue2
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ROSE_BUILD_LONG_LIT_H
+#define ROSE_BUILD_LONG_LIT_H
+
+#include "ue2common.h"
+
+#include <vector>
+
+namespace ue2 {
+
+class RoseBuildImpl;
+class RoseEngineBlob;
+struct ue2_case_string;
+
+/**
+ * \brief Builds the streaming long literal table into the engine blob.
+ *
+ * Returns the blob offset of the table, or 0 if no table was built. Modifies
+ * lits in place and may update *historyRequired and
+ * *longLitStreamStateRequired.
+ */
+u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
+                          std::vector<ue2_case_string> &lits,
+                          size_t longLitLengthThreshold,
+                          size_t *historyRequired,
+                          size_t *longLitStreamStateRequired);
+
+} // namespace ue2
+
+
+#endif // ROSE_BUILD_LONG_LIT_H
static
bool isNoRunsLiteral(const RoseBuildImpl &build, const u32 id,
- const rose_literal_info &info) {
+ const rose_literal_info &info, const size_t max_len) {
DEBUG_PRINTF("lit id %u\n", id);
if (info.requires_benefits) {
return false;
}
+ if (build.literals.right.at(id).s.length() > max_len) {
+ DEBUG_PRINTF("requires literal check\n");
+ return false;
+ }
+
if (isDirectHighlander(build, id, info)) {
DEBUG_PRINTF("highlander direct report\n");
return true;
vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
rose_literal_table table,
- u32 max_offset) {
+ size_t max_len, u32 max_offset) {
vector<hwlmLiteral> lits;
for (const auto &e : build.literals.right) {
const vector<u8> &msk = e.second.msk;
const vector<u8> &cmp = e.second.cmp;
- bool noruns = isNoRunsLiteral(build, id, info);
+ bool noruns = isNoRunsLiteral(build, id, info, max_len);
if (info.requires_explode) {
DEBUG_PRINTF("exploding lit\n");
+
+ // We do not require_explode for long literals.
+ assert(lit.length() <= max_len);
+
case_iter cit = caseIterateBegin(lit);
case_iter cite = caseIterateEnd();
for (; cit != cite; ++cit) {
msk, cmp);
}
} else {
- const std::string &s = lit.get_string();
- const bool nocase = lit.any_nocase();
+ string s = lit.get_string();
+ bool nocase = lit.any_nocase();
DEBUG_PRINTF("id=%u, s='%s', nocase=%d, noruns=%d, msk=%s, "
"cmp=%s\n",
final_id, escapeString(s).c_str(), (int)nocase, noruns,
dumpMask(msk).c_str(), dumpMask(cmp).c_str());
+ if (s.length() > max_len) {
+ DEBUG_PRINTF("truncating to tail of length %zu\n", max_len);
+ s.erase(0, s.length() - max_len);
+ // We shouldn't have set a threshold below 8 chars.
+ assert(msk.size() <= max_len);
+ }
+
if (!maskIsConsistent(s, nocase, msk, cmp)) {
DEBUG_PRINTF("msk/cmp for literal can't match, skipping\n");
continue;
}
- lits.emplace_back(s, nocase, noruns, final_id, groups, msk, cmp);
+ lits.emplace_back(move(s), nocase, noruns, final_id, groups, msk,
+ cmp);
}
}
}
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
+ size_t longLitLengthThreshold,
rose_group *fgroups,
size_t *fsize,
- size_t *historyRequired,
- size_t *streamStateRequired) {
+ size_t *historyRequired) {
*fsize = 0;
*fgroups = 0;
- auto fl = fillHamsterLiteralList(build, ROSE_FLOATING);
+ auto fl = fillHamsterLiteralList(build, ROSE_FLOATING,
+ longLitLengthThreshold);
if (fl.empty()) {
DEBUG_PRINTF("empty floating matcher\n");
return nullptr;
if (build.cc.streaming) {
DEBUG_PRINTF("literal_history_required=%zu\n",
ctl.literal_history_required);
- DEBUG_PRINTF("literal_stream_state_required=%zu\n",
- ctl.literal_stream_state_required);
assert(ctl.literal_history_required <=
build.cc.grey.maxHistoryAvailable);
*historyRequired = max(*historyRequired,
ctl.literal_history_required);
- *streamStateRequired = ctl.literal_stream_state_required;
}
*fsize = hwlmSize(ftable.get());
return nullptr;
}
- auto lits = fillHamsterLiteralList(build, ROSE_FLOATING,
- ROSE_SMALL_BLOCK_LEN);
+ auto lits = fillHamsterLiteralList(
+ build, ROSE_FLOATING, ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
if (lits.empty()) {
DEBUG_PRINTF("no floating table\n");
return nullptr;
return nullptr;
}
- auto anchored_lits = fillHamsterLiteralList(build,
- ROSE_ANCHORED_SMALL_BLOCK, ROSE_SMALL_BLOCK_LEN);
+ auto anchored_lits =
+ fillHamsterLiteralList(build, ROSE_ANCHORED_SMALL_BLOCK,
+ ROSE_SMALL_BLOCK_LEN, ROSE_SMALL_BLOCK_LEN);
if (anchored_lits.empty()) {
DEBUG_PRINTF("no small-block anchored literals\n");
return nullptr;
size_t *esize) {
*esize = 0;
- auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED);
+ auto el = fillHamsterLiteralList(build, ROSE_EOD_ANCHORED,
+ build.ematcher_region_size);
if (el.empty()) {
DEBUG_PRINTF("no eod anchored literals\n");
* only lead to a pattern match after max_offset may be excluded.
*/
std::vector<hwlmLiteral> fillHamsterLiteralList(const RoseBuildImpl &build,
- rose_literal_table table, u32 max_offset = ROSE_BOUND_INF);
+ rose_literal_table table, size_t max_len,
+ u32 max_offset = ROSE_BOUND_INF);
aligned_unique_ptr<HWLM> buildFloatingMatcher(const RoseBuildImpl &build,
+ size_t longLitLengthThreshold,
rose_group *fgroups,
size_t *fsize,
- size_t *historyRequired,
- size_t *streamStateRequired);
+ size_t *historyRequired);
aligned_unique_ptr<HWLM> buildSmallBlockMatcher(const RoseBuildImpl &build,
size_t *sbsize);
inst->iter_offset = iter_offset;
}
+void RoseInstrCheckLongLit::write(void *dest, RoseEngineBlob &blob,
+                                  const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    assert(!literal.empty());
+    // Stash the literal itself in the blob and point the instruction at it.
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->lit_length = verify_u32(literal.size());
+    inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
+}
+
+void RoseInstrCheckLongLitNocase::write(void *dest, RoseEngineBlob &blob,
+                                        const OffsetMap &offset_map) const {
+    RoseInstrBase::write(dest, blob, offset_map);
+    assert(!literal.empty());
+    // Stash the (already-uppercased) literal in the blob and point the
+    // instruction at it.
+    auto *inst = static_cast<impl_type *>(dest);
+    inst->lit_length = verify_u32(literal.size());
+    inst->lit_offset = blob.add(literal.c_str(), literal.size(), 1);
+}
+
static
OffsetMap makeOffsetMap(const RoseProgram &program, u32 *total_len) {
OffsetMap offset_map;
#include "util/hash.h"
#include "util/make_unique.h"
#include "util/ue2_containers.h"
+#include "util/ue2string.h"
#include <algorithm>
#include <array>
~RoseInstrMatcherEod() override;
};
+/**
+ * \brief Program instruction: confirm a case-sensitive long literal at the
+ * current offset (emits ROSE_STRUCT_CHECK_LONG_LIT).
+ *
+ * write() adds the literal string itself to the engine blob and records its
+ * offset and length in the emitted instruction.
+ */
+class RoseInstrCheckLongLit
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT,
+                                    ROSE_STRUCT_CHECK_LONG_LIT,
+                                    RoseInstrCheckLongLit> {
+public:
+    std::string literal; //!< Literal to confirm, in its original case.
+
+    RoseInstrCheckLongLit(std::string literal_in)
+        : literal(std::move(literal_in)) {}
+
+    bool operator==(const RoseInstrCheckLongLit &ri) const {
+        return literal == ri.literal;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    // No jump targets, so equivalence is just literal equality.
+    bool equiv_to(const RoseInstrCheckLongLit &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return literal == ri.literal;
+    }
+};
+
+/**
+ * \brief Program instruction: confirm a case-insensitive long literal at the
+ * current offset (emits ROSE_STRUCT_CHECK_LONG_LIT_NOCASE).
+ *
+ * The constructor normalises the literal to upper case so that equality,
+ * hashing and the emitted string are all case-folded.
+ */
+class RoseInstrCheckLongLitNocase
+    : public RoseInstrBaseNoTargets<ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
+                                    ROSE_STRUCT_CHECK_LONG_LIT_NOCASE,
+                                    RoseInstrCheckLongLitNocase> {
+public:
+    std::string literal; //!< Literal to confirm, stored in upper case.
+
+    RoseInstrCheckLongLitNocase(std::string literal_in)
+        : literal(std::move(literal_in)) {
+        upperString(literal);
+    }
+
+    bool operator==(const RoseInstrCheckLongLitNocase &ri) const {
+        return literal == ri.literal;
+    }
+
+    size_t hash() const override {
+        return hash_all(static_cast<int>(opcode), literal);
+    }
+
+    void write(void *dest, RoseEngineBlob &blob,
+               const OffsetMap &offset_map) const override;
+
+    // No jump targets, so equivalence is just literal equality.
+    bool equiv_to(const RoseInstrCheckLongLitNocase &ri, const OffsetMap &,
+                  const OffsetMap &) const {
+        return literal == ri.literal;
+    }
+};
+
class RoseInstrEnd
: public RoseInstrBaseTrivial<ROSE_INSTR_END, ROSE_STRUCT_END,
RoseInstrEnd> {
PROGRAM_CASE(MATCHER_EOD) {}
PROGRAM_NEXT_INSTRUCTION
+ PROGRAM_CASE(CHECK_LONG_LIT) {
+ os << " lit_offset " << ri->lit_offset << endl;
+ os << " lit_length " << ri->lit_length << endl;
+ const char *lit = (const char *)t + ri->lit_offset;
+ os << " literal: \""
+ << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
+ PROGRAM_CASE(CHECK_LONG_LIT_NOCASE) {
+ os << " lit_offset " << ri->lit_offset << endl;
+ os << " lit_length " << ri->lit_length << endl;
+ const char *lit = (const char *)t + ri->lit_offset;
+ os << " literal: \""
+ << escapeString(string(lit, ri->lit_length)) << "\"" << endl;
+ }
+ PROGRAM_NEXT_INSTRUCTION
+
default:
os << " UNKNOWN (code " << int{code} << ")" << endl;
os << " <stopping>" << endl;
}
+/** \brief Dumps a summary of the streaming long literal table, if present. */
+static
+void dumpLongLiteralTable(const RoseEngine *t, FILE *f) {
+    if (!t->longLitTableOffset) {
+        return; // engine has no long literal table
+    }
+
+    fprintf(f, "\n");
+    fprintf(f, "Long literal table (streaming):\n");
+
+    const auto *table =
+        (const struct RoseLongLitTable *)loadFromByteCodeOffset(
+            t, t->longLitTableOffset);
+
+    const u32 count_case = table->boundaryCase;
+    const u32 count_nocase = table->boundaryNocase - count_case;
+
+    fprintf(f, "  longest len: %u\n", table->maxLen);
+    fprintf(f, "  counts: %u caseful, %u caseless\n", count_case,
+            count_nocase);
+    fprintf(f, "  hash bits: %u caseful, %u caseless\n",
+            table->hashNBitsCase, table->hashNBitsNocase);
+    fprintf(f, "  state bits: %u caseful, %u caseless\n",
+            table->streamStateBitsCase, table->streamStateBitsNocase);
+    fprintf(f, "  stream state: %u bytes\n", table->streamStateBytes);
+}
+
// Externally accessible functions
void roseDumpText(const RoseEngine *t, FILE *f) {
fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8);
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
- fprintf(f, " - floating matcher : %u bytes\n", t->floatingStreamState);
+ fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
fprintf(f, " - active array : %u bytes\n",
mmbit_size(t->activeArrayCount));
fprintf(f, " - active rose : %u bytes\n",
fprintf(f, "\nSmall-block literal matcher stats:\n\n");
hwlmPrintStats(sbtable, f);
}
+
+ dumpLongLiteralTable(t, f);
}
#define DUMP_U8(o, member) \
DUMP_U32(t, ematcherOffset);
DUMP_U32(t, fmatcherOffset);
DUMP_U32(t, sbmatcherOffset);
+ DUMP_U32(t, longLitTableOffset);
DUMP_U32(t, amatcherMinWidth);
DUMP_U32(t, fmatcherMinWidth);
DUMP_U32(t, eodmatcherMinWidth);
DUMP_U32(t, stateOffsets.anchorState);
DUMP_U32(t, stateOffsets.groups);
DUMP_U32(t, stateOffsets.groups_size);
- DUMP_U32(t, stateOffsets.floatingMatcherState);
+ DUMP_U32(t, stateOffsets.longLitState);
DUMP_U32(t, stateOffsets.somLocation);
DUMP_U32(t, stateOffsets.somValid);
DUMP_U32(t, stateOffsets.somWritable);
DUMP_U32(t, ematcherRegionSize);
DUMP_U32(t, somRevCount);
DUMP_U32(t, somRevOffsetOffset);
- DUMP_U32(t, floatingStreamState);
+ DUMP_U32(t, longLitStreamState);
fprintf(f, "}\n");
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
}
/** Size of packed Rose groups value, in bytes. */
u32 groups_size;
- /** State for floating literal matcher (managed by HWLM). */
- u32 floatingMatcherState;
+ /** State for long literal support. */
+ u32 longLitState;
/** Packed SOM location slots. */
u32 somLocation;
u32 ematcherOffset; // offset of the eod-anchored literal matcher (bytes)
u32 fmatcherOffset; // offset of the floating literal matcher (bytes)
u32 sbmatcherOffset; // offset of the small-block literal matcher (bytes)
+ u32 longLitTableOffset; // offset of the long literal table
u32 amatcherMinWidth; /**< minimum number of bytes required for a pattern
* involved with the anchored table to produce a full
* match. */
u32 ematcherRegionSize; /* max region size to pass to ematcher */
u32 somRevCount; /**< number of som reverse nfas */
u32 somRevOffsetOffset; /**< offset to array of offsets to som rev nfas */
- u32 floatingStreamState; // size in bytes
+ u32 longLitStreamState; // size in bytes
struct scatter_full_plan state_init;
};
u32 anchoredMinDistance; /* start of region to run anchored table over */
};
+/**
+ * \brief Long literal table header.
+ *
+ * In the engine bytecode this header is followed by the literal offset
+ * table, the literal strings themselves, and then the caseful and caseless
+ * hash tables (located via hashOffsetCase/hashOffsetNocase).
+ */
+struct RoseLongLitTable {
+    /** \brief String ID one beyond the maximum entry for caseful literals. */
+    u32 boundaryCase;
+
+    /**
+     * \brief String ID one beyond the maximum entry for caseless literals.
+     * This is also the total size of the literal table.
+     */
+    u32 boundaryNocase;
+
+    /**
+     * \brief Offset of the caseful hash table (relative to RoseLongLitTable
+     * base).
+     *
+     * Offset is zero if no such table exists.
+     */
+    u32 hashOffsetCase;
+
+    /**
+     * \brief Offset of the caseless hash table (relative to RoseLongLitTable
+     * base).
+     *
+     * Offset is zero if no such table exists.
+     */
+    u32 hashOffsetNocase;
+
+    /** \brief lg2 of the size of the caseful hash table. */
+    u32 hashNBitsCase;
+
+    /** \brief lg2 of the size of the caseless hash table. */
+    u32 hashNBitsNocase;
+
+    /**
+     * \brief Number of bits of packed stream state for the caseful hash table.
+     */
+    u8 streamStateBitsCase;
+
+    /**
+     * \brief Number of bits of packed stream state for the caseless hash
+     * table.
+     */
+    u8 streamStateBitsNocase;
+
+    /** \brief Total size of packed stream state in bytes. */
+    u8 streamStateBytes;
+
+    /** \brief Max length of literal prefixes. */
+    u8 maxLen;
+};
+
+/**
+ * \brief One of these structures per literal entry in our long literal table.
+ *
+ * A literal's length is implicit: it runs up to the offset stored in the
+ * next entry (the table carries one extra sentinel entry for this purpose).
+ */
+struct RoseLongLiteral {
+    /**
+     * \brief Offset of the literal string itself, relative to
+     * RoseLongLitTable base.
+     */
+    u32 offset;
+};
+
+/** \brief "No further links" value used for \ref RoseLongLitHashEntry::link. */
+#define LINK_INVALID 0xffffffff
+
+/**
+ * \brief One of these structures per hash table entry in our long literal
+ * table.
+ */
+struct RoseLongLitHashEntry {
+    /**
+     * \brief Bitfield used as a quick guard for hash buckets.
+     *
+     * For a given hash value, the six hash bits directly above the
+     * bucket-index bits select a bit here; that bit is switched on if this
+     * bucket is used for such a hash (see fillHashes in the builder:
+     * (h >> nbits) & 63).
+     */
+    u64a bitfield;
+
+    /**
+     * \brief Stream state value for this entry: the offset within the long
+     * literal table one past the end of the hashed substring (string offset
+     * + position + prefix length).
+     */
+    u32 state;
+
+    /**
+     * \brief Hash table index of next entry in the chain for this bucket, or
+     * LINK_INVALID to terminate the chain.
+     */
+    u32 link;
+};
+
static really_inline
const struct anchored_matcher_info *getALiteralMatcher(
const struct RoseEngine *t) {
/** \brief Run the EOD-anchored HWLM literal matcher. */
ROSE_INSTR_MATCHER_EOD,
- LAST_ROSE_INSTRUCTION = ROSE_INSTR_MATCHER_EOD //!< Sentinel.
+ /**
+ * \brief Confirm a case-sensitive literal at the current offset. In
+ * streaming mode, this makes use of the long literal table.
+ */
+ ROSE_INSTR_CHECK_LONG_LIT,
+
+ /**
+ * \brief Confirm a case-insensitive literal at the current offset. In
+ * streaming mode, this makes use of the long literal table.
+ */
+ ROSE_INSTR_CHECK_LONG_LIT_NOCASE,
+
+ LAST_ROSE_INSTRUCTION = ROSE_INSTR_CHECK_LONG_LIT_NOCASE //!< Sentinel.
};
struct ROSE_STRUCT_END {
u8 code; //!< From enum RoseInstructionCode.
};
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_LONG_LIT {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 lit_offset; //!< Offset of literal string (relative to engine base).
+    u32 lit_length; //!< Length of literal string.
+};
+
+/** Note: check failure will halt program. */
+struct ROSE_STRUCT_CHECK_LONG_LIT_NOCASE {
+    u8 code; //!< From enum RoseInstructionCode.
+    u32 lit_offset; //!< Offset of literal string (relative to engine base).
+    u32 lit_length; //!< Length of literal string.
+};
+
#endif // ROSE_ROSE_PROGRAM_H
}
static really_inline
-u8 *getFloatingMatcherState(const struct RoseEngine *t, char *state) {
- return (u8 *)(state + t->stateOffsets.floatingMatcherState);
+u8 *getLongLitState(const struct RoseEngine *t, char *state) {
+ return (u8 *)(state + t->stateOffsets.longLitState);
}
static really_inline
#include "miracle.h"
#include "program_runtime.h"
#include "rose.h"
+#include "rose_internal.h"
+#include "stream_long_lit.h"
#include "hwlm/hwlm.h"
#include "nfa/mcclellan.h"
#include "nfa/nfa_api.h"
roseFlushLastByteHistory(t, scratch, offset + length);
tctxt->lastEndOffset = offset + length;
storeGroups(t, state, tctxt->groups);
+ storeLongLiteralState(t, state, scratch);
}
static really_inline
}
size_t hlength = scratch->core_info.hlen;
+ char rebuild = 0;
+
+ if (hlength) {
+ // Can only have long literal state or rebuild if this is not the
+ // first write to this stream.
+ loadLongLiteralState(t, state, scratch);
+ rebuild = (scratch->core_info.status & STATUS_DELAY_DIRTY) &&
+ (t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
+ offset < t->maxFloatingDelayedMatch);
+ }
- char rebuild = hlength &&
- (scratch->core_info.status & STATUS_DELAY_DIRTY) &&
- (t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
- offset < t->maxFloatingDelayedMatch);
DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
rebuild, scratch->core_info.status,
t->maxFloatingDelayedMatch, offset);
}
DEBUG_PRINTF("start=%zu\n", start);
- u8 *stream_state;
- if (t->floatingStreamState) {
- stream_state = getFloatingMatcherState(t, state);
- } else {
- stream_state = NULL;
- }
-
DEBUG_PRINTF("BEGIN FLOATING (over %zu/%zu)\n", flen, length);
hwlmExecStreaming(ftable, scratch, flen, start, roseFloatingCallback,
- scratch, tctxt->groups & t->floating_group_mask,
- stream_state);
+ scratch, tctxt->groups & t->floating_group_mask);
}
flush_delay_and_exit:
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef STREAM_LONG_LIT_H
+#define STREAM_LONG_LIT_H
+
+#include "rose.h"
+#include "rose_common.h"
+#include "rose_internal.h"
+#include "stream_long_lit_hash.h"
+#include "util/copybytes.h"
+
+/** \brief Returns the literal table, laid out in the bytecode immediately
+ * after the RoseLongLitTable header (rounded up to a 16-byte boundary). */
+static really_inline
+const struct RoseLongLiteral *
+getLitTab(const struct RoseLongLitTable *ll_table) {
+    return (const struct RoseLongLiteral *)((const char *)ll_table +
+            ROUNDUP_16(sizeof(struct RoseLongLitTable)));
+}
+
+/** \brief First literal index for the given case-mode: caseful literals
+ * occupy [0, boundaryCase), caseless ones start at boundaryCase. */
+static really_inline
+u32 get_start_lit_idx(const struct RoseLongLitTable *ll_table,
+                      const char nocase) {
+    if (nocase) {
+        return ll_table->boundaryCase;
+    }
+    return 0;
+}
+
+/** \brief One-past-the-last literal index for the given case-mode. */
+static really_inline
+u32 get_end_lit_idx(const struct RoseLongLitTable *ll_table,
+                    const char nocase) {
+    return nocase ? ll_table->boundaryNocase : ll_table->boundaryCase;
+}
+
+// search for the literal index that contains the current state
+static rose_inline
+u32 findLitTabEntry(const struct RoseLongLitTable *ll_table,
+                    u32 stateValue, const char nocase) {
+    const struct RoseLongLiteral *litTab = getLitTab(ll_table);
+    u32 lo = get_start_lit_idx(ll_table, nocase);
+    u32 hi = get_end_lit_idx(ll_table, nocase);
+
+    // Now move stateValue back by one so that we're looking for the
+    // litTab entry that includes the string, not the one 'one past' it
+    stateValue -= 1;
+    assert(lo != hi);
+    assert(litTab[lo].offset <= stateValue);
+    assert(litTab[hi].offset > stateValue);
+
+    // binary search to find the entry e such that:
+    // litTab[e].offsetToLiteral <= stateValue < litTab[e+1].offsetToLiteral
+    while (lo + 1 < hi) {
+        u32 mid = (lo + hi) / 2;
+        if (litTab[mid].offset <= stateValue) {
+            lo = mid;
+        } else { // (litTab[mid].offset > stateValue) {
+            hi = mid;
+        }
+    }
+    assert(litTab[lo].offset <= stateValue);
+    assert(litTab[hi].offset > stateValue);
+    return lo;
+}
+
+// Reads from stream state and unpacks values into stream state table.
+// Layout: the caseful value occupies the low streamStateBitsCase bits of the
+// packed word; the caseless value sits immediately above it.
+static really_inline
+void loadLongLitStreamState(const struct RoseLongLitTable *ll_table,
+                            const u8 *ll_state, u32 *state_case,
+                            u32 *state_nocase) {
+    assert(ll_table);
+    assert(ll_state);
+    assert(state_case && state_nocase);
+
+    u8 ss_bytes = ll_table->streamStateBytes;
+    u8 ssb = ll_table->streamStateBitsCase;
+    UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
+    assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
+
+#if defined(ARCH_32_BIT)
+    // On 32-bit hosts, we may be able to avoid having to do any u64a
+    // manipulation at all.
+    if (ss_bytes <= 4) {
+        u32 ssb_mask = (1U << ssb) - 1;
+        u32 streamVal = partial_load_u32(ll_state, ss_bytes);
+        *state_case = (u32)(streamVal & ssb_mask);
+        *state_nocase = (u32)(streamVal >> ssb);
+        return;
+    }
+#endif
+
+    // General path: load up to 8 bytes and split the two bitfields.
+    u64a ssb_mask = (1ULL << ssb) - 1;
+    u64a streamVal = partial_load_u64a(ll_state, ss_bytes);
+    *state_case = (u32)(streamVal & ssb_mask);
+    *state_nocase = (u32)(streamVal >> ssb);
+}
+
+/** \brief Bytecode offset of the first literal string for this case-mode,
+ * used as the base when packing/unpacking stream state values. */
+static really_inline
+u32 getBaseOffsetOfLits(const struct RoseLongLitTable *ll_table,
+                        const char nocase) {
+    u32 lit_idx = get_start_lit_idx(ll_table, nocase);
+    return getLitTab(ll_table)[lit_idx].offset;
+}
+
+/** \brief Convert a packed stream state value back into a bytecode offset.
+ * Values are stored biased by +1 so that zero can mean "no state". */
+static really_inline
+u32 unpackStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
+                   u32 v) {
+    return v + getBaseOffsetOfLits(ll_table, nocase) - 1;
+}
+
+/** \brief Convert a bytecode offset into a packed stream state value.
+ * Inverse of \ref unpackStateVal; result is always non-zero. */
+static really_inline
+u32 packStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
+                 u32 v) {
+    return v - getBaseOffsetOfLits(ll_table, nocase) + 1;
+}
+
+/**
+ * \brief If \a state is non-zero, decode it into a (buffer, length) pair
+ * pointing at a literal prefix inside the bytecode and record it in the
+ * RoseContext for the given case-mode.
+ */
+static rose_inline
+void loadLongLiteralStateMode(struct hs_scratch *scratch,
+                              const struct RoseLongLitTable *ll_table,
+                              const struct RoseLongLiteral *litTab,
+                              const u32 state, const char nocase) {
+    if (!state) {
+        DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful");
+        return;
+    }
+
+    // Recover the literal entry and the length of the prefix that was live
+    // at the end of the previous stream write.
+    u32 stateValue = unpackStateVal(ll_table, nocase, state);
+    u32 idx = findLitTabEntry(ll_table, stateValue, nocase);
+    size_t found_offset = litTab[idx].offset;
+    const u8 *found_buf = found_offset + (const u8 *)ll_table;
+    size_t found_sz = stateValue - found_offset;
+
+    struct RoseContext *tctxt = &scratch->tctxt;
+    if (nocase) {
+        tctxt->ll_buf_nocase = found_buf;
+        tctxt->ll_len_nocase = found_sz;
+    } else {
+        tctxt->ll_buf = found_buf;
+        tctxt->ll_len = found_sz;
+    }
+}
+
+/**
+ * \brief Restore long literal context at the start of a stream write.
+ *
+ * Defaults the ll_buf pointers to the history buffer, then (if the engine
+ * has a long literal table) overrides them from the packed stream state for
+ * any case-mode that had a live literal prefix.
+ */
+static rose_inline
+void loadLongLiteralState(const struct RoseEngine *t, char *state,
+                          struct hs_scratch *scratch) {
+    if (!t->longLitTableOffset) {
+        return;
+    }
+
+    // Default: use the ordinary history buffer for confirm checks.
+    scratch->tctxt.ll_buf = scratch->core_info.hbuf;
+    scratch->tctxt.ll_len = scratch->core_info.hlen;
+    scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf;
+    scratch->tctxt.ll_len_nocase = scratch->core_info.hlen;
+
+    const struct RoseLongLitTable *ll_table =
+        getByOffset(t, t->longLitTableOffset);
+    const struct RoseLongLiteral *litTab = getLitTab(ll_table);
+    const u8 *ll_state = getLongLitState(t, state);
+
+    u32 state_case;
+    u32 state_nocase;
+    loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase);
+
+    loadLongLiteralStateMode(scratch, ll_table, litTab, state_case, 0);
+    loadLongLiteralStateMode(scratch, ll_table, litTab, state_nocase, 1);
+}
+
+/**
+ * \brief Verify that the literal prefix identified by \a hashState actually
+ * matches the data at the end of the scanned buffer, reaching back into the
+ * stored ll_buf history if the prefix is longer than the current block.
+ *
+ * Returns 1 if confirmed, 0 otherwise.
+ */
+static rose_inline
+char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
+                        const hs_scratch_t *scratch, u32 hashState,
+                        const char nocase) {
+    const struct RoseLongLiteral *litTab = getLitTab(ll_table);
+    u32 idx = findLitTabEntry(ll_table, hashState, nocase);
+    size_t found_offset = litTab[idx].offset;
+    const u8 *s = found_offset + (const u8 *)ll_table;
+    assert(hashState > found_offset);
+    size_t len = hashState - found_offset;
+    const u8 *buf = scratch->core_info.buf;
+    const size_t buf_len = scratch->core_info.len;
+
+    if (len > buf_len) {
+        // Prefix extends before this block: compare the overhang against the
+        // long literal history recorded at the start of this write.
+        const struct RoseContext *tctxt = &scratch->tctxt;
+        const u8 *hist = nocase ? tctxt->ll_buf_nocase : tctxt->ll_buf;
+        size_t hist_len = nocase ? tctxt->ll_len_nocase : tctxt->ll_len;
+
+        if (len > buf_len + hist_len) {
+            return 0; // Break out - not enough total history
+        }
+
+        size_t overhang = len - buf_len;
+        assert(overhang <= hist_len);
+
+        if (cmpForward(hist + hist_len - overhang, s, overhang, nocase)) {
+            return 0;
+        }
+        s += overhang;
+        len -= overhang;
+    }
+
+    // if we got here, we don't need history or we compared ok out of history
+    assert(len <= buf_len);
+
+    if (cmpForward(buf + buf_len - len, s, len, nocase)) {
+        return 0;
+    }
+
+    DEBUG_PRINTF("confirmed hashState=%u\n", hashState);
+    return 1;
+}
+
+/**
+ * \brief Compute caseful/caseless hashes over the LONG_LIT_HASH_LEN bytes
+ * starting hash_len bytes before the end of the scanned data, stitching
+ * together history and current buffers when the region straddles them.
+ *
+ * Only modes with a non-empty hash table are computed; the other output
+ * parameter is left untouched.
+ */
+static rose_inline
+void calcStreamingHash(const struct core_info *ci,
+                       const struct RoseLongLitTable *ll_table, u8 hash_len,
+                       u32 *hash_case, u32 *hash_nocase) {
+    assert(hash_len >= LONG_LIT_HASH_LEN);
+
+    // Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from
+    // location (end of buffer - hash_len). If this block can be satisfied
+    // entirely from either the current buffer or the history buffer, we pass
+    // in the pointer directly; otherwise we must make a copy.
+
+    u8 tempbuf[LONG_LIT_HASH_LEN];
+    const u8 *base;
+
+    if (hash_len > ci->len) {
+        size_t overhang = hash_len - ci->len;
+        if (overhang >= LONG_LIT_HASH_LEN) {
+            // Can read enough to hash from inside the history buffer.
+            assert(overhang <= ci->hlen);
+            base = ci->hbuf + ci->hlen - overhang;
+        } else {
+            // Copy: first chunk from history buffer.
+            assert(overhang <= ci->hlen);
+            copy_upto_32_bytes(tempbuf, ci->hbuf + ci->hlen - overhang,
+                               overhang);
+            // Copy: second chunk from current buffer.
+            size_t copy_buf_len = LONG_LIT_HASH_LEN - overhang;
+            assert(copy_buf_len <= ci->len);
+            copy_upto_32_bytes(tempbuf + overhang, ci->buf, copy_buf_len);
+            // Read from our temporary buffer for the hash.
+            base = tempbuf;
+        }
+    } else {
+        // Can read enough to hash from inside the current buffer.
+        base = ci->buf + ci->len - hash_len;
+    }
+
+    if (ll_table->hashNBitsCase) {
+        *hash_case = hashLongLiteral(base, LONG_LIT_HASH_LEN, 0);
+        DEBUG_PRINTF("caseful hash %u\n", *hash_case);
+    }
+    if (ll_table->hashNBitsNocase) {
+        *hash_nocase = hashLongLiteral(base, LONG_LIT_HASH_LEN, 1);
+        DEBUG_PRINTF("caseless hash %u\n", *hash_nocase);
+    }
+}
+
+/** \brief Returns the base of the hash table for the given case-mode, found
+ * at a per-mode offset from the start of the RoseLongLitTable. */
+static really_inline
+const struct RoseLongLitHashEntry *
+getHashTableBase(const struct RoseLongLitTable *ll_table, const char nocase) {
+    const u32 hashOffset = nocase ? ll_table->hashOffsetNocase
+                                  : ll_table->hashOffsetCase;
+    return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
+                                                 hashOffset);
+}
+
+/**
+ * \brief Look up the hash table entry for hash \a h, returning NULL when
+ * there is no hash table for this case-mode or the entry's presence bit for
+ * this hash is not set.
+ */
+static rose_inline
+const struct RoseLongLitHashEntry *
+getLongLitHashEnt(const struct RoseLongLitTable *ll_table, u32 h,
+                  const char nocase) {
+    u32 nbits = nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase;
+    if (!nbits) {
+        return NULL;
+    }
+
+    // Low nbits of the hash select the table entry. Use an unsigned shift:
+    // (1 << nbits) is a signed int shift and overflows (UB) for nbits == 31.
+    u32 h_ent = h & ((1U << nbits) - 1);
+    u32 h_low = (h >> nbits) & 63;
+
+    const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);
+    const struct RoseLongLitHashEntry *ent = tab + h_ent;
+
+    // Each entry carries a 64-bit bitfield of which h_low values are present
+    // in this bucket; a clear bit means a definite miss.
+    if (!((ent->bitfield >> h_low) & 0x1)) {
+        return NULL;
+    }
+
+    return ent;
+}
+
+/**
+ * \brief Walk the hash bucket's collision chain, confirming each candidate
+ * against the buffer data; returns the packed stream state value for the
+ * first confirmed literal prefix, or 0 if nothing confirms.
+ */
+static rose_inline
+u32 storeLongLiteralStateMode(const struct hs_scratch *scratch,
+                              const struct RoseLongLitTable *ll_table,
+                              const struct RoseLongLitHashEntry *ent,
+                              const char nocase) {
+    assert(ent);
+    assert(nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase);
+
+    const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);
+
+    u32 packed_state = 0;
+    while (1) {
+        if (confirmLongLiteral(ll_table, scratch, ent->state, nocase)) {
+            packed_state = packStateVal(ll_table, nocase, ent->state);
+            DEBUG_PRINTF("set %s state to %u\n", nocase ? "nocase" : "case",
+                         packed_state);
+            break;
+        }
+        // Follow the chain link to the next entry in this bucket.
+        if (ent->link == LINK_INVALID) {
+            break;
+        }
+        ent = tab + ent->link;
+    }
+    return packed_state;
+}
+
+#ifndef NDEBUG
+// Defensive checking (used in assert) that these table values don't overflow
+// the range available.
+// NOTE(review): masks intentionally narrow to u32; assumes ssb, ssb_nc <= 32
+// since the state values themselves are u32 -- confirm against the compiler.
+static really_inline
+char streamingTableOverflow(u32 state_case, u32 state_nocase, u8 ssb,
+                            u8 ssb_nc) {
+    u32 ssb_mask = (1ULL << (ssb)) - 1;
+    if (state_case & ~ssb_mask) {
+        return 1;
+    }
+    u32 ssb_nc_mask = (1ULL << (ssb_nc)) - 1;
+    if (state_nocase & ~ssb_nc_mask) {
+        return 1;
+    }
+    return 0;
+}
+#endif
+
+// Reads from stream state table and packs values into stream state.
+// Layout mirrors loadLongLitStreamState: caseful value in the low
+// streamStateBitsCase bits, caseless value immediately above it.
+static rose_inline
+void storeLongLitStreamState(const struct RoseLongLitTable *ll_table,
+                             u8 *ll_state, u32 state_case, u32 state_nocase) {
+    assert(ll_table);
+    assert(ll_state);
+
+    u8 ss_bytes = ll_table->streamStateBytes;
+    u8 ssb = ll_table->streamStateBitsCase;
+    UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
+    assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8);
+    assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc));
+
+#if defined(ARCH_32_BIT)
+    // On 32-bit hosts, we may be able to avoid having to do any u64a
+    // manipulation at all.
+    if (ss_bytes <= 4) {
+        u32 stagingStreamState = state_case;
+        stagingStreamState |= (state_nocase << ssb);
+        partial_store_u32(ll_state, stagingStreamState, ss_bytes);
+        return;
+    }
+#endif
+
+    // General path: pack both fields into a u64a and store the low bytes.
+    u64a stagingStreamState = (u64a)state_case;
+    stagingStreamState |= (u64a)state_nocase << ssb;
+    partial_store_u64a(ll_state, stagingStreamState, ss_bytes);
+}
+
+/**
+ * \brief At the end of a stream write, hash the tail of the scanned data,
+ * look for a live long literal prefix in each case-mode's hash table, and
+ * pack the results into the stream state for the next write.
+ */
+static rose_inline
+void storeLongLiteralState(const struct RoseEngine *t, char *state,
+                           struct hs_scratch *scratch) {
+    if (!t->longLitTableOffset) {
+        DEBUG_PRINTF("no table\n");
+        return;
+    }
+
+    struct core_info *ci = &scratch->core_info;
+    const struct RoseLongLitTable *ll_table =
+        getByOffset(t, t->longLitTableOffset);
+    assert(ll_table->maxLen);
+
+    DEBUG_PRINTF("maxLen=%u, len=%zu, hlen=%zu\n", ll_table->maxLen, ci->len,
+                 ci->hlen);
+
+    u32 state_case = 0;
+    u32 state_nocase = 0;
+
+    // If we don't have enough history, we don't need to do anything.
+    if (ll_table->maxLen <= ci->len + ci->hlen) {
+        u32 hash_case = 0;
+        u32 hash_nocase = 0;
+
+        calcStreamingHash(ci, ll_table, ll_table->maxLen, &hash_case,
+                          &hash_nocase);
+
+        const struct RoseLongLitHashEntry *ent_case =
+            getLongLitHashEnt(ll_table, hash_case, 0);
+        const struct RoseLongLitHashEntry *ent_nocase =
+            getLongLitHashEnt(ll_table, hash_nocase, 1);
+
+        DEBUG_PRINTF("ent_caseful=%p, ent_caseless=%p\n", ent_case, ent_nocase);
+
+        if (ent_case) {
+            state_case = storeLongLiteralStateMode(scratch, ll_table,
+                                                   ent_case, 0);
+        }
+
+        if (ent_nocase) {
+            state_nocase = storeLongLiteralStateMode(scratch, ll_table,
+                                                     ent_nocase, 1);
+        }
+    }
+
+    // A zero value in either slot means "no live prefix" for that mode.
+    DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase);
+
+    u8 *ll_state = getLongLitState(t, state);
+    storeLongLitStreamState(ll_table, ll_state, state_case, state_nocase);
+}
+
+#endif // STREAM_LONG_LIT_H
--- /dev/null
+/*
+ * Copyright (c) 2016, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef STREAM_LONG_LIT_HASH_H
+#define STREAM_LONG_LIT_HASH_H
+
+#include "ue2common.h"
+#include "util/unaligned.h"
+
+/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */
+#define LONG_LIT_HASH_LEN 24
+
+/** \brief Hash function used for long literal table in streaming mode.
+ *
+ * Reads exactly LONG_LIT_HASH_LEN bytes from \a ptr; \a len is only used to
+ * assert the caller supplied at least that much data. When \a nocase is set,
+ * ASCII case is folded before hashing so caseless variants hash equally.
+ */
+static really_inline
+u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) {
+    // Clears bit 5 of every byte, folding ASCII letters to uppercase.
+    const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
+    const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
+
+    // We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this
+    // hash are for strings longer than this. (Use the macro, not a magic 24,
+    // so the assert tracks any future change to the hash width.)
+    assert(len >= LONG_LIT_HASH_LEN);
+
+    u64a v1 = unaligned_load_u64a(ptr);
+    u64a v2 = unaligned_load_u64a(ptr + 8);
+    u64a v3 = unaligned_load_u64a(ptr + 16);
+    if (nocase) {
+        v1 &= CASEMASK;
+        v2 &= CASEMASK;
+        v3 &= CASEMASK;
+    }
+    v1 *= MULTIPLIER;
+    v2 *= MULTIPLIER * MULTIPLIER;
+    v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER;
+    v1 >>= 32;
+    v2 >>= 32;
+    v3 >>= 32;
+    return v1 ^ v2 ^ v3;
+}
+
+#endif // STREAM_LONG_LIT_HASH_H
assert(scratch);
assert(!can_stop_matching(scratch));
- char *state = getMultiState(stream_state);
-
const struct RoseEngine *rose = stream_state->rose;
const struct HWLM *ftable = getFLiteralMatcher(rose);
size_t len2 = scratch->core_info.len;
- u8 *hwlm_stream_state;
- if (rose->floatingStreamState) {
- hwlm_stream_state = getFloatingMatcherState(rose, state);
- } else {
- hwlm_stream_state = NULL;
- }
-
DEBUG_PRINTF("::: streaming rose ::: offset = %llu len = %zu\n",
stream_state->offset, scratch->core_info.len);
// start the match region at zero.
const size_t start = 0;
- hwlmExecStreaming(ftable, scratch, len2, start, roseCallback,
- scratch, rose->initialGroups, hwlm_stream_state);
+ hwlmExecStreaming(ftable, scratch, len2, start, roseCallback, scratch,
+ rose->initialGroups);
if (!told_to_stop_matching(scratch) &&
isAllExhausted(rose, scratch->core_info.exhaustionVector)) {
u32 filledDelayedSlots;
u32 curr_qi; /**< currently executing main queue index during
* \ref nfaQueueExec */
+
+ /**
+ * \brief Buffer for caseful long literal support, used in streaming mode
+ * only.
+ *
+ * If a long literal prefix was at the end of the buffer at the end of a
+ * stream write, then the long lit table hashes it and stores the result in
+ * stream state. At the start of the next write, this value is used to set
+ * this buffer to the matching prefix string (stored in the bytecode).
+ */
+ const u8 *ll_buf;
+
+ /** \brief Length in bytes of the string pointed to by ll_buf. */
+ size_t ll_len;
+
+ /** \brief Caseless version of ll_buf. */
+ const u8 *ll_buf_nocase;
+
+ /** \brief Length in bytes of the string pointed to by ll_buf_nocase. */
+ size_t ll_len_nocase;
};
struct match_deduper {
/// Compares two strings, returns non-zero if they're different.
u32 cmp(const char *a, const char *b, size_t len, bool nocase);
+/**
+ * \brief String type that also records whether the whole string is caseful or
+ * caseless.
+ *
+ * You should use \ref ue2_literal if you need to represent a mixed-case
+ * literal.
+ */
+struct ue2_case_string {
+    // Caseless strings are normalised via upperString() on construction so
+    // that equality can compare bytes directly.
+    ue2_case_string(std::string s_in, bool nocase_in)
+        : s(std::move(s_in)), nocase(nocase_in) {
+        if (nocase) {
+            upperString(s);
+        }
+    }
+
+    bool operator==(const ue2_case_string &other) const {
+        return s == other.s && nocase == other.nocase;
+    }
+
+    std::string s; // string contents (normalised when nocase is set)
+    bool nocase;   // true if this string is caseless
+};
+
struct ue2_literal {
public:
/// Single element proxy, pointed to by our const_iterator.
static
hwlm_error_t safeExecStreaming(const FDR *fdr, const u8 *hbuf, size_t hlen,
const u8 *buf, size_t len, size_t start,
- HWLMCallback cb, void *ctxt, hwlm_group_t groups,
- u8 *stream_state) {
+ HWLMCallback cb, void *ctxt,
+ hwlm_group_t groups) {
array<u8, 16> wrapped_history = {{'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}};
if (hlen < 16) {
memcpy(new_hbuf, hbuf, hlen);
hbuf = new_hbuf;
}
- return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups,
- stream_state);
+ return fdrExecStreaming(fdr, hbuf, hlen, buf, len, start, cb, ctxt, groups);
}
TEST_P(FDRp, SmallStreaming) {
expected.push_back(match(2, 2, 1));
safeExecStreaming(fdr.get(), (const u8 *)"", 0, (const u8 *)"aaar", 4, 0,
- decentCallback, &matches, HWLM_ALL_GROUPS, nullptr);
+ decentCallback, &matches, HWLM_ALL_GROUPS);
for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) {
EXPECT_EQ(expected[i], matches[i]);
}
expected.push_back(match(1, 8, 10));
safeExecStreaming(fdr.get(), (const u8 *)"aaar", 4, (const u8 *)"dvark", 5,
- 0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr);
+ 0, decentCallback, &matches, HWLM_ALL_GROUPS);
for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) {
EXPECT_EQ(expected[i], matches[i] + 4);
safeExecStreaming(fdr.get(), (const u8 *)"foobar", 6,
(const u8 *)"aardvarkkk", 10, 0, decentCallback, &matches,
- HWLM_ALL_GROUPS, nullptr);
+ HWLM_ALL_GROUPS);
for (u32 i = 0; i < MIN(expected.size(), matches.size()); i++) {
EXPECT_EQ(expected[i], matches[i] + 6);
EXPECT_EQ(0U, count);
}
-TEST_P(FDRp, VeryLongLiteral) {
- const u32 hint = GetParam();
- SCOPED_TRACE(hint);
- vector<hwlmLiteral> lits;
-
- string s1000;
- for(int i = 0; i < 1000; i++) {
- s1000 += char('A' + i % 10);
- }
-
- string s66k;
- for(int i = 0; i < 66; i++) {
- s66k += s1000;
- }
-
- string corpus = s66k + s66k;
- lits.push_back(hwlmLiteral(s66k.c_str(), 0, 10));
-
- auto fdr = fdrBuildTableHinted(lits, false, hint, get_current_target(), Grey());
- CHECK_WITH_TEDDY_OK_TO_FAIL(fdr, hint);
-
- vector<match> matches;
- u32 rv = fdrExec(fdr.get(), (const u8 *)s66k.c_str(), s66k.size(), 0,
- decentCallback, &matches, HWLM_ALL_GROUPS);
- EXPECT_EQ(0U, rv);
- ASSERT_EQ(1U, matches.size());
- ASSERT_EQ(match(0, 65999, 10), matches[0]);
-
- matches.clear();
- rv = fdrExec(fdr.get(), (const u8 *)corpus.c_str(), corpus.size(), 0,
- decentCallback, &matches, HWLM_ALL_GROUPS);
- EXPECT_EQ(0U, rv);
- for (u32 i = 0; i < matches.size(); i++) {
- ASSERT_EQ(match(10 * i, 65999 + 10 * i, 10), matches[i]);
- }
- EXPECT_EQ(6601U, matches.size());
-}
-
TEST_P(FDRp, moveByteStream) {
const u32 hint = GetParam();
SCOPED_TRACE(hint);
// check matches
vector<match> matches;
- fdrStatus = safeExecStreaming(
- fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2,
- 0, decentCallback, &matches, HWLM_ALL_GROUPS, nullptr);
+ fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1,
+ (const u8 *)data2, data_len2, 0,
+ decentCallback, &matches, HWLM_ALL_GROUPS);
ASSERT_EQ(0, fdrStatus);
ASSERT_EQ(4U, matches.size());
// check matches
vector<match> matches;
- fdrStatus = safeExecStreaming(
- fdr.get(), (const u8 *)data1, data_len1, (const u8 *)data2, data_len2,
- 0, decentCallbackT, &matches, HWLM_ALL_GROUPS, nullptr);
+ fdrStatus = safeExecStreaming(fdr.get(), (const u8 *)data1, data_len1,
+ (const u8 *)data2, data_len2, 0,
+ decentCallbackT, &matches, HWLM_ALL_GROUPS);
ASSERT_EQ(HWLM_TERMINATED, fdrStatus);
ASSERT_EQ(1U, matches.size());
ASSERT_EQ(1U, matches.size());
}
-
-TEST(FDR, ManyLengths) {
- // UE-2400: we had a crash due to div by zero in the compiler when given a
- // set of literals with precisely 512 different lengths.
- const u32 num = 512;
- vector<hwlmLiteral> lits;
- char c = 0;
- string s;
- for (u32 i = 0; i < num; i++) {
- s.push_back(c++);
- lits.push_back(hwlmLiteral(s, false, i + 1));
- }
-
- auto fdr = fdrBuildTable(lits, false, get_current_target(), Grey());
- ASSERT_TRUE(fdr != nullptr);
-
- // Confirm that we can scan against this FDR table as well.
-
- vector<match> matches;
-
- hwlm_error_t fdrStatus =
- fdrExec(fdr.get(), (const u8 *)s.c_str(), s.size(), 0, decentCallback,
- &matches, HWLM_ALL_GROUPS);
- ASSERT_EQ(HWLM_SUCCESS, fdrStatus);
-
- ASSERT_EQ(768U, matches.size());
-}
const u8 *fhist = fake_history.data() + fake_history_size;
fdrStatus = fdrExecStreaming(fdr.get(), fhist, 0, d, streamChunk, 0,
countCallback, &matchesCounts,
- HWLM_ALL_GROUPS, nullptr);
+ HWLM_ALL_GROUPS);
ASSERT_EQ(0, fdrStatus);
for (u32 j = streamChunk; j < dataSize; j += streamChunk) {
if (j < 16) {
fdrStatus = fdrExecStreaming(fdr.get(), tmp_d, j, tmp_d + j,
streamChunk, 0, countCallback,
&matchesCounts,
- HWLM_ALL_GROUPS, nullptr);
+ HWLM_ALL_GROUPS);
} else {
fdrStatus = fdrExecStreaming(fdr.get(), d + j - 8, 8, d + j,
streamChunk, 0, countCallback,
&matchesCounts,
- HWLM_ALL_GROUPS, nullptr);
+ HWLM_ALL_GROUPS);
}
ASSERT_EQ(0, fdrStatus);
}