if (lit.table != ROSE_FLOATING) {
return;
}
+ assert(bc.longLitLengthThreshold > 0);
if (lit.s.length() <= bc.longLitLengthThreshold) {
return;
}
* ids and squash the same roles and have the same group squashing
* behaviour. Benefits literals cannot be merged. */
+ assert(longLitLengthThreshold > 0);
+
for (u32 int_id : lits) {
rose_literal_info &curr_info = literal_info[int_id];
const rose_literal_id &lit = build.literals.right.at(int_id);
#include "util/verify_types.h"
#include "util/compile_context.h"
+#include <algorithm>
+#include <numeric>
+
using namespace std;
namespace ue2 {
/** \brief Minimum size for a non-empty hash table. Must be a power of two. */
static constexpr u32 MIN_HASH_TABLE_SIZE = 128;

/** \brief Maximum load factor (between zero and one) for a hash table. */
static constexpr double MAX_HASH_TABLE_LOAD = 0.7;

/** \brief Minimum size (in bits) for a bloom filter. Must be a power of two. */
static constexpr u32 MIN_BLOOM_FILTER_SIZE = 256;

/** \brief Maximum load factor (between zero and one) for a bloom filter. */
static constexpr double MAX_BLOOM_FILTER_LOAD = 0.25;
/** \brief Per-mode (caseful or nocase) statistics gathered over the long
 * literal set. */
struct LongLitModeInfo {
    u32 num_literals = 0; //!< Number of strings for this mode.
    u32 hashed_positions = 0; //!< Number of hashable string positions.
};
struct LongLitInfo {
LongLitInfo analyzeLongLits(const vector<ue2_case_string> &lits,
                            size_t max_len) {
    LongLitInfo info;

    // Bucket each literal into its mode (caseful/nocase) and tally the
    // number of hashable positions, i.e. the bytes of the string beyond
    // max_len.
    for (const auto &lit : lits) {
        auto &lit_info = lit.nocase ? info.nocase : info.caseful;
        assert(lit.s.size() > max_len); // only "long" literals belong here
        lit_info.num_literals++;
        lit_info.hashed_positions += lit.s.size() - max_len;
    }

    DEBUG_PRINTF("case: hashed %u positions\n", info.caseful.hashed_positions);
    DEBUG_PRINTF("nocase: hashed %u positions\n", info.nocase.hashed_positions);

    return info;
}
+
+static
+void addToBloomFilter(vector<u8> &bloom, const u8 *substr, bool nocase) {
+ const u32 num_keys = verify_u32(bloom.size() * 8);
+ const u32 key_mask = (1U << lg2(num_keys)) -1;
+
+ const auto hash_functions = { bloomHash_1, bloomHash_2, bloomHash_3 };
+ for (const auto &hash_func : hash_functions) {
+ u32 hash = hash_func(substr, nocase);
+ u32 key = hash & key_mask;
+ DEBUG_PRINTF("set key %u (of %zu)\n", key, bloom.size() * 8);
+ bloom[key / 8] |= 1U << (key % 8);
+ }
+}
+
+static
+size_t bloomOccupancy(const vector<u8> &bloom) {
+ return accumulate(begin(bloom), end(bloom), 0,
+ [](const size_t &sum, const u8 &elem) {
+ return sum + popcount32(elem);
+ });
+}
+
+static
+double bloomLoad(const vector<u8> &bloom) {
+ return (double)bloomOccupancy(bloom) / (double)(bloom.size() * 8);
+}
+
/**
 * \brief Build a bloom filter of num_entries bits over the hashable
 * positions of the literals belonging to one mode (caseful or nocase).
 */
static
vector<u8> buildBloomFilter(const vector<ue2_case_string> &lits, size_t max_len,
                            size_t num_entries, bool nocase) {
    assert(num_entries % 8 == 0);
    assert((num_entries & (num_entries - 1)) == 0); // Must be power of two.

    vector<u8> bloom(num_entries / 8, 0);

    if (!num_entries) {
        return bloom;
    }

    for (const auto &lit : lits) {
        // Only include literals that belong to the requested mode.
        if (nocase != lit.nocase) {
            continue;
        }
        // Add one entry per hashable position in the string (every
        // position past the first max_len bytes).
        for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
            const u8 *substr = (const u8 *)lit.s.c_str() + offset;
            addToBloomFilter(bloom, substr, nocase);
        }
    }

    DEBUG_PRINTF("%s bloom filter occupancy %zu of %zu entries\n",
                 nocase ? "nocase" : "caseful", bloomOccupancy(bloom),
                 num_entries);

    return bloom;
}
+
+
+static
+vector<u8> makeBloomFilter(const vector<ue2_case_string> &lits,
+ size_t max_len, bool nocase) {
+ vector<u8> bloom;
+
+ size_t num_entries = MIN_BLOOM_FILTER_SIZE;
+ for (;;) {
+ bloom = buildBloomFilter(lits, max_len, num_entries, nocase);
+ DEBUG_PRINTF("built %s bloom for %zu entries: load %f\n",
+ nocase ? "nocase" : "caseful", num_entries,
+ bloomLoad(bloom));
+ if (bloomLoad(bloom) < MAX_BLOOM_FILTER_LOAD) {
+ break;
+ }
+ num_entries *= 2;
+ }
+ return bloom;
+}
+
+static
+size_t hashTableOccupancy(const vector<RoseLongLitHashEntry> &tab) {
+ return count_if(begin(tab), end(tab), [](const RoseLongLitHashEntry &ent) {
+ return ent.str_offset != 0;
+ });
+}
+
+static
+double hashTableLoad(const vector<RoseLongLitHashEntry> &tab) {
+ return (double)hashTableOccupancy(tab) / (double)(tab.size());
}
static
-void fillHashes(const vector<ue2_case_string> &lits, size_t max_len,
- RoseLongLitHashEntry *tab, size_t numEntries, bool nocase,
- const map<u32, u32> &litToOffsetVal) {
- const u32 nbits = lg2(numEntries);
- map<u32, deque<pair<u32, u32>>> bucketToLitOffPairs;
- map<u32, u64a> bucketToBitfield;
+vector<RoseLongLitHashEntry> buildHashTable(const vector<ue2_case_string> &lits,
+ size_t max_len,
+ const vector<u32> &litToOffsetVal,
+ size_t numEntries, bool nocase) {
+ vector<RoseLongLitHashEntry> tab(numEntries, {0,0});
+
+ if (!numEntries) {
+ return tab;
+ }
+
+ map<u32, vector<pair<u32, u32>>> hashToLitOffPairs;
for (u32 lit_id = 0; lit_id < lits.size(); lit_id++) {
const ue2_case_string &lit = lits[lit_id];
}
for (u32 offset = 1; offset < lit.s.size() - max_len + 1; offset++) {
const u8 *substr = (const u8 *)lit.s.c_str() + offset;
- u32 h = hashLongLiteral(substr, max_len, lit.nocase);
- u32 h_ent = h & ((1U << nbits) - 1);
- u32 h_low = (h >> nbits) & 63;
- bucketToLitOffPairs[h_ent].emplace_back(lit_id, offset);
- bucketToBitfield[h_ent] |= (1ULL << h_low);
+ u32 hash = hashLongLiteral(substr, max_len, lit.nocase);
+ hashToLitOffPairs[hash].emplace_back(lit_id, offset);
}
}
- // this used to be a set<u32>, but a bitset is much much faster given that
- // we're using it only for membership testing.
- boost::dynamic_bitset<> filledBuckets(numEntries); // all zero by default.
-
- // sweep out bitfield entries and save the results swapped accordingly
- // also, anything with bitfield entries is put in filledBuckets
- for (const auto &m : bucketToBitfield) {
- const u32 &bucket = m.first;
- const u64a &contents = m.second;
- tab[bucket].bitfield = contents;
- filledBuckets.set(bucket);
- }
-
- // store out all our chains based on free values in our hash table.
- // find nearest free locations that are empty (there will always be more
- // entries than strings, at present)
- for (auto &m : bucketToLitOffPairs) {
- u32 bucket = m.first;
- deque<pair<u32, u32>> &d = m.second;
+ for (auto &m : hashToLitOffPairs) {
+ u32 hash = m.first;
+ vector<pair<u32, u32>> &d = m.second;
+
+ // Sort by (offset, string) so that we'll be able to remove identical
+ // string prefixes.
+ stable_sort(begin(d), end(d),
+ [&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
+ const auto &str_a = lits[a.first].s;
+ const auto &str_b = lits[b.first].s;
+ return tie(a.second, str_a) < tie(b.second, str_b);
+ });
- // sort d by distance of the residual string (len minus our depth into
- // the string). We need to put the 'furthest back' string first...
- stable_sort(d.begin(), d.end(),
+ // Remove entries that point to the same literal prefix.
+ d.erase(unique(begin(d), end(d),
+ [&](const pair<u32, u32> &a, const pair<u32, u32> &b) {
+ if (a.second != b.second) {
+ return false;
+ }
+ const auto &str_a = lits[a.first].s;
+ const auto &str_b = lits[b.first].s;
+ const size_t len = max_len + a.second;
+ return equal(begin(str_a), begin(str_a) + len,
+ begin(str_b));
+ }),
+ end(d));
+
+ // Sort d by distance of the residual string (len minus our depth into
+ // the string). We need to put the 'furthest back' string first.
+ stable_sort(begin(d), end(d),
[](const pair<u32, u32> &a, const pair<u32, u32> &b) {
if (a.second != b.second) {
return a.second > b.second; /* longest is first */
return a.first < b.first;
});
- while (1) {
- // first time through is always at bucket, then we fill in links
- filledBuckets.set(bucket);
- RoseLongLitHashEntry *ent = &tab[bucket];
- u32 lit_id = d.front().first;
- u32 offset = d.front().second;
-
- ent->state = verify_u32(litToOffsetVal.at(lit_id) +
- offset + max_len);
- ent->link = (u32)LINK_INVALID;
+ u32 bucket = hash % numEntries;
- d.pop_front();
- if (d.empty()) {
- break;
- }
- // now, if there is another value
- // find a bucket for it and put in 'bucket' and repeat
- // all we really need to do is find something not in filledBuckets,
- // ideally something close to bucket
- // we search backward and forward from bucket, trying to stay as
- // close as possible.
- UNUSED bool found = false;
- int bucket_candidate = 0;
- for (u32 k = 1; k < numEntries * 2; k++) {
- bucket_candidate = bucket + (((k & 1) == 0)
- ? (-(int)k / 2) : (k / 2));
- if (bucket_candidate < 0 ||
- (size_t)bucket_candidate >= numEntries) {
- continue;
- }
- if (!filledBuckets.test(bucket_candidate)) {
- found = true;
- break;
+ // Placement via linear probing.
+ for (const auto &lit_offset : d) {
+ while (tab[bucket].str_offset != 0) {
+ bucket++;
+ if (bucket == numEntries) {
+ bucket = 0;
}
}
- assert(found);
- bucket = bucket_candidate;
- ent->link = bucket;
+ u32 lit_id = lit_offset.first;
+ u32 offset = lit_offset.second;
+
+ DEBUG_PRINTF("hash 0x%08x lit_id %u offset %u bucket %u\n", hash,
+ lit_id, offset, bucket);
+
+ auto &entry = tab[bucket];
+ entry.str_offset = verify_u32(litToOffsetVal.at(lit_id));
+ assert(entry.str_offset != 0);
+ entry.str_len = offset + max_len;
+ }
+ }
+
+ DEBUG_PRINTF("%s hash table occupancy %zu of %zu entries\n",
+ nocase ? "nocase" : "caseful", hashTableOccupancy(tab),
+ numEntries);
+
+ return tab;
+}
+
+static
+vector<RoseLongLitHashEntry> makeHashTable(const vector<ue2_case_string> &lits,
+ size_t max_len,
+ const vector<u32> &litToOffsetVal,
+ u32 numPositions, bool nocase) {
+ vector<RoseLongLitHashEntry> tab;
+
+ // Note: for the hash table, we must always have at least enough entries
+ // for the number of hashable positions.
+ size_t num_entries = roundUpToPowerOfTwo(max(MIN_HASH_TABLE_SIZE,
+ numPositions));
+
+ for (;;) {
+ tab = buildHashTable(lits, max_len, litToOffsetVal, num_entries,
+ nocase);
+ DEBUG_PRINTF("built %s hash table for %zu entries: load %f\n",
+ nocase ? "nocase" : "caseful", num_entries,
+ hashTableLoad(tab));
+ if (hashTableLoad(tab) < MAX_HASH_TABLE_LOAD) {
+ break;
}
+ num_entries *= 2;
}
+ return tab;
+}
+
+static
+vector<u8> buildLits(const vector<ue2_case_string> &lits, u32 baseOffset,
+ vector<u32> &litToOffsetVal) {
+ vector<u8> blob;
+ litToOffsetVal.resize(lits.size(), 0);
+
+ u32 lit_id = 0;
+ for (const auto &lit : lits) {
+ u32 offset = baseOffset + verify_u32(blob.size());
+ blob.insert(blob.end(), begin(lit.s), end(lit.s));
+ litToOffsetVal[lit_id] = offset;
+ lit_id++;
+ }
+
+ DEBUG_PRINTF("built %zu bytes of strings\n", blob.size());
+ return blob;
}
u32 buildLongLiteralTable(const RoseBuildImpl &build, RoseEngineBlob &blob,
LongLitInfo info = analyzeLongLits(lits, max_len);
- // first assess the size and find our caseless threshold
- size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable));
-
- size_t litTabOffset = headerSize;
-
- size_t litTabNumEntries = lits.size() + 1;
- size_t litTabSize = ROUNDUP_16(litTabNumEntries * sizeof(RoseLongLiteral));
+ vector<u32> litToOffsetVal;
+ const size_t headerSize = ROUNDUP_16(sizeof(RoseLongLitTable));
+ vector<u8> lit_blob = buildLits(lits, headerSize, litToOffsetVal);
+
+ // Build caseful bloom filter and hash table.
+ vector<u8> bloom_case;
+ vector<RoseLongLitHashEntry> tab_case;
+ if (info.caseful.num_literals) {
+ bloom_case = makeBloomFilter(lits, max_len, false);
+ tab_case = makeHashTable(lits, max_len, litToOffsetVal,
+ info.caseful.hashed_positions, false);
+ }
- size_t wholeLitTabOffset = litTabOffset + litTabSize;
- size_t totalWholeLitTabSize =
- ROUNDUP_16(info.caseful.positions + info.nocase.positions);
+ // Build nocase bloom filter and hash table.
+ vector<u8> bloom_nocase;
+ vector<RoseLongLitHashEntry> tab_nocase;
+ if (info.nocase.num_literals) {
+ bloom_nocase = makeBloomFilter(lits, max_len, true);
+ tab_nocase = makeHashTable(lits, max_len, litToOffsetVal,
+ info.nocase.hashed_positions, true);
+ }
- size_t htOffsetCase = wholeLitTabOffset + totalWholeLitTabSize;
- size_t htSizeCase = info.caseful.hashEntries * sizeof(RoseLongLitHashEntry);
- size_t htOffsetNocase = htOffsetCase + htSizeCase;
- size_t htSizeNocase =
- info.nocase.hashEntries * sizeof(RoseLongLitHashEntry);
+ size_t wholeLitTabSize = ROUNDUP_16(byte_length(lit_blob));
+ size_t htOffsetCase = headerSize + wholeLitTabSize;
+ size_t htOffsetNocase = htOffsetCase + byte_length(tab_case);
+ size_t bloomOffsetCase = htOffsetNocase + byte_length(tab_nocase);
+ size_t bloomOffsetNocase = bloomOffsetCase + byte_length(bloom_case);
- size_t tabSize = ROUNDUP_16(htOffsetNocase + htSizeNocase);
+ size_t tabSize = ROUNDUP_16(bloomOffsetNocase + byte_length(bloom_nocase));
// need to add +2 to both of these to allow space for the actual largest
// value as well as handling the fact that we add one to the space when
// storing out a position to allow zero to mean "no stream state value"
- u8 streamBitsCase = lg2(roundUpToPowerOfTwo(info.caseful.positions + 2));
- u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(info.nocase.positions + 2));
+ u8 streamBitsCase = lg2(roundUpToPowerOfTwo(tab_case.size() + 2));
+ u8 streamBitsNocase = lg2(roundUpToPowerOfTwo(tab_nocase.size() + 2));
u32 tot_state_bytes = ROUNDUP_N(streamBitsCase + streamBitsNocase, 8) / 8;
auto table = aligned_zmalloc_unique<char>(tabSize);
assert(table); // otherwise would have thrown std::bad_alloc
- // then fill it in
- char *ptr = table.get();
- RoseLongLitTable *header = (RoseLongLitTable *)ptr;
- // fill in header
+ // Fill in the RoseLongLitTable header structure.
+ RoseLongLitTable *header = (RoseLongLitTable *)(table.get());
+ header->size = verify_u32(tabSize);
header->maxLen = verify_u8(max_len); // u8 so doesn't matter; won't go > 255
- header->boundaryCase = info.caseful.boundary;
- header->hashOffsetCase = verify_u32(htOffsetCase);
- header->hashNBitsCase = lg2(info.caseful.hashEntries);
- header->streamStateBitsCase = streamBitsCase;
- header->boundaryNocase = info.nocase.boundary;
- header->hashOffsetNocase = verify_u32(htOffsetNocase);
- header->hashNBitsNocase = lg2(info.nocase.hashEntries);
- header->streamStateBitsNocase = streamBitsNocase;
+ header->caseful.hashOffset = verify_u32(htOffsetCase);
+ header->caseful.hashBits = lg2(tab_case.size());
+ header->caseful.streamStateBits = streamBitsCase;
+ header->caseful.bloomOffset = verify_u32(bloomOffsetCase);
+ header->caseful.bloomBits = lg2(bloom_case.size() * 8);
+ header->nocase.hashOffset = verify_u32(htOffsetNocase);
+ header->nocase.hashBits = lg2(tab_nocase.size());
+ header->nocase.streamStateBits = streamBitsNocase;
+ header->nocase.bloomOffset = verify_u32(bloomOffsetNocase);
+ header->nocase.bloomBits = lg2(bloom_nocase.size() * 8);
assert(tot_state_bytes < sizeof(u64a));
header->streamStateBytes = verify_u8(tot_state_bytes); // u8
- ptr += headerSize;
-
- // now fill in the rest
-
- RoseLongLiteral *litTabPtr = (RoseLongLiteral *)ptr;
- ptr += litTabSize;
-
- map<u32, u32> litToOffsetVal;
- for (auto i = lits.begin(), e = lits.end(); i != e; ++i) {
- u32 entry = verify_u32(i - lits.begin());
- u32 offset = verify_u32(ptr - table.get());
-
- // point the table entry to the string location
- litTabPtr[entry].offset = offset;
-
- litToOffsetVal[entry] = offset;
-
- // copy the string into the string location
- const auto &s = i->s;
- memcpy(ptr, s.c_str(), s.size());
-
- ptr += s.size(); // and the string location
- }
-
- // fill in final lit table entry with current ptr (serves as end value)
- litTabPtr[lits.size()].offset = verify_u32(ptr - table.get());
-
- // fill hash tables
- ptr = table.get() + htOffsetCase;
- fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
- info.caseful.hashEntries, false, litToOffsetVal);
- ptr += htSizeCase;
- fillHashes(lits, max_len, (RoseLongLitHashEntry *)ptr,
- info.nocase.hashEntries, true, litToOffsetVal);
- ptr += htSizeNocase;
-
- assert(ptr <= table.get() + tabSize);
+ // Copy in the literal strings, hash tables and bloom filters,
+ copy_bytes(table.get() + headerSize, lit_blob);
+ copy_bytes(table.get() + htOffsetCase, tab_case);
+ copy_bytes(table.get() + bloomOffsetCase, bloom_case);
+ copy_bytes(table.get() + htOffsetNocase, tab_nocase);
+ copy_bytes(table.get() + bloomOffsetNocase, bloom_nocase);
DEBUG_PRINTF("built streaming table, size=%zu\n", tabSize);
DEBUG_PRINTF("requires %zu bytes of history\n", max_len);
#include <fstream>
#include <iomanip>
#include <map>
+#include <numeric>
#include <ostream>
-#include <string>
#include <sstream>
+#include <string>
#include <utility>
#ifndef DUMP_SUPPORT
}
/**
 * \brief Dump hash table and bloom filter occupancy statistics for one
 * long-literal subtable (caseful or nocase) to the given file.
 */
static
void dumpLongLiteralSubtable(const RoseLongLitTable *ll_table,
                             const RoseLongLitSubtable *ll_sub, FILE *f) {
    if (!ll_sub->hashBits) {
        fprintf(f, " <no table>\n");
        return;
    }

    // All offsets in the subtable are relative to the table base.
    const char *base = (const char *)ll_table;

    u32 nbits = ll_sub->hashBits;
    u32 num_entries = 1U << nbits;
    const auto *tab = (const RoseLongLitHashEntry *)(base + ll_sub->hashOffset);
    // An entry with str_offset == 0 is an empty bucket.
    u32 hash_occ =
        count_if(tab, tab + num_entries, [](const RoseLongLitHashEntry &ent) {
            return ent.str_offset != 0;
        });
    float hash_occ_percent = ((float)hash_occ / (float)num_entries) * 100;

    fprintf(f, " hash table : %u bits, occupancy %u/%u (%0.1f%%)\n",
            nbits, hash_occ, num_entries, hash_occ_percent);

    u32 bloom_bits = ll_sub->bloomBits;
    u32 bloom_size = 1U << bloom_bits;
    const u8 *bloom = (const u8 *)base + ll_sub->bloomOffset;
    // Bloom occupancy is the number of set bits across the filter bytes.
    u32 bloom_occ = accumulate(bloom, bloom + bloom_size / 8, 0,
        [](const u32 &sum, const u8 &elem) { return sum + popcount32(elem); });
    float bloom_occ_percent = ((float)bloom_occ / (float)(bloom_size)) * 100;

    fprintf(f, " bloom filter : %u bits, occupancy %u/%u (%0.1f%%)\n",
            bloom_bits, bloom_occ, bloom_size, bloom_occ_percent);
}
+
static
void dumpLongLiteralTable(const RoseEngine *t, FILE *f) {
if (!t->longLitTableOffset) {
(const struct RoseLongLitTable *)loadFromByteCodeOffset(
t, t->longLitTableOffset);
- u32 num_caseful = ll_table->boundaryCase;
- u32 num_caseless = ll_table->boundaryNocase - num_caseful;
-
- fprintf(f, " longest len: %u\n", ll_table->maxLen);
- fprintf(f, " counts: %u caseful, %u caseless\n", num_caseful,
- num_caseless);
- fprintf(f, " hash bits: %u caseful, %u caseless\n",
- ll_table->hashNBitsCase, ll_table->hashNBitsNocase);
- fprintf(f, " state bits: %u caseful, %u caseless\n",
- ll_table->streamStateBitsCase, ll_table->streamStateBitsNocase);
- fprintf(f, " stream state: %u bytes\n", ll_table->streamStateBytes);
+ fprintf(f, " total size : %u bytes\n", ll_table->size);
+ fprintf(f, " longest len : %u\n", ll_table->maxLen);
+ fprintf(f, " stream state : %u bytes\n", ll_table->streamStateBytes);
+
+ fprintf(f, " caseful:\n");
+ dumpLongLiteralSubtable(ll_table, &ll_table->caseful, f);
+
+ fprintf(f, " nocase:\n");
+ dumpLongLiteralSubtable(ll_table, &ll_table->nocase, f);
}
// Externally accessible functions
};
/**
 * \brief Long literal subtable for a particular mode (caseful or nocase).
 */
struct RoseLongLitSubtable {
    /**
     * \brief Offset of the hash table (relative to RoseLongLitTable base).
     *
     * Offset is zero if no such table exists.
     */
    u32 hashOffset;

    /**
     * \brief Offset of the bloom filter (relative to RoseLongLitTable base).
     *
     * Offset is zero if no such table exists.
     */
    u32 bloomOffset;

    /** \brief lg2 of the size of the hash table. */
    u8 hashBits;

    /** \brief Size of the bloom filter in bits. */
    u8 bloomBits;

    /** \brief Number of bits of packed stream state used. */
    u8 streamStateBits;
};
/**
 * \brief Long literal table header.
 */
struct RoseLongLitTable {
    /**
     * \brief Total size of the whole table (including strings, bloom filters,
     * hash tables).
     */
    u32 size;

    /** \brief Caseful sub-table (hash table and bloom filter). */
    struct RoseLongLitSubtable caseful;

    /** \brief Caseless sub-table (hash table and bloom filter). */
    struct RoseLongLitSubtable nocase;

    /** \brief Total size of packed stream state in bytes. */
    u8 streamStateBytes;

    /** \brief Set from max_len at build time; the longest literal length
     * handled by this table (fits in a u8). */
    u8 maxLen;
};
-/**
- * \brief One of these structures per literal entry in our long literal table.
- */
-struct RoseLongLiteral {
- /**
- * \brief Offset of the literal string itself, relative to
- * RoseLongLitTable base.
- */
- u32 offset;
-};
-
-/** \brief "No further links" value used for \ref RoseLongLitHashEntry::link. */
-#define LINK_INVALID 0xffffffff
-
/**
 * \brief One of these structures per hash table entry in our long literal
 * table.
 */
struct RoseLongLitHashEntry {
    /**
     * \brief Offset of the literal string itself, relative to
     * RoseLongLitTable base. Zero if this bucket is empty.
     */
    u32 str_offset;

    /** \brief Length of the literal string. */
    u32 str_len;
};
static really_inline
tctxt->minMatchOffset = offset;
tctxt->minNonMpvMatchOffset = offset;
tctxt->next_mpv_offset = 0;
+ tctxt->ll_buf = scratch->core_info.hbuf;
+ tctxt->ll_len = scratch->core_info.hlen;
+ tctxt->ll_buf_nocase = scratch->core_info.hbuf;
+ tctxt->ll_len_nocase = scratch->core_info.hlen;
+
DEBUG_PRINTF("BEGIN: history len=%zu, buffer len=%zu groups=%016llx\n",
scratch->core_info.hlen, scratch->core_info.len, tctxt->groups);
MIN(t->floatingDistance, length + offset) - offset : 0;
}
+ loadLongLiteralState(t, state, scratch);
+
size_t hlength = scratch->core_info.hlen;
- char rebuild = 0;
-
- if (hlength) {
- // Can only have long literal state or rebuild if this is not the
- // first write to this stream.
- loadLongLiteralState(t, state, scratch);
- rebuild = (scratch->core_info.status & STATUS_DELAY_DIRTY) &&
- (t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
- offset < t->maxFloatingDelayedMatch);
- }
+ char rebuild = hlength &&
+ (scratch->core_info.status & STATUS_DELAY_DIRTY) &&
+ (t->maxFloatingDelayedMatch == ROSE_BOUND_INF ||
+ offset < t->maxFloatingDelayedMatch);
DEBUG_PRINTF("**rebuild %hhd status %hhu mfdm %u, offset %llu\n",
rebuild, scratch->core_info.status,
t->maxFloatingDelayedMatch, offset);
#include "util/copybytes.h"
// Returns the base of the hash table for the given subtable; the stored
// hashOffset is relative to the start of the RoseLongLitTable.
static really_inline
const struct RoseLongLitHashEntry *
getHashTableBase(const struct RoseLongLitTable *ll_table,
                 const struct RoseLongLitSubtable *ll_sub) {
    assert(ll_sub->hashOffset); // offset zero means "no table"
    return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
                                                 ll_sub->hashOffset);
}
// Reads from stream state and unpacks values into stream state table.
assert(state_case && state_nocase);
u8 ss_bytes = ll_table->streamStateBytes;
- u8 ssb = ll_table->streamStateBitsCase;
- UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
+ u8 ssb = ll_table->caseful.streamStateBits;
+ UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
assert(ss_bytes == (ssb + ssb_nc + 7) / 8);
#if defined(ARCH_32_BIT)
*state_nocase = (u32)(streamVal >> ssb);
}
-static really_inline
-u32 getBaseOffsetOfLits(const struct RoseLongLitTable *ll_table,
- const char nocase) {
- u32 lit_idx = get_start_lit_idx(ll_table, nocase);
- return getLitTab(ll_table)[lit_idx].offset;
-}
-
-static really_inline
-u32 unpackStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
- u32 v) {
- return v + getBaseOffsetOfLits(ll_table, nocase) - 1;
-}
-
-static really_inline
-u32 packStateVal(const struct RoseLongLitTable *ll_table, const char nocase,
- u32 v) {
- return v - getBaseOffsetOfLits(ll_table, nocase) + 1;
-}
-
static rose_inline
void loadLongLiteralStateMode(struct hs_scratch *scratch,
const struct RoseLongLitTable *ll_table,
- const struct RoseLongLiteral *litTab,
+ const struct RoseLongLitSubtable *ll_sub,
const u32 state, const char nocase) {
if (!state) {
DEBUG_PRINTF("no state for %s\n", nocase ? "caseless" : "caseful");
return;
}
- u32 stateValue = unpackStateVal(ll_table, nocase, state);
- u32 idx = findLitTabEntry(ll_table, stateValue, nocase);
- size_t found_offset = litTab[idx].offset;
- const u8 *found_buf = found_offset + (const u8 *)ll_table;
- size_t found_sz = stateValue - found_offset;
+ const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
+ const struct RoseLongLitHashEntry *ent = tab + state - 1;
+
+ assert(ent->str_offset + ent->str_len <= ll_table->size);
+ const u8 *found_buf = (const u8 *)ll_table + ent->str_offset;
+ size_t found_sz = ent->str_len;
struct RoseContext *tctxt = &scratch->tctxt;
if (nocase) {
return;
}
+ // If we don't have any long literals in play, these values must point to
+ // the real history buffer so that CHECK_LITERAL instructions examine the
+ // history buffer.
scratch->tctxt.ll_buf = scratch->core_info.hbuf;
scratch->tctxt.ll_len = scratch->core_info.hlen;
scratch->tctxt.ll_buf_nocase = scratch->core_info.hbuf;
scratch->tctxt.ll_len_nocase = scratch->core_info.hlen;
+ if (!scratch->core_info.hlen) {
+ return;
+ }
+
const struct RoseLongLitTable *ll_table =
getByOffset(t, t->longLitTableOffset);
- const struct RoseLongLiteral *litTab = getLitTab(ll_table);
const u8 *ll_state = getLongLitState(t, state);
u32 state_case;
u32 state_nocase;
loadLongLitStreamState(ll_table, ll_state, &state_case, &state_nocase);
- loadLongLiteralStateMode(scratch, ll_table, litTab, state_case, 0);
- loadLongLiteralStateMode(scratch, ll_table, litTab, state_nocase, 1);
+ DEBUG_PRINTF("loaded {%u, %u}\n", state_case, state_nocase);
+
+ loadLongLiteralStateMode(scratch, ll_table, &ll_table->caseful,
+ state_case, 0);
+ loadLongLiteralStateMode(scratch, ll_table, &ll_table->nocase,
+ state_nocase, 1);
}
static rose_inline
char confirmLongLiteral(const struct RoseLongLitTable *ll_table,
- const hs_scratch_t *scratch, u32 hashState,
+ const struct hs_scratch *scratch,
+ const struct RoseLongLitHashEntry *ent,
const char nocase) {
- const struct RoseLongLiteral *litTab = getLitTab(ll_table);
- u32 idx = findLitTabEntry(ll_table, hashState, nocase);
- size_t found_offset = litTab[idx].offset;
- const u8 *s = found_offset + (const u8 *)ll_table;
- assert(hashState > found_offset);
- size_t len = hashState - found_offset;
+ assert(ent->str_offset + ent->str_len <= ll_table->size);
+ const u8 *s = (const u8 *)ll_table + ent->str_offset;
+ size_t len = ent->str_len;
const u8 *buf = scratch->core_info.buf;
const size_t buf_len = scratch->core_info.len;
return 0;
}
- DEBUG_PRINTF("confirmed hashState=%u\n", hashState);
return 1;
}
static rose_inline
-void calcStreamingHash(const struct core_info *ci,
- const struct RoseLongLitTable *ll_table, u8 hash_len,
- u32 *hash_case, u32 *hash_nocase) {
+const u8 *prepScanBuffer(const struct core_info *ci,
+ const struct RoseLongLitTable *ll_table, u8 *tempbuf) {
+ const u8 hash_len = ll_table->maxLen;
assert(hash_len >= LONG_LIT_HASH_LEN);
// Our hash function operates over LONG_LIT_HASH_LEN bytes, starting from
// entirely from either the current buffer or the history buffer, we pass
// in the pointer directly; otherwise we must make a copy.
- u8 tempbuf[LONG_LIT_HASH_LEN];
const u8 *base;
if (hash_len > ci->len) {
base = ci->buf + ci->len - hash_len;
}
- if (ll_table->hashNBitsCase) {
- *hash_case = hashLongLiteral(base, LONG_LIT_HASH_LEN, 0);
- DEBUG_PRINTF("caseful hash %u\n", *hash_case);
- }
- if (ll_table->hashNBitsNocase) {
- *hash_nocase = hashLongLiteral(base, LONG_LIT_HASH_LEN, 1);
- DEBUG_PRINTF("caseless hash %u\n", *hash_nocase);
- }
-}
-
-static really_inline
-const struct RoseLongLitHashEntry *
-getHashTableBase(const struct RoseLongLitTable *ll_table, const char nocase) {
- const u32 hashOffset = nocase ? ll_table->hashOffsetNocase
- : ll_table->hashOffsetCase;
- return (const struct RoseLongLitHashEntry *)((const char *)ll_table +
- hashOffset);
-}
-
-static rose_inline
-const struct RoseLongLitHashEntry *
-getLongLitHashEnt(const struct RoseLongLitTable *ll_table, u32 h,
- const char nocase) {
- u32 nbits = nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase;
- if (!nbits) {
- return NULL;
- }
-
- u32 h_ent = h & ((1 << nbits) - 1);
- u32 h_low = (h >> nbits) & 63;
-
- const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);
- const struct RoseLongLitHashEntry *ent = tab + h_ent;
-
- if (!((ent->bitfield >> h_low) & 0x1)) {
- return NULL;
- }
-
- return ent;
-}
-
-static rose_inline
-u32 storeLongLiteralStateMode(const struct hs_scratch *scratch,
- const struct RoseLongLitTable *ll_table,
- const struct RoseLongLitHashEntry *ent,
- const char nocase) {
- assert(ent);
- assert(nocase ? ll_table->hashNBitsNocase : ll_table->hashNBitsCase);
-
- const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, nocase);
-
- u32 packed_state = 0;
- while (1) {
- if (confirmLongLiteral(ll_table, scratch, ent->state, nocase)) {
- packed_state = packStateVal(ll_table, nocase, ent->state);
- DEBUG_PRINTF("set %s state to %u\n", nocase ? "nocase" : "case",
- packed_state);
- break;
- }
- if (ent->link == LINK_INVALID) {
- break;
- }
- ent = tab + ent->link;
- }
- return packed_state;
+ return base;
}
#ifndef NDEBUG
assert(ll_state);
u8 ss_bytes = ll_table->streamStateBytes;
- u8 ssb = ll_table->streamStateBitsCase;
- UNUSED u8 ssb_nc = ll_table->streamStateBitsNocase;
+ u8 ssb = ll_table->caseful.streamStateBits;
+ UNUSED u8 ssb_nc = ll_table->nocase.streamStateBits;
assert(ss_bytes == ROUNDUP_N(ssb + ssb_nc, 8) / 8);
assert(!streamingTableOverflow(state_case, state_nocase, ssb, ssb_nc));
partial_store_u64a(ll_state, stagingStreamState, ss_bytes);
}
+static really_inline
+char has_bit(const u8 *data, u32 bit) {
+    // Test a single bit in a little-endian packed bit array: byte (bit / 8),
+    // bit position (bit % 8) within that byte.
+    const u32 byte_idx = bit >> 3;
+    const u32 bit_idx = bit & 7;
+    return (data[byte_idx] >> bit_idx) & 1;
+}
+
+static rose_inline
+char bloomHasKey(const u8 *bloom, u32 bloom_mask, u32 hash) {
+    // Reduce the hash to an index into the filter's bit array; bloom_mask is
+    // (filter size in bits - 1), so this is a power-of-two modulus.
+    const u32 key = hash & bloom_mask;
+    return has_bit(bloom, key);
+}
+
+static rose_inline
+char checkBloomFilter(const struct RoseLongLitTable *ll_table,
+                      const struct RoseLongLitSubtable *ll_sub,
+                      const u8 *scan_buf, char nocase) {
+    assert(ll_sub->bloomBits);
+
+    // The filter's bit array lives inside the table blob at bloomOffset, and
+    // its size in bits is a power of two (1 << bloomBits).
+    const u8 *filter = (const u8 *)ll_table + ll_sub->bloomOffset;
+    const u32 filter_mask = (1U << ll_sub->bloomBits) - 1;
+
+    // Probe all three independent hash functions; a key is present only if
+    // every probe hits. Bitwise AND keeps all three probes unconditional.
+    const char hit1 =
+        bloomHasKey(filter, filter_mask, bloomHash_1(scan_buf, nocase));
+    const char hit2 =
+        bloomHasKey(filter, filter_mask, bloomHash_2(scan_buf, nocase));
+    const char hit3 =
+        bloomHasKey(filter, filter_mask, bloomHash_3(scan_buf, nocase));
+    return hit1 & hit2 & hit3;
+}
+
+/**
+ * \brief Look for a hit in the hash table.
+ *
+ * The table is open-addressed with linear probing: we scan forward from the
+ * home bucket until we either confirm a literal or reach an empty slot
+ * (str_offset == 0), which terminates the probe sequence. The build bounds
+ * the table's load factor, so an empty slot always exists and the loop
+ * terminates on a miss.
+ *
+ * Returns zero if not found, otherwise returns (bucket + 1).
+ */
+static rose_inline
+u32 checkHashTable(const struct RoseLongLitTable *ll_table,
+                   const struct RoseLongLitSubtable *ll_sub, const u8 *scan_buf,
+                   const struct hs_scratch *scratch, char nocase) {
+    const u32 nbits = ll_sub->hashBits;
+    assert(nbits && nbits < 32);
+    const u32 num_entries = 1U << nbits;
+
+    const struct RoseLongLitHashEntry *tab = getHashTableBase(ll_table, ll_sub);
+
+    // Home bucket: low nbits of the 24-byte hash over the scan buffer.
+    u32 hash = hashLongLiteral(scan_buf, LONG_LIT_HASH_LEN, nocase);
+    u32 bucket = hash & ((1U << nbits) - 1);
+
+    // Probe until an empty slot (str_offset == 0 is the empty sentinel).
+    while (tab[bucket].str_offset != 0) {
+        DEBUG_PRINTF("checking bucket %u\n", bucket);
+        if (confirmLongLiteral(ll_table, scratch, &tab[bucket], nocase)) {
+            DEBUG_PRINTF("found hit for bucket %u\n", bucket);
+            return bucket + 1;
+        }
+
+        // Linear probe with wraparound at the end of the table.
+        if (++bucket == num_entries) {
+            bucket = 0;
+        }
+    }
+
+    return 0;
+}
+
static rose_inline
void storeLongLiteralState(const struct RoseEngine *t, char *state,
struct hs_scratch *scratch) {
// If we don't have enough history, we don't need to do anything.
if (ll_table->maxLen <= ci->len + ci->hlen) {
- u32 hash_case = 0;
- u32 hash_nocase = 0;
+ u8 tempbuf[LONG_LIT_HASH_LEN];
+ const u8 *scan_buf = prepScanBuffer(ci, ll_table, tempbuf);
- calcStreamingHash(ci, ll_table, ll_table->maxLen, &hash_case,
- &hash_nocase);
-
- const struct RoseLongLitHashEntry *ent_case =
- getLongLitHashEnt(ll_table, hash_case, 0);
- const struct RoseLongLitHashEntry *ent_nocase =
- getLongLitHashEnt(ll_table, hash_nocase, 1);
-
- DEBUG_PRINTF("ent_caseful=%p, ent_caseless=%p\n", ent_case, ent_nocase);
-
- if (ent_case) {
- state_case = storeLongLiteralStateMode(scratch, ll_table,
- ent_case, 0);
+ if (ll_table->caseful.hashBits &&
+ checkBloomFilter(ll_table, &ll_table->caseful, scan_buf, 0)) {
+ state_case = checkHashTable(ll_table, &ll_table->caseful, scan_buf,
+ scratch, 0);
}
- if (ent_nocase) {
- state_nocase = storeLongLiteralStateMode(scratch, ll_table,
- ent_nocase, 1);
+ if (ll_table->nocase.hashBits &&
+ checkBloomFilter(ll_table, &ll_table->nocase, scan_buf, 1)) {
+ state_nocase = checkHashTable(ll_table, &ll_table->nocase, scan_buf,
+ scratch, 1);
}
+ } else {
+ DEBUG_PRINTF("not enough history (%zu bytes)\n", ci->len + ci->hlen);
}
DEBUG_PRINTF("store {%u, %u}\n", state_case, state_nocase);
#define STREAM_LONG_LIT_HASH_H
#include "ue2common.h"
+#include "util/bitutils.h"
#include "util/unaligned.h"
/** \brief Length of the buffer operated on by \ref hashLongLiteral(). */
#define LONG_LIT_HASH_LEN 24
+/** \brief Multiplier used by all the hash functions below. */
+#define HASH_MULTIPLIER 0x0b4e0ef37bc32127ULL
+
/** \brief Hash function used for long literal table in streaming mode. */
static really_inline
u32 hashLongLiteral(const u8 *ptr, UNUSED size_t len, char nocase) {
- const u64a CASEMASK = 0xdfdfdfdfdfdfdfdfULL;
- const u64a MULTIPLIER = 0x0b4e0ef37bc32127ULL;
-
// We unconditionally hash LONG_LIT_HASH_LEN bytes; all use cases of this
// hash are for strings longer than this.
assert(len >= 24);
u64a v2 = unaligned_load_u64a(ptr + 8);
u64a v3 = unaligned_load_u64a(ptr + 16);
if (nocase) {
- v1 &= CASEMASK;
- v2 &= CASEMASK;
- v3 &= CASEMASK;
+ v1 &= OCTO_CASE_CLEAR;
+ v2 &= OCTO_CASE_CLEAR;
+ v3 &= OCTO_CASE_CLEAR;
}
- v1 *= MULTIPLIER;
- v2 *= MULTIPLIER * MULTIPLIER;
- v3 *= MULTIPLIER * MULTIPLIER * MULTIPLIER;
+ v1 *= HASH_MULTIPLIER;
+ v2 *= HASH_MULTIPLIER * HASH_MULTIPLIER;
+ v3 *= HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER;
v1 >>= 32;
v2 >>= 32;
v3 >>= 32;
return v1 ^ v2 ^ v3;
}
+/**
+ * \brief Internal, used by the bloom filter hash functions below. Hashes 16
+ * bytes beginning at (ptr + offset).
+ */
+static really_inline
+u32 bloomHash_i(const u8 *ptr, u32 offset, u64a multiplier, char nocase) {
+    // The assert (and the doc above) promise a 16-byte window; load both
+    // halves so the hash actually covers all 16 bytes. Together with call
+    // offsets 0/4/8 this ensures every byte of the first LONG_LIT_HASH_LEN
+    // bytes contributes to at least one bloom hash.
+    assert(offset + 16 <= LONG_LIT_HASH_LEN);
+
+    u64a v1 = unaligned_load_u64a(ptr + offset);
+    u64a v2 = unaligned_load_u64a(ptr + offset + 8);
+    if (nocase) {
+        // Mask out the case bit of every byte for caseless hashing.
+        v1 &= OCTO_CASE_CLEAR;
+        v2 &= OCTO_CASE_CLEAR;
+    }
+    // Multiplicative hashing; the second half gets a distinct multiplier so
+    // the two words don't cancel, then take the high 32 bits.
+    v1 *= multiplier;
+    v2 *= multiplier * multiplier;
+    return (v1 ^ v2) >> 32;
+}
+
+/*
+ * We ensure that we see every byte of the first LONG_LIT_HASH_LEN bytes of
+ * input data (using at least one of the following functions).
+ */
+
+static really_inline
+u32 bloomHash_1(const u8 *ptr, char nocase) {
+    // First bloom hash: window starting at offset 0, base multiplier.
+    return bloomHash_i(ptr, 0, HASH_MULTIPLIER, nocase);
+}
+
+static really_inline
+u32 bloomHash_2(const u8 *ptr, char nocase) {
+    // Second bloom hash: window starting at offset 4, squared multiplier.
+    return bloomHash_i(ptr, 4, HASH_MULTIPLIER * HASH_MULTIPLIER, nocase);
+}
+
+static really_inline
+u32 bloomHash_3(const u8 *ptr, char nocase) {
+    // Third bloom hash: window starting at offset 8, cubed multiplier.
+    return bloomHash_i(ptr, 8,
+                       HASH_MULTIPLIER * HASH_MULTIPLIER * HASH_MULTIPLIER,
+                       nocase);
+}
+
#endif // STREAM_LONG_LIT_HASH_H
#define CASE_BIT 0x20
#define CASE_CLEAR 0xdf
#define DOUBLE_CASE_CLEAR 0xdfdf
+#define OCTO_CASE_CLEAR 0xdfdfdfdfdfdfdfdfULL
static really_inline
u32 clz32(u32 x) {