src/hs_version.h
src/scratch.h
src/state.h
+ src/stream_compress.c
+ src/stream_compress.h
src/ue2common.h
src/compiler/asserts.cpp
src/compiler/asserts.h
*/
#define HS_ARCH_ERROR (-11)
+/**
+ * Provided buffer was too small.
+ *
+ * This error indicates that there was insufficient space in the buffer. The
+ * call should be repeated with a larger provided buffer.
+ *
+ * Note: in this situation, it is normal for the amount of space required to be
+ * returned in the same manner as the used space would have been returned if the
+ * call was successful.
+ */
+#define HS_INSUFFICIENT_SPACE (-12)
+
/** @} */
#ifdef __cplusplus
match_event_handler onEvent,
void *context);
+/**
+ * Creates a compressed representation of the provided stream in the buffer
+ * provided. This compressed representation can be converted back into a stream
+ * state by using @ref hs_expand_stream() or @ref hs_reset_and_expand_stream().
+ * The size of the compressed representation will be placed into @a used_space.
+ *
+ * If there is not sufficient space in the buffer to hold the compressed
+ * represention, @ref HS_INSUFFICIENT_SPACE will be returned and @a used_space
+ * will be populated with the amount of space required.
+ *
+ * Note: this function does not close the provided stream, you may continue to
+ * use the stream or to free it with @ref hs_close_stream().
+ *
+ * @param stream
+ * The stream (as created by @ref hs_open_stream()) to be compressed.
+ *
+ * @param buf
+ * Buffer to write the compressed representation into. Note: if the call is
+ * just being used to determine the amount of space required, it is allowed
+ * to pass NULL here and @a buf_space as 0.
+ *
+ * @param buf_space
+ * The number of bytes in @a buf. If buf_space is too small, the call will
+ * fail with @ref HS_INSUFFICIENT_SPACE.
+ *
+ * @param used_space
+ * Pointer to where the amount of used space will be written to. The used
+ * buffer space is always less than or equal to @a buf_space. If the call
+ * fails with @ref HS_INSUFFICIENT_SPACE, this pointer will be used to
+ * write out the amount of buffer space required.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, @ref HS_INSUFFICIENT_SPACE if the provided
+ * buffer is too small.
+ */
+hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf,
+ size_t buf_space, size_t *used_space);
+
+/**
+ * Decompresses a compressed representation created by @ref hs_compress_stream()
+ * into a new stream.
+ *
+ * Note: @a buf must correspond to a complete compressed representation created
+ * by @ref hs_compress_stream() of a stream that was opened against @a db. It is
+ * not always possible to detect misuse of this API and behaviour is undefined
+ * if these properties are not satisfied.
+ *
+ * @param db
+ * The compiled pattern database that the compressed stream was opened
+ * against.
+ *
+ * @param stream
+ * On success, a pointer to the expanded @ref hs_stream_t will be
+ * returned; NULL on failure.
+ *
+ * @param buf
+ * A compressed representation of a stream. These compressed forms are
+ * created by @ref hs_compress_stream().
+ *
+ * @param buf_size
+ * The size in bytes of the compressed representation.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
+hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream,
+ const char *buf, size_t buf_size);
+
+/**
+ * Decompresses a compressed representation created by @ref hs_compress_stream()
+ * on top of the 'to' stream. The 'to' stream will first be reset (reporting
+ * any EOD matches if a non-NULL @a onEvent callback handler is provided).
+ *
+ * Note: the 'to' stream must be opened against the same database as the
+ * compressed stream.
+ *
+ * Note: @a buf must correspond to a complete compressed representation created
+ * by @ref hs_compress_stream() of a stream that was opened against @a db. It is
+ * not always possible to detect misuse of this API and behaviour is undefined
+ * if these properties are not satisfied.
+ *
+ * @param to_stream
+ * A pointer to the generated @ref hs_stream_t will be
+ * returned; NULL on failure.
+ *
+ * @param buf
+ * A compressed representation of a stream. These compressed forms are
+ * created by @ref hs_compress_stream().
+ *
+ * @param buf_size
+ * The size in bytes of the compressed representation.
+ *
+ * @param scratch
+ * A per-thread scratch space allocated by @ref hs_alloc_scratch(). This is
+ * allowed to be NULL only if the @a onEvent callback is also NULL.
+ *
+ * @param onEvent
+ * Pointer to a match event callback function. If a NULL pointer is given,
+ * no matches will be returned.
+ *
+ * @param context
+ * The user defined pointer which will be passed to the callback function
+ * when a match occurs.
+ *
+ * @return
+ * @ref HS_SUCCESS on success, other values on failure.
+ */
+hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream,
+ const char *buf, size_t buf_size,
+ hs_scratch_t *scratch,
+ match_event_handler onEvent,
+ void *context);
+
/**
* The block (non-streaming) regular expression scanner.
*
so->activeLeafArray = curr_offset; /* TODO: limit size of array */
curr_offset += mmbit_size(activeArrayCount);
+ so->activeLeafArray_size = mmbit_size(activeArrayCount);
so->activeLeftArray = curr_offset; /* TODO: limit size of array */
+ curr_offset += mmbit_size(activeLeftCount);
so->activeLeftArray_size = mmbit_size(activeLeftCount);
- curr_offset += so->activeLeftArray_size;
so->longLitState = curr_offset;
curr_offset += longLitStreamStateRequired;
+ so->longLitState_size = longLitStreamStateRequired;
// ONE WHOLE BYTE for each active leftfix with lag.
so->leftfixLagTable = curr_offset;
// Exhaustion multibit.
so->exhausted = curr_offset;
curr_offset += mmbit_size(build.rm.numEkeys());
+ so->exhausted_size = mmbit_size(build.rm.numEkeys());
// SOM locations and valid/writeable multibit structures.
if (build.ssm.numSomSlots()) {
curr_offset += mmbit_size(build.ssm.numSomSlots());
so->somWritable = curr_offset;
curr_offset += mmbit_size(build.ssm.numSomSlots());
+ so->somMultibit_size = mmbit_size(build.ssm.numSomSlots());
} else {
// No SOM handling, avoid growing the stream state any further.
so->somLocation = 0;
}
// note: state space for mask nfas is allocated later
+ so->nfaStateBegin = curr_offset;
so->end = curr_offset;
}
static
void allocateStateSpace(const engine_info &eng_info, NfaInfo &nfa_info,
RoseStateOffsets *so, u32 *scratchStateSize,
- u32 *streamStateSize, u32 *transientStateSize) {
+ u32 *transientStateSize) {
u32 state_offset;
if (eng_info.transient) {
// Transient engines do not use stream state, but must have room in
// Pack NFA stream state on to the end of the Rose stream state.
state_offset = so->end;
so->end += eng_info.stream_size;
- *streamStateSize += eng_info.stream_size;
}
nfa_info.stateOffset = state_offset;
static
void updateNfaState(const build_context &bc, vector<NfaInfo> &nfa_infos,
RoseStateOffsets *so, u32 *scratchStateSize,
- u32 *streamStateSize, u32 *transientStateSize) {
+ u32 *transientStateSize) {
if (nfa_infos.empty()) {
return;
}
- *streamStateSize = 0;
*transientStateSize = 0;
*scratchStateSize = 0;
NfaInfo &nfa_info = nfa_infos[qi];
const auto &eng_info = bc.engine_info_by_queue.at(qi);
allocateStateSpace(eng_info, nfa_info, so, scratchStateSize,
- streamStateSize, transientStateSize);
+ transientStateSize);
}
}
// Update state offsets to do with NFAs in proto and in the NfaInfo
// structures.
updateNfaState(bc, infos, &proto.stateOffsets, &proto.scratchStateSize,
- &proto.nfaStateSize, &proto.tStateSize);
+ &proto.tStateSize);
proto.nfaInfoOffset = bc.engine_blob.add_range(infos);
}
proto.totalNumLiterals = verify_u32(literal_info.size());
proto.asize = verify_u32(atable.size());
proto.ematcherRegionSize = ematcher_region_size;
- proto.longLitStreamState = verify_u32(longLitStreamStateRequired);
proto.size = currOffset;
fprintf(f, "state space required : %u bytes\n", t->stateOffsets.end);
fprintf(f, " - history buffer : %u bytes\n", t->historyRequired);
- fprintf(f, " - exhaustion vector : %u bytes\n", (t->ekeyCount + 7) / 8);
+ fprintf(f, " - exhaustion vector : %u bytes\n",
+ t->stateOffsets.exhausted_size);
fprintf(f, " - role state mmbit : %u bytes\n", t->stateSize);
fprintf(f, " - long lit matcher : %u bytes\n", t->longLitStreamState);
fprintf(f, " - active array : %u bytes\n",
- mmbit_size(t->activeArrayCount));
+ t->stateOffsets.activeLeafArray_size);
fprintf(f, " - active rose : %u bytes\n",
- mmbit_size(t->activeLeftCount));
+ t->stateOffsets.activeLeftArray_size);
fprintf(f, " - anchored state : %u bytes\n", t->anchorStateSize);
- fprintf(f, " - nfa state : %u bytes\n", t->nfaStateSize);
+ fprintf(f, " - nfa state : %u bytes\n",
+ t->stateOffsets.end - t->stateOffsets.nfaStateBegin);
fprintf(f, " - (trans. nfa state): %u bytes\n", t->tStateSize);
fprintf(f, " - one whole bytes : %u bytes\n",
t->stateOffsets.anchorState - t->stateOffsets.leftfixLagTable);
DUMP_U32(t, rolesWithStateCount);
DUMP_U32(t, stateSize);
DUMP_U32(t, anchorStateSize);
- DUMP_U32(t, nfaStateSize);
DUMP_U32(t, tStateSize);
DUMP_U32(t, smallWriteOffset);
DUMP_U32(t, amatcherOffset);
DUMP_U32(t, delayRebuildLength);
DUMP_U32(t, stateOffsets.history);
DUMP_U32(t, stateOffsets.exhausted);
+ DUMP_U32(t, stateOffsets.exhausted_size);
DUMP_U32(t, stateOffsets.activeLeafArray);
+ DUMP_U32(t, stateOffsets.activeLeafArray_size);
DUMP_U32(t, stateOffsets.activeLeftArray);
DUMP_U32(t, stateOffsets.activeLeftArray_size);
DUMP_U32(t, stateOffsets.leftfixLagTable);
DUMP_U32(t, stateOffsets.groups);
DUMP_U32(t, stateOffsets.groups_size);
DUMP_U32(t, stateOffsets.longLitState);
+ DUMP_U32(t, stateOffsets.longLitState_size);
DUMP_U32(t, stateOffsets.somLocation);
DUMP_U32(t, stateOffsets.somValid);
DUMP_U32(t, stateOffsets.somWritable);
+ DUMP_U32(t, stateOffsets.somMultibit_size);
+ DUMP_U32(t, stateOffsets.nfaStateBegin);
DUMP_U32(t, stateOffsets.end);
DUMP_U32(t, boundary.reportEodOffset);
DUMP_U32(t, boundary.reportZeroOffset);
DUMP_U32(t, ematcherRegionSize);
DUMP_U32(t, somRevCount);
DUMP_U32(t, somRevOffsetOffset);
- DUMP_U32(t, longLitStreamState);
fprintf(f, "}\n");
fprintf(f, "sizeof(RoseEngine) = %zu\n", sizeof(RoseEngine));
}
#define OWB_ZOMBIE_ALWAYS_YES 128 /* nfa will always answer yes to any rose
* prefix checks */
+/* offset of the status flags in the stream state. */
+#define ROSE_STATE_OFFSET_STATUS_FLAGS 0
+
+/* offset of role mmbit in stream state (just after the status flag byte). */
+#define ROSE_STATE_OFFSET_ROLE_MMBIT sizeof(u8)
+
/**
* \brief Rose state offsets.
*
struct RoseStateOffsets {
/** History buffer.
*
- * First byte is an 8-bit count of the number of valid history bytes
- * available, followed by the history itself. Max size of history is
- * RoseEngine::historyRequired. */
+ * Max size of history is RoseEngine::historyRequired. */
u32 history;
- /** Exhausted bitvector.
+ /** Exhausted multibit.
*
- * 1 bit per exhaustible key (used by Highlander mode). If a bit is set,
+ * entry per exhaustible key (used by Highlander mode). If a bit is set,
* reports with that ekey should not be delivered to the user. */
u32 exhausted;
+ /** size of exhausted multibit */
+ u32 exhausted_size;
+
/** Multibit for active suffix/outfix engines. */
u32 activeLeafArray;
- /** Multibit for active Rose (prefix/infix) engines. */
+ /** Size of multibit for active suffix/outfix engines in bytes. */
+ u32 activeLeafArray_size;
+
+ /** Multibit for active leftfix (prefix/infix) engines. */
u32 activeLeftArray;
- /** Size of the active Rose array multibit, in bytes. */
+ /** Size of multibit for active leftfix (prefix/infix) engines in bytes. */
u32 activeLeftArray_size;
/** Table of lag information (stored as one byte per engine) for active
/** State for long literal support. */
u32 longLitState;
+ /** Size of the long literal state. */
+ u32 longLitState_size;
+
/** Packed SOM location slots. */
u32 somLocation;
/** Multibit guarding SOM location slots. */
u32 somWritable;
+ /** Size of each of the somValid and somWritable multibits, in bytes. */
+ u32 somMultibit_size;
+
+ /** Begin of the region where NFA engine state is stored.
+ * The NFA state region extends to end. */
+ u32 nfaStateBegin;
+
/** Total size of Rose state, in bytes. */
u32 end;
};
u32 stateSize; /* size of the state bitset
* WARNING: not the size of the rose state */
u32 anchorStateSize; /* size of the state for the anchor dfas */
- u32 nfaStateSize; /* total size of the state for the mask/rose nfas */
u32 tStateSize; /* total size of the state for transient rose nfas */
u32 scratchStateSize; /**< uncompressed state req'd for NFAs in scratch;
* used for sizing scratch only. */
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
static really_inline
void *getRoleState(char *state) {
- return state + sizeof(u8); // status flags
+ return state + ROSE_STATE_OFFSET_ROLE_MMBIT;
}
/** \brief Fetch the active array for suffix nfas. */
#include "som/som_runtime.h"
#include "som/som_stream.h"
#include "state.h"
+#include "stream_compress.h"
#include "ue2common.h"
#include "util/exhaust.h"
#include "util/multibit.h"
/** \brief Retrieve status bitmask from stream state. */
static really_inline
u8 getStreamStatus(const char *state) {
- u8 status = *(const u8 *)state;
+ u8 status = *(const u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS);
assert((status & ~STATUS_VALID_BITS) == 0);
return status;
}
static really_inline
void setStreamStatus(char *state, u8 status) {
assert((status & ~STATUS_VALID_BITS) == 0);
- *(u8 *)state = status;
+ *(u8 *)(state + ROSE_STATE_OFFSET_STATUS_FLAGS) = status;
}
/** \brief Initialise SOM state. Used in both block and streaming mode. */
return HS_SUCCESS;
}
+
+HS_PUBLIC_API
+hs_error_t hs_compress_stream(const hs_stream_t *stream, char *buf,
+ size_t buf_space, size_t *used_space) {
+ if (unlikely(!stream || !used_space)) {
+ return HS_INVALID;
+ }
+
+ if (unlikely(buf_space && !buf)) {
+ return HS_INVALID;
+ }
+
+ const struct RoseEngine *rose = stream->rose;
+
+ size_t stream_size = size_compress_stream(rose, stream);
+
+ DEBUG_PRINTF("require %zu [orig %zu]\n", stream_size,
+ rose->stateOffsets.end + sizeof(struct hs_stream));
+ *used_space = stream_size;
+
+ if (buf_space < stream_size) {
+ return HS_INSUFFICIENT_SPACE;
+ }
+ compress_stream(buf, stream_size, rose, stream);
+
+ return HS_SUCCESS;
+}
+
+hs_error_t hs_expand_stream(const hs_database_t *db, hs_stream_t **stream,
+ const char *buf, size_t buf_size) {
+ if (unlikely(!stream || !buf)) {
+ return HS_INVALID;
+ }
+
+ *stream = NULL;
+
+ hs_error_t err = validDatabase(db);
+ if (unlikely(err != HS_SUCCESS)) {
+ return err;
+ }
+
+ const struct RoseEngine *rose = hs_get_bytecode(db);
+ if (unlikely(!ISALIGNED_16(rose))) {
+ return HS_INVALID;
+ }
+
+ if (unlikely(rose->mode != HS_MODE_STREAM)) {
+ return HS_DB_MODE_ERROR;
+ }
+
+ size_t stream_size = rose->stateOffsets.end + sizeof(struct hs_stream);
+
+ struct hs_stream *s = hs_stream_alloc(stream_size);
+ if (unlikely(!s)) {
+ return HS_NOMEM;
+ }
+
+ if (!expand_stream(s, rose, buf, buf_size)) {
+ hs_stream_free(s);
+ return HS_INVALID;
+ }
+
+ *stream = s;
+ return HS_SUCCESS;
+}
+
+hs_error_t hs_reset_and_expand_stream(hs_stream_t *to_stream,
+ const char *buf, size_t buf_size,
+ hs_scratch_t *scratch,
+ match_event_handler onEvent,
+ void *context) {
+ if (unlikely(!to_stream || !buf)) {
+ return HS_INVALID;
+ }
+
+ const struct RoseEngine *rose = to_stream->rose;
+
+ if (onEvent) {
+ if (!scratch || !validScratch(to_stream->rose, scratch)) {
+ return HS_INVALID;
+ }
+ if (unlikely(markScratchInUse(scratch))) {
+ return HS_SCRATCH_IN_USE;
+ }
+ report_eod_matches(to_stream, scratch, onEvent, context);
+ unmarkScratchInUse(scratch);
+ }
+
+ if (expand_stream(to_stream, rose, buf, buf_size)) {
+ return HS_SUCCESS;
+ } else {
+ return HS_INVALID;
+ }
+}
--- /dev/null
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "stream_compress.h"
+
+#include "state.h"
+#include "nfa/nfa_internal.h"
+#include "rose/rose_internal.h"
+#include "util/multibit.h"
+#include "util/uniform_ops.h"
+
+#include <string.h>
+
+#define COPY_IN(p, sz) do { \
+ assert(currOffset + sz <= buf_size); \
+ memcpy(buf + currOffset, p, sz); \
+ currOffset += sz; \
+ DEBUG_PRINTF("co = %zu\n", currOffset); \
+ } while (0);
+
+#define COPY_OUT(p, sz) do { \
+ if (currOffset + sz > buf_size) { \
+ return 0; \
+ } \
+ memcpy(p, buf + currOffset, sz); \
+ currOffset += sz; \
+ DEBUG_PRINTF("co = %zu\n", currOffset); \
+ } while (0);
+
+#define SIZE_COPY_IN(p, sz) do { \
+ currOffset += sz; \
+ DEBUG_PRINTF("co = %zu\n", currOffset); \
+ } while (0);
+
+#define COPY COPY_OUT
+#define ASSIGN(lhs, rhs) do { lhs = rhs; } while (0)
+#define FN_SUFFIX expand
+#define STREAM_QUAL
+#define BUF_QUAL const
+#include "stream_compress_impl.h"
+
+int expand_stream(struct hs_stream *stream, const struct RoseEngine *rose,
+ const char *buf, size_t buf_size) {
+ return sc_expand(rose, stream, buf, buf_size);
+}
+
+#define COPY COPY_IN
+#define ASSIGN(lhs, rhs) do { } while (0)
+#define FN_SUFFIX compress
+#define STREAM_QUAL const
+#define BUF_QUAL
+#include "stream_compress_impl.h"
+
+size_t compress_stream(char *buf, size_t buf_size,
+ const struct RoseEngine *rose,
+ const struct hs_stream *stream) {
+ return sc_compress(rose, stream, buf, buf_size);
+}
+
+#define COPY SIZE_COPY_IN
+#define ASSIGN(lhs, rhs) do { } while (0)
+#define FN_SUFFIX size
+#define STREAM_QUAL const
+#define BUF_QUAL UNUSED
+#include "stream_compress_impl.h"
+
+size_t size_compress_stream(const struct RoseEngine *rose,
+ const struct hs_stream *stream) {
+ return sc_size(rose, stream, NULL, 0);
+}
--- /dev/null
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \file
+ * \brief Functions for dynamic compress/expand for streams.
+ */
+
+#ifndef STREAM_COMPRESS_H
+#define STREAM_COMPRESS_H
+
+#include <stdlib.h>
+
+struct hs_stream;
+struct RoseEngine;
+
+int expand_stream(struct hs_stream *out, const struct RoseEngine *rose,
+ const char *buf, size_t buf_size);
+
+size_t compress_stream(char *buf, size_t buf_size,
+ const struct RoseEngine *rose,
+ const struct hs_stream *src);
+
+size_t size_compress_stream(const struct RoseEngine *rose,
+ const struct hs_stream *stream);
+
+#endif
--- /dev/null
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "util/join.h"
+
+#define COPY_FIELD(x) COPY(&x, sizeof(x))
+
+/* TODO: replace with a multibit compress/expand call */
+#define COPY_MULTIBIT(mm_p, mm_size_bytes) COPY(mm_p, mm_size_bytes)
+
+#define COPY_LEFTFIXES JOIN(sc_left_, FN_SUFFIX)
+#define COPY_SOM_INFO JOIN(sc_som_, FN_SUFFIX)
+
+static
+size_t COPY_LEFTFIXES(const struct RoseEngine *rose, size_t currOffset,
+ STREAM_QUAL struct hs_stream *stream,
+ BUF_QUAL char *buf, UNUSED size_t buf_size) {
+ if (!rose->activeLeftIterOffset) {
+ return currOffset;
+ }
+
+ const struct RoseStateOffsets *so = &rose->stateOffsets;
+ STREAM_QUAL char *stream_body
+ = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream);
+
+ /* Note: in the expand case the active left array has already been copied
+ * into the stream. */
+ const u8 *ara = (const u8 *)(stream_body + so->activeLeftArray);
+ const u32 arCount = rose->activeLeftCount;
+ const struct LeftNfaInfo *left_table = getLeftTable(rose);
+
+ /* We only want to look at non-transient leftfixes */
+ const struct mmbit_sparse_iter *it = getActiveLeftIter(rose);
+ struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+ u32 dummy;
+ u32 ri = mmbit_sparse_iter_begin(ara, arCount, &dummy, it, si_state);
+ for (; ri != MMB_INVALID;
+ ri = mmbit_sparse_iter_next(ara, arCount, ri, &dummy, it, si_state)) {
+ u32 qi = ri + rose->leftfixBeginQueue;
+ UNUSED const struct LeftNfaInfo *left = left_table + ri;
+ const struct NfaInfo *nfa_info = getNfaInfoByQueue(rose, qi);
+ const struct NFA *nfa = getNfaByInfo(rose, nfa_info);
+
+ COPY(stream_body + nfa_info->stateOffset, nfa->streamStateSize);
+ /* copy the one whole byte for active leftfixes as well */
+ assert(left->lagIndex != ROSE_OFFSET_INVALID);
+ COPY(stream_body + so->leftfixLagTable + left->lagIndex, 1);
+ }
+
+ return currOffset;
+}
+
+static
+size_t COPY_SOM_INFO(const struct RoseEngine *rose, size_t currOffset,
+ STREAM_QUAL struct hs_stream *stream,
+ BUF_QUAL char *buf, UNUSED size_t buf_size) {
+ const struct RoseStateOffsets *so = &rose->stateOffsets;
+
+ if (!so->somLocation) {
+ assert(!so->somValid);
+ assert(!so->somWritable);
+ return currOffset;
+ }
+
+ STREAM_QUAL char *stream_body
+ = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream);
+
+ assert(so->somValid);
+ assert(so->somWritable);
+
+ COPY_MULTIBIT(stream_body + so->somWritable, so->somMultibit_size);
+ COPY_MULTIBIT(stream_body + so->somValid, so->somMultibit_size);
+
+ /* Copy only the som slots which contain valid values. */
+ /* Note: in the expand case the som valid array has been copied in. */
+ const u8 *svalid = (const u8 *)(stream_body + so->somValid);
+ u32 s_count = rose->somLocationCount;
+ u32 s_width = rose->somHorizon;
+ for (u32 slot = mmbit_iterate(svalid, s_count, MMB_INVALID);
+ slot != MMB_INVALID; slot = mmbit_iterate(svalid, s_count, slot)) {
+ COPY(stream_body + so->somLocation + slot * s_width, s_width);
+ }
+
+ return currOffset;
+}
+
+static
+size_t JOIN(sc_, FN_SUFFIX)(const struct RoseEngine *rose,
+ STREAM_QUAL struct hs_stream *stream,
+ BUF_QUAL char *buf, UNUSED size_t buf_size) {
+ size_t currOffset = 0;
+ const struct RoseStateOffsets *so = &rose->stateOffsets;
+
+ STREAM_QUAL char *stream_body
+ = ((STREAM_QUAL char *)stream) + sizeof(struct hs_stream);
+
+ COPY_FIELD(stream->offset);
+ ASSIGN(stream->rose, rose);
+
+ COPY(stream_body + ROSE_STATE_OFFSET_STATUS_FLAGS, 1);
+ COPY_MULTIBIT(stream_body + ROSE_STATE_OFFSET_ROLE_MMBIT, rose->stateSize);
+
+ /* stream is valid in compress/size, and stream->offset has been set already
+ * on the expand side */
+ u64a offset = stream->offset;
+ u32 history = MIN((u32)offset, rose->historyRequired);
+
+ /* copy the active mmbits */
+ COPY_MULTIBIT(stream_body + so->activeLeafArray, so->activeLeafArray_size);
+ COPY_MULTIBIT(stream_body + so->activeLeftArray, so->activeLeftArray_size);
+
+ COPY(stream_body + so->longLitState, so->longLitState_size);
+
+ /* Leftlag table will be handled later, for active leftfixes */
+
+ /* anchored table state is not required once we are deep in the stream */
+ if (offset <= rose->anchoredDistance) {
+ COPY(stream_body + so->anchorState, rose->anchorStateSize);
+ }
+
+ COPY(stream_body + so->groups, so->groups_size);
+
+ /* copy the real bits of history */
+ UNUSED u32 hend = so->history + rose->historyRequired;
+ COPY(stream_body + hend - history, history);
+
+ /* copy the exhaustion multibit */
+ COPY_MULTIBIT(stream_body + so->exhausted, so->exhausted_size);
+
+ /* copy nfa stream state for endfixes */
+ /* Note: in the expand case the active array has already been copied into
+ * the stream. */
+ const u8 *aa = (const u8 *)(stream_body + so->activeLeafArray);
+ u32 aaCount = rose->activeArrayCount;
+ for (u32 qi = mmbit_iterate(aa, aaCount, MMB_INVALID); qi != MMB_INVALID;
+ qi = mmbit_iterate(aa, aaCount, qi)) {
+ DEBUG_PRINTF("saving stream state for qi=%u\n", qi);
+ const struct NfaInfo *nfa_info = getNfaInfoByQueue(rose, qi);
+ const struct NFA *nfa = getNfaByInfo(rose, nfa_info);
+ COPY(stream_body + nfa_info->stateOffset, nfa->streamStateSize);
+ }
+
+ /* copy nfa stream state for leftfixes */
+ currOffset = COPY_LEFTFIXES(rose, currOffset, stream, buf, buf_size);
+ if (!currOffset) {
+ return 0;
+ }
+
+ currOffset = COPY_SOM_INFO(rose, currOffset, stream, buf, buf_size);
+ if (!currOffset) {
+ return 0;
+ }
+
+ return currOffset;
+}
+
+#undef ASSIGN
+#undef COPY
+#undef COPY_FIELD
+#undef COPT_LEFTFIXES
+#undef COPY_MULTIBIT
+#undef COPY_SOM_INFO
+#undef FN_SUFFIX
+#undef BUF_QUAL
+#undef STREAM_QUAL
/*
- * Copyright (c) 2015-2016, Intel Corporation
+ * Copyright (c) 2015-2017, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
ASSERT_EQ(HS_INVALID, err);
}
+TEST(HyperscanArgChecks, CompressStreamNoStream) {
+ char buf[100];
+ size_t used;
+ hs_error_t err = hs_compress_stream(nullptr, buf, sizeof(buf), &used);
+ ASSERT_EQ(HS_INVALID, err);
+}
+
+TEST(HyperscanArgChecks, CompressStreamNoUsed) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream;
+ hs_error_t err = hs_open_stream(db, 0, &stream);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[100];
+ err = hs_compress_stream(stream, buf, sizeof(buf), nullptr);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(HyperscanArgChecks, CompressStreamNoBuf) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream;
+ hs_error_t err = hs_open_stream(db, 0, &stream);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[100];
+ size_t used;
+ err = hs_compress_stream(stream, nullptr, sizeof(buf), &used);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(HyperscanArgChecks, CompressStreamSmallBuff) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream;
+ hs_error_t err = hs_open_stream(db, 0, &stream);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[100];
+ size_t used = 0;
+ err = hs_compress_stream(stream, buf, 1, &used);
+ ASSERT_EQ(HS_INSUFFICIENT_SPACE, err);
+ ASSERT_LT(0, used);
+
+ err = hs_close_stream(stream, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(HyperscanArgChecks, ExpandNoDb) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream1;
+ hs_error_t err = hs_open_stream(db, 0, &stream1);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[2000];
+ size_t used = 0;
+ err = hs_compress_stream(stream1, buf, sizeof(buf), &used);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ hs_stream_t *stream2;
+ err = hs_expand_stream(nullptr, &stream2, buf, used);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream1, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(HyperscanArgChecks, ExpandNoTo) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream1;
+ hs_error_t err = hs_open_stream(db, 0, &stream1);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[2000];
+ size_t used = 0;
+ err = hs_compress_stream(stream1, buf, sizeof(buf), &used);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ hs_stream_t *stream2;
+ err = hs_expand_stream(db, nullptr, buf, used);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream1, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(HyperscanArgChecks, ExpandNoBuf) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream1;
+ hs_error_t err = hs_open_stream(db, 0, &stream1);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[2000];
+ size_t used = 0;
+ err = hs_compress_stream(stream1, buf, sizeof(buf), &used);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ hs_stream_t *stream2;
+ err = hs_expand_stream(db, &stream2, nullptr, used);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream1, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(HyperscanArgChecks, ExpandSmallBuf) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream1;
+ hs_error_t err = hs_open_stream(db, 0, &stream1);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[2000];
+ size_t used = 0;
+ err = hs_compress_stream(stream1, buf, sizeof(buf), &used);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ hs_stream_t *stream2;
+ err = hs_expand_stream(db, &stream2, buf, used / 2);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream1, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(HyperscanArgChecks, ResetAndExpandNoStream) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream1;
+ hs_error_t err = hs_open_stream(db, 0, &stream1);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[2000];
+ size_t used = 0;
+ err = hs_compress_stream(stream1, buf, sizeof(buf), &used);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_reset_and_expand_stream(nullptr, buf, used, nullptr, nullptr,
+ nullptr);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream1, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(HyperscanArgChecks, ResetAndExpandNoBuf) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream1;
+ hs_error_t err = hs_open_stream(db, 0, &stream1);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[2000];
+ size_t used = 0;
+ err = hs_compress_stream(stream1, buf, sizeof(buf), &used);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ hs_stream_t *stream2;
+ err = hs_open_stream(db, 0, &stream2);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_reset_and_expand_stream(stream2, nullptr, used, nullptr, nullptr,
+ nullptr);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream1, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_close_stream(stream2, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+
+TEST(HyperscanArgChecks, ResetAndExpandSmallBuf) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream1;
+ hs_error_t err = hs_open_stream(db, 0, &stream1);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[2000];
+ size_t used = 0;
+ err = hs_compress_stream(stream1, buf, sizeof(buf), &used);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ hs_stream_t *stream2;
+ err = hs_open_stream(db, 0, &stream2);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_reset_and_expand_stream(stream2, buf, used / 2, nullptr, nullptr,
+ nullptr);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream1, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_close_stream(stream2, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
+TEST(HyperscanArgChecks, ResetAndExpandNoScratch) {
+ hs_database_t *db = buildDB("(foo.*bar){3,}", 0, 0, HS_MODE_STREAM);
+ ASSERT_NE(nullptr, db);
+
+ hs_stream_t *stream1;
+ hs_error_t err = hs_open_stream(db, 0, &stream1);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ char buf[2000];
+ size_t used = 0;
+ err = hs_compress_stream(stream1, buf, sizeof(buf), &used);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ hs_stream_t *stream2;
+ err = hs_open_stream(db, 0, &stream2);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ int temp;
+
+ err = hs_reset_and_expand_stream(stream2, buf, used, nullptr, singleHandler,
+ &temp);
+ ASSERT_EQ(HS_INVALID, err);
+
+ err = hs_close_stream(stream1, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_close_stream(stream2, nullptr, nullptr, nullptr);
+ ASSERT_EQ(HS_SUCCESS, err);
+
+ err = hs_free_database(db);
+ ASSERT_EQ(HS_SUCCESS, err);
+}
+
class BadModeTest : public testing::TestWithParam<unsigned> {};
// hs_compile: Compile a pattern with bogus mode flags set.