From: Yann Collet
ZSTDLIB_STATIC_API size_t +ZSTD_compressSequencesAndLiterals( ZSTD_CCtx* cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* literals, size_t litSize); +This is a variant of ZSTD_compressSequences() which, + instead of receiving (src,srcSize) as input parameters, receives (literals,litSize), + i.e. all literals already extracted and grouped into a single contiguous buffer. + This can be useful if the process generating the sequences also happens to generate the buffer of literals, + thus skipping an extraction and caching stage. + To be valid, `litSize` must be equal to the sum of all @.litLength fields in @inSeqs. + @return : final compressed size, or a ZSTD error code. + +
ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned magicVariant);Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index ecb9cfba8..1122e2f39 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -278,62 +278,6 @@ typedef enum { /*-******************************************* * Private declarations *********************************************/ -typedef struct seqDef_s { - U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ - U16 litLength; - U16 mlBase; /* mlBase == matchLength - MINMATCH */ -} seqDef; - -/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */ -typedef enum { - ZSTD_llt_none = 0, /* no longLengthType */ - ZSTD_llt_literalLength = 1, /* represents a long literal */ - ZSTD_llt_matchLength = 2 /* represents a long match */ -} ZSTD_longLengthType_e; - -typedef struct { - seqDef* sequencesStart; - seqDef* sequences; /* ptr to end of sequences */ - BYTE* litStart; - BYTE* lit; /* ptr to end of literals */ - BYTE* llCode; - BYTE* mlCode; - BYTE* ofCode; - size_t maxNbSeq; - size_t maxNbLit; - - /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength - * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment - * the existing value of the litLength or matchLength by 0x10000. - */ - ZSTD_longLengthType_e longLengthType; - U32 longLengthPos; /* Index of the sequence to apply long length modification to */ -} seqStore_t; - -typedef struct { - U32 litLength; - U32 matchLength; -} ZSTD_sequenceLength; - -/** - * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences - * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. 
- */ -MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) -{ - ZSTD_sequenceLength seqLen; - seqLen.litLength = seq->litLength; - seqLen.matchLength = seq->mlBase + MINMATCH; - if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { - if (seqStore->longLengthType == ZSTD_llt_literalLength) { - seqLen.litLength += 0x10000; - } - if (seqStore->longLengthType == ZSTD_llt_matchLength) { - seqLen.matchLength += 0x10000; - } - } - return seqLen; -} /** * Contains the compressed frame size and an upper-bound for the decompressed frame size. @@ -347,10 +291,6 @@ typedef struct { unsigned long long decompressedBound; } ZSTD_frameSizeInfo; /* decompress & legacy */ -const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ -int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ - - /* ZSTD_invalidateRepCodes() : * ensures next compression will not use repcodes from previous block. 
* Note : only works with regular variant; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9df219ded..a86748dae 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -6909,7 +6909,7 @@ ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1+1) { cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); - DEBUGLOG(5, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); + DEBUGLOG(5, "Block too small (%zu): data remains uncompressed: cSize=%zu", blockSize, cBlockSize); cSize += cBlockSize; ip += blockSize; op += cBlockSize; diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index d49023d88..e9f058771 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -83,6 +83,70 @@ typedef struct { ZSTD_fseCTables_t fse; } ZSTD_entropyCTables_t; +/*********************************************** +* Sequences * +***********************************************/ +typedef struct seqDef_s { + U32 offBase; /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */ + U16 litLength; + U16 mlBase; /* mlBase == matchLength - MINMATCH */ +} seqDef; + +/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. 
*/ +typedef enum { + ZSTD_llt_none = 0, /* no longLengthType */ + ZSTD_llt_literalLength = 1, /* represents a long literal */ + ZSTD_llt_matchLength = 2 /* represents a long match */ +} ZSTD_longLengthType_e; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + ZSTD_longLengthType_e longLengthType; + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength. 
+ */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->mlBase + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthType == ZSTD_llt_literalLength) { + seqLen.litLength += 0x10000; + } + if (seqStore->longLengthType == ZSTD_llt_matchLength) { + seqLen.matchLength += 0x10000; + } + } + return seqLen; +} + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +int ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + + /*********************************************** * Entropy buffer statistics structs and funcs * ***********************************************/ diff --git a/lib/compress/zstd_compress_sequences.h b/lib/compress/zstd_compress_sequences.h index 4a3a05da9..fb393cca2 100644 --- a/lib/compress/zstd_compress_sequences.h +++ b/lib/compress/zstd_compress_sequences.h @@ -11,6 +11,7 @@ #ifndef ZSTD_COMPRESS_SEQUENCES_H #define ZSTD_COMPRESS_SEQUENCES_H +#include "zstd_compress_internal.h" /* seqDef */ #include "../common/fse.h" /* FSE_repeat, FSE_CTable */ #include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */