From: Yann Collet Date: Mon, 6 Jun 2016 22:51:51 +0000 (+0200) Subject: first implementation of the new dictionary API (untested) X-Git-Tag: v0.7.0^2~44 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=81e13ef7cfceb5b79e11eb4c3042b5c4cbadd9c4;p=thirdparty%2Fzstd.git first implementation of the new dictionary API (untested) --- diff --git a/lib/common/zstd.h b/lib/common/zstd.h index b899656d1..ea0840b69 100644 --- a/lib/common/zstd.h +++ b/lib/common/zstd.h @@ -129,9 +129,47 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapa /*-*********************** * Dictionary API *************************/ +/*! ZSTD_createCDict() : +* Create a digested dictionary, ready to start compression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTD_CDict_s ZSTD_CDict; +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : +* Compression using a pre-digested Dictionary +* In contrast with older ZSTD_compress_usingDict(), use dictionary without significant overhead. +* Note that compression level is decided during dictionary creation */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + +/*! ZSTD_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. +* `dict` can be released after creation */ +typedef struct ZSTD_DDict_s ZSTD_DDict; +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* In contrast with older ZSTD_decompress_usingDict(), use dictionary without significant overhead. 
*/ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/*-*********************** +* Deprecated API +*************************/ /*! ZSTD_compress_usingDict() : +* *** Warning : this function will soon be declared deprecated *** * Compression using a pre-defined Dictionary content (see dictBuilder). -* Note : dict can be NULL, in which case, it's equivalent to ZSTD_compressCCtx() */ +* Note 1 : This function loads the dictionary, resulting in a significant startup time. +* Note 2 : `dict` must remain valid and unmodified during compression operation. +* Note 3 : `dict` can be `NULL`, in which case, it's equivalent to ZSTD_compressCCtx() */ ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -139,9 +177,12 @@ ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, int compressionLevel); /*! ZSTD_decompress_usingDict() : +* *** Warning : this function will soon be declared deprecated *** * Decompression using a pre-defined Dictionary content (see dictBuilder). -* Dictionary must be identical to the one used during compression, otherwise regenerated data will be corrupted. -* Note : dict can be NULL, in which case, it's equivalent to ZSTD_decompressDCtx() */ +* Dictionary must be identical to the one used during compression. +* Note 1 : This function loads the dictionary, resulting in a significant startup time. +* Note 2 : `dict` must remain valid and unmodified during decompression operation. 
+* Note 3 : `dict` can be `NULL`, in which case, it's equivalent to ZSTD_decompressDCtx() */ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, @@ -158,7 +199,7 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, * ==================================================================================== */ /*--- Dependency ---*/ -#include "mem.h" +#include "mem.h" /* U32 */ /*--- Constants ---*/ @@ -187,22 +228,22 @@ static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable f /*--- Types ---*/ -typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; /* from faster to stronger */ +typedef enum { ZSTD_fast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2, ZSTD_btlazy2, ZSTD_btopt } ZSTD_strategy; /*< from faster to stronger */ typedef struct { - U32 windowLog; /* largest match distance : larger == more compression, more memory needed during decompression */ - U32 chainLog; /* fully searched segment : larger == more compression, slower, more memory (useless for fast) */ - U32 hashLog; /* dispatch table : larger == faster, more memory */ - U32 searchLog; /* nb of searches : larger == more compression, slower */ - U32 searchLength; /* match length searched : larger == faster decompression, sometimes less compression */ - U32 targetLength; /* acceptable match size for optimal parser (only) : larger == more compression, slower */ + U32 windowLog; /*< largest match distance : larger == more compression, more memory needed during decompression */ + U32 chainLog; /*< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + U32 hashLog; /*< dispatch table : larger == faster, more memory */ + U32 searchLog; /*< nb of searches : larger == more compression, slower */ + U32 searchLength; /*< match length searched : larger == faster decompression, sometimes less compression */ + U32 targetLength; /*< 
acceptable match size for optimal parser (only) : larger == more compression, slower */ ZSTD_strategy strategy; } ZSTD_compressionParameters; typedef struct { - U32 contentSizeFlag; /* 1: content size will be in frame header (if known). */ - U32 checksumFlag; /* 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */ - U32 noDictIDFlag; /* 1: no dict ID will be saved into frame header (if dictionary compression) */ + U32 contentSizeFlag; /*< 1: content size will be in frame header (if known). */ + U32 checksumFlag; /*< 1: will generate a 22-bits checksum at end of frame, to be used for error detection by decompressor */ + U32 noDictIDFlag; /*< 1: no dict ID will be saved into frame header (if dictionary compression) */ } ZSTD_frameParameters; typedef struct { @@ -217,15 +258,16 @@ typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; v /*-************************************* -* Advanced functions +* Advanced compression functions ***************************************/ /*! ZSTD_createCCtx_advanced() : * Create a ZSTD compression context using external alloc and free functions */ ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); -/*! ZSTD_createDCtx_advanced() : - * Create a ZSTD decompression context using external alloc and free functions */ -ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +/*! ZSTD_createCDict_advanced() : + * Create a ZSTD_CDict using external alloc and free, and customized compression parameters */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_parameters params, ZSTD_customMem customMem); ZSTDLIB_API unsigned ZSTD_maxCLevel (void); @@ -251,27 +293,11 @@ ZSTDLIB_API size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx, const void* dict,size_t dictSize, ZSTD_parameters params); -/*! 
ZSTD_compress_usingPreparedDCtx() : -* Same as ZSTD_compress_usingDict, but using a reference context `preparedCCtx`, where dictionary has been loaded. -* It avoids reloading the dictionary each time. -* `preparedCCtx` must have been properly initialized using ZSTD_compressBegin_usingDict() or ZSTD_compressBegin_advanced(). -* Requires 2 contexts : 1 for reference (preparedCCtx) which will not be modified, and 1 to run the compression operation (cctx) */ -ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx( - ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); - /*- Advanced Decompression functions -*/ -/*! ZSTD_decompress_usingPreparedDCtx() : -* Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded. -* It avoids reloading the dictionary each time. -* `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict(). -* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */ -ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx( - ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx, - void* dst, size_t dstCapacity, - const void* src, size_t srcSize); +/*! ZSTD_createDCtx_advanced() : + * Create a ZSTD decompression context using external alloc and free functions */ +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); /* ************************************** @@ -405,6 +431,31 @@ ZSTDLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); ZSTDLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); +/*-*********************** +* Deprecated API +*************************/ +/*! ZSTD_compress_usingPreparedCCtx() : +* *** Warning : this function will soon be deprecated ! *** +* Same as ZSTD_compress_usingDict, but using a reference context `preparedCCtx`, where dictionary has been loaded. 
+* It avoids reloading the dictionary each time. +* `preparedCCtx` must have been properly initialized using ZSTD_compressBegin_usingDict() or ZSTD_compressBegin_advanced(). +* Requires 2 contexts : 1 for reference (preparedCCtx) which will not be modified, and 1 to run the compression operation (cctx) */ +ZSTDLIB_API size_t ZSTD_compress_usingPreparedCCtx( + ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! ZSTD_decompress_usingPreparedDCtx() : +* *** Warning : this function will soon be deprecated ! *** +* Same as ZSTD_decompress_usingDict, but using a reference context `preparedDCtx`, where dictionary has been loaded. +* It avoids reloading the dictionary each time. +* `preparedDCtx` must have been properly initialized using ZSTD_decompressBegin_usingDict(). +* Requires 2 contexts : 1 for reference (preparedDCtx), which will not be modified, and 1 to run the decompression operation (dctx) */ +ZSTDLIB_API size_t ZSTD_decompress_usingPreparedDCtx( + ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + #endif /* ZSTD_STATIC_LINKING_ONLY */ #if defined (__cplusplus) diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index 9c06f603c..e57f38d90 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -2385,7 +2385,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* zc, /*! 
ZSTD_compressBegin_advanced() : * @return : 0, or an error code */ -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize) { @@ -2393,17 +2393,17 @@ size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* zc, { size_t const errorCode = ZSTD_checkCParams_advanced(params.cParams, pledgedSrcSize); if (ZSTD_isError(errorCode)) return errorCode; } - return ZSTD_compressBegin_internal(zc, dict, dictSize, params, pledgedSrcSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize); } -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* zc, const void* dict, size_t dictSize, int compressionLevel) +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) { ZSTD_parameters params; memset(&params, 0, sizeof(params)); params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); - ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", zc->base, compressionLevel); - return ZSTD_compressBegin_internal(zc, dict, dictSize, params, 0); + ZSTD_LOG_BLOCK("%p: ZSTD_compressBegin_usingDict compressionLevel=%d\n", cctx->base, compressionLevel); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0); } @@ -2532,6 +2532,81 @@ size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcS } +/* ===== Dictionary API ===== */ + +struct ZSTD_CDict_s { + void* dictContent; + size_t dictContentSize; + ZSTD_CCtx* refContext; +}; /* typedef'd to ZSTD_CDict within zstd.h */ + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem) +{ + if (!customMem.customAlloc && !customMem.customFree) + customMem = defaultCustomMem; + + if (!customMem.customAlloc || !customMem.customFree) + return NULL; + + { ZSTD_CDict* const cdict = (ZSTD_CDict*) customMem.customAlloc(customMem.opaque, 
sizeof(*cdict)); + void* const dictContent = customMem.customAlloc(customMem.opaque, dictSize); + ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem); + + if (!dictContent || !cdict || !cctx) { + customMem.customFree(customMem.opaque, dictContent); + customMem.customFree(customMem.opaque, cdict); + customMem.customFree(customMem.opaque, cctx); + return NULL; + } + + memcpy(dictContent, dict, dictSize); + { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0); + if (ZSTD_isError(errorCode)) { + customMem.customFree(customMem.opaque, dictContent); + customMem.customFree(customMem.opaque, cdict); + customMem.customFree(customMem.opaque, cctx); + return NULL; + } } + + cdict->dictContent = dictContent; + cdict->dictContentSize = dictSize; + cdict->refContext = cctx; + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + ZSTD_parameters params; + memset(&params, 0, sizeof(params)); + params.cParams = ZSTD_getCParams(compressionLevel, 0, dictSize); + params.fParams.contentSizeFlag = 1; + return ZSTD_createCDict_advanced(dict, dictSize, params, allocator); +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + ZSTD_freeFunction const cFree = cdict->refContext->customMem.customFree; + void* const opaque = cdict->refContext->customMem.opaque; + ZSTD_freeCCtx(cdict->refContext); + cFree(opaque, cdict->dictContent); + cFree(opaque, cdict); + return 0; +} + +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + return ZSTD_compress_usingPreparedCCtx(cctx, cdict->refContext, + dst, dstCapacity, + src, srcSize); +} + + + /*-===== Pre-defined compression levels =====-*/ #define ZSTD_DEFAULT_CLEVEL 1 diff --git a/lib/decompress/zstd_decompress.c b/lib/decompress/zstd_decompress.c index 9b7b1886b..344bd0201 100644 --- 
a/lib/decompress/zstd_decompress.c +++ b/lib/decompress/zstd_decompress.c @@ -1267,3 +1267,77 @@ size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t return 0; } + + +struct ZSTD_DDict_s { + void* dictContent; + size_t dictContentSize; + ZSTD_DCtx* refContext; +}; /* typedef'd to ZSTD_DDict within zstd.h */ + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, ZSTD_customMem customMem) +{ + if (!customMem.customAlloc && !customMem.customFree) + customMem = defaultCustomMem; + + if (!customMem.customAlloc || !customMem.customFree) + return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) customMem.customAlloc(customMem.opaque, sizeof(*ddict)); + void* const dictContent = customMem.customAlloc(customMem.opaque, dictSize); + ZSTD_DCtx* const dctx = ZSTD_createDCtx_advanced(customMem); + + if (!dictContent || !ddict || !dctx) { + customMem.customFree(customMem.opaque, dictContent); + customMem.customFree(customMem.opaque, ddict); + customMem.customFree(customMem.opaque, dctx); + return NULL; + } + + memcpy(dictContent, dict, dictSize); + { size_t const errorCode = ZSTD_decompressBegin_usingDict(dctx, dictContent, dictSize); + if (ZSTD_isError(errorCode)) { + customMem.customFree(customMem.opaque, dictContent); + customMem.customFree(customMem.opaque, ddict); + customMem.customFree(customMem.opaque, dctx); + return NULL; + } } + + ddict->dictContent = dictContent; + ddict->dictContentSize = dictSize; + ddict->refContext = dctx; + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, ready to start decompression operation without startup delay. 
+* `dict` can be released after creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, allocator); +} + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + ZSTD_freeFunction const cFree = ddict->refContext->customMem.customFree; + void* const opaque = ddict->refContext->customMem.opaque; + ZSTD_freeDCtx(ddict->refContext); + cFree(opaque, ddict->dictContent); + cFree(opaque, ddict); + return 0; +} + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* In contrast with older ZSTD_decompress_usingDict(), use dictionary without significant overhead. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + return ZSTD_decompress_usingPreparedDCtx(dctx, ddict->refContext, + dst, dstCapacity, + src, srcSize); +}