From: Nick Terrell Date: Thu, 7 May 2020 01:51:14 +0000 (-0700) Subject: [zdict] Stabilize ZDICT_finalizeDictionary() X-Git-Tag: v1.4.5^2~40 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=45c66dd298a40e969a0fe6d410fde84fd8a96697;p=thirdparty%2Fzstd.git [zdict] Stabilize ZDICT_finalizeDictionary() --- diff --git a/lib/dictBuilder/zdict.h b/lib/dictBuilder/zdict.h index 53f58d7ac..ff2e77faf 100644 --- a/lib/dictBuilder/zdict.h +++ b/lib/dictBuilder/zdict.h @@ -61,6 +61,53 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCap const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples); +typedef struct { + int compressionLevel; /*< optimize for a specific zstd compression level; 0 means default */ + unsigned notificationLevel; /*< Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ + unsigned dictID; /*< force dictID value; 0 means auto mode (32-bits random value) */ +} ZDICT_params_t; + +/*! ZDICT_finalizeDictionary(): + * Given a custom content as a basis for dictionary, and a set of samples, + * finalize dictionary by adding headers and statistics according to the zstd + * dictionary format. + * + * Samples must be stored concatenated in a flat buffer `samplesBuffer`, + * supplied with an array of sizes `samplesSizes`, providing the size of each + * sample in order. The samples are used to construct the statistics, so they + * should be representative of what you will compress with this dictionary. + * + * The compression level can be set in `parameters`. You should pass the + * compression level you expect to use in production. The statistics for each + * compression level differ, so tuning the dictionary for the compression level + * can help quite a bit. + * + * You can set an explicit dictionary ID in `parameters`, or allow us to pick + * a random dictionary ID for you, but we can't guarantee no collisions. + * + * The dstDictBuffer and the dictContent may overlap, and the content will be + * appended to the end of the header. If the header + the content doesn't fit in + * maxDictSize the beginning of the content is truncated to make room, since it + * is presumed that the most profitable content is at the end of the dictionary, + * since that is the cheapest to reference. + * + * `dictContentSize` must be >= ZDICT_CONTENTSIZE_MIN bytes. + * `maxDictSize` must be >= max(dictContentSize, ZSTD_DICTSIZE_MIN). + * + * @return: size of dictionary stored into `dstDictBuffer` (<= `maxDictSize`), + * or an error code, which can be tested by ZDICT_isError(). + * Note: ZDICT_finalizeDictionary() will push notifications into stderr if + * instructed to, using notificationLevel>0. + * NOTE: This function currently may fail in several edge cases including: + * * Not enough samples + * * Samples are uncompressible + * * Samples are all exactly the same + */ +ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dstDictBuffer, size_t maxDictSize, + const void* dictContent, size_t dictContentSize, + const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, + ZDICT_params_t parameters); + /*====== Helper functions ======*/ ZDICTLIB_API unsigned ZDICT_getDictID(const void* dictBuffer, size_t dictSize); /**< extracts dictID; @return zero if error (not a valid dictionary) */ @@ -79,11 +126,8 @@ ZDICTLIB_API const char* ZDICT_getErrorName(size_t errorCode); * Use them only in association with static linking. * ==================================================================================== */ -typedef struct { - int compressionLevel; /* optimize for a specific zstd compression level; 0 means default */ - unsigned notificationLevel; /* Write log to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */ - unsigned dictID; /* force dictID value; 0 means auto mode (32-bits random value) */ -} ZDICT_params_t; +#define ZDICT_CONTENTSIZE_MIN 128 +#define ZDICT_DICTSIZE_MIN 256 /*! ZDICT_cover_params_t: * k and d are the only required parameters. @@ -199,28 +243,6 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, const size_t* samplesSizes, unsigned nbSamples, ZDICT_fastCover_params_t* parameters); -/*! ZDICT_finalizeDictionary(): - * Given a custom content as a basis for dictionary, and a set of samples, - * finalize dictionary by adding headers and statistics. - * - * Samples must be stored concatenated in a flat buffer `samplesBuffer`, - * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order. - * - * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes. - * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes. - * - * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`), - * or an error code, which can be tested by ZDICT_isError(). - * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0. - * Note 2: dictBuffer and dictContent can overlap - */ -#define ZDICT_CONTENTSIZE_MIN 128 -#define ZDICT_DICTSIZE_MIN 256 -ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity, - const void* dictContent, size_t dictContentSize, - const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, - ZDICT_params_t parameters); - typedef struct { unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */ ZDICT_params_t zParams;