*/
static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs,
COVER_map_t *activeDmers, U32 begin,
- U32 end, COVER_params_t parameters) {
+ U32 end,
+ ZDICT_cover_params_t parameters) {
/* Constants */
const U32 k = parameters.k;
const U32 d = parameters.d;
* Check the validity of the parameters.
* Returns non-zero if the parameters are valid and 0 otherwise.
*/
-static int COVER_checkParameters(COVER_params_t parameters) {
+static int COVER_checkParameters(ZDICT_cover_params_t parameters) {
/* k and d are required parameters */
if (parameters.d == 0 || parameters.k == 0) {
return 0;
static size_t COVER_buildDictionary(const COVER_ctx_t *ctx, U32 *freqs,
COVER_map_t *activeDmers, void *dictBuffer,
size_t dictBufferCapacity,
- COVER_params_t parameters) {
+ ZDICT_cover_params_t parameters) {
BYTE *const dict = (BYTE *)dictBuffer;
size_t tail = dictBufferCapacity;
/* Divide the data up into epochs of equal size.
return tail;
}
-/**
- * Translate from COVER_params_t to ZDICT_params_t required for finalizing the
- * dictionary.
- */
-static ZDICT_params_t COVER_translateParams(COVER_params_t parameters) {
- ZDICT_params_t zdictParams;
- memset(&zdictParams, 0, sizeof(zdictParams));
- zdictParams.notificationLevel = 1;
- zdictParams.dictID = parameters.dictID;
- zdictParams.compressionLevel = parameters.compressionLevel;
- return zdictParams;
-}
-
-ZDICTLIB_API size_t COVER_trainFromBuffer(
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
- const size_t *samplesSizes, unsigned nbSamples, COVER_params_t parameters) {
+ const size_t *samplesSizes, unsigned nbSamples,
+ ZDICT_cover_params_t parameters) {
BYTE *const dict = (BYTE *)dictBuffer;
COVER_ctx_t ctx;
COVER_map_t activeDmers;
return ERROR(dstSize_tooSmall);
}
/* Initialize global data */
- g_displayLevel = parameters.notificationLevel;
+ g_displayLevel = parameters.zParams.notificationLevel;
/* Initialize context and activeDmers */
if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
parameters.d)) {
const size_t tail =
COVER_buildDictionary(&ctx, ctx.freqs, &activeDmers, dictBuffer,
dictBufferCapacity, parameters);
- ZDICT_params_t zdictParams = COVER_translateParams(parameters);
const size_t dictionarySize = ZDICT_finalizeDictionary(
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
- samplesBuffer, samplesSizes, nbSamples, zdictParams);
+ samplesBuffer, samplesSizes, nbSamples, parameters.zParams);
if (!ZSTD_isError(dictionarySize)) {
DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
(U32)dictionarySize);
size_t liveJobs;
void *dict;
size_t dictSize;
- COVER_params_t parameters;
+ ZDICT_cover_params_t parameters;
size_t compressedSize;
} COVER_best_t;
* If this dictionary is the best so far save it and its parameters.
*/
static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
- COVER_params_t parameters, void *dict,
+ ZDICT_cover_params_t parameters, void *dict,
size_t dictSize) {
if (!best) {
return;
const COVER_ctx_t *ctx;
COVER_best_t *best;
size_t dictBufferCapacity;
- COVER_params_t parameters;
+ ZDICT_cover_params_t parameters;
} COVER_tryParameters_data_t;
/**
/* Save parameters as local variables */
COVER_tryParameters_data_t *const data = (COVER_tryParameters_data_t *)opaque;
const COVER_ctx_t *const ctx = data->ctx;
- const COVER_params_t parameters = data->parameters;
+ const ZDICT_cover_params_t parameters = data->parameters;
size_t dictBufferCapacity = data->dictBufferCapacity;
size_t totalCompressedSize = ERROR(GENERIC);
/* Allocate space for hash table, dict, and freqs */
{
const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict,
dictBufferCapacity, parameters);
- const ZDICT_params_t zdictParams = COVER_translateParams(parameters);
dictBufferCapacity = ZDICT_finalizeDictionary(
dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
- ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples, zdictParams);
+ ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
+ parameters.zParams);
if (ZDICT_isError(dictBufferCapacity)) {
DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
goto _cleanup;
}
/* Create the cctx and cdict */
cctx = ZSTD_createCCtx();
- cdict =
- ZSTD_createCDict(dict, dictBufferCapacity, parameters.compressionLevel);
+ cdict = ZSTD_createCDict(dict, dictBufferCapacity,
+ parameters.zParams.compressionLevel);
if (!dst || !cctx || !cdict) {
goto _compressCleanup;
}
}
}
-ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void *dictBuffer,
- size_t dictBufferCapacity,
- const void *samplesBuffer,
- const size_t *samplesSizes,
- unsigned nbSamples,
- COVER_params_t *parameters) {
+ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+ const size_t *samplesSizes, unsigned nbSamples,
+ ZDICT_cover_params_t *parameters) {
/* constants */
const unsigned nbThreads = parameters->nbThreads;
const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
const unsigned kIterations =
(1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
/* Local variables */
- const int displayLevel = parameters->notificationLevel;
+ const int displayLevel = parameters->zParams.notificationLevel;
unsigned iteration = 1;
unsigned d;
unsigned k;
/* Initialization */
COVER_best_init(&best);
/* Turn down global display level to clean up display at level 2 and below */
- g_displayLevel = parameters->notificationLevel - 1;
+ g_displayLevel = parameters->zParams.notificationLevel - 1;
/* Loop through d first because each new value needs a new context */
LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
kIterations);
}
-static size_t ZDICT_trainBuffer(dictItem* dictList, U32 dictListSize,
+static size_t ZDICT_trainBuffer_legacy(dictItem* dictList, U32 dictListSize,
const void* const buffer, size_t bufferSize, /* buffer must end with noisy guard band */
const size_t* fileSizes, unsigned nbFiles,
U32 minRatio, U32 notificationLevel)
} } }
}
-/*
-static size_t ZDICT_maxSampleSize(const size_t* fileSizes, unsigned nbFiles)
-{
- unsigned u;
- size_t max=0;
- for (u=0; u<nbFiles; u++)
- if (max < fileSizes[u]) max = fileSizes[u];
- return max;
-}
-*/
-
static size_t ZDICT_totalSampleSize(const size_t* fileSizes, unsigned nbFiles)
{
size_t total=0;
}
-/*! ZDICT_trainFromBuffer_unsafe() :
+/*! ZDICT_trainFromBuffer_unsafe_legacy() :
* Warning : `samplesBuffer` must be followed by noisy guard band.
* @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
*/
-size_t ZDICT_trainFromBuffer_unsafe(
+size_t ZDICT_trainFromBuffer_unsafe_legacy(
void* dictBuffer, size_t maxDictSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_params_t params)
+ ZDICT_legacy_params_t params)
{
U32 const dictListSize = MAX(MAX(DICTLISTSIZE_DEFAULT, nbSamples), (U32)(maxDictSize/16));
dictItem* const dictList = (dictItem*)malloc(dictListSize * sizeof(*dictList));
size_t const targetDictSize = maxDictSize;
size_t const samplesBuffSize = ZDICT_totalSampleSize(samplesSizes, nbSamples);
size_t dictSize = 0;
- U32 const notificationLevel = params.notificationLevel;
+ U32 const notificationLevel = params.zParams.notificationLevel;
/* checks */
if (!dictList) return ERROR(memory_allocation);
ZDICT_initDictItem(dictList);
/* build dictionary */
- ZDICT_trainBuffer(dictList, dictListSize,
- samplesBuffer, samplesBuffSize,
- samplesSizes, nbSamples,
- minRep, notificationLevel);
+ ZDICT_trainBuffer_legacy(dictList, dictListSize,
+ samplesBuffer, samplesBuffSize,
+ samplesSizes, nbSamples,
+ minRep, notificationLevel);
/* display best matches */
- if (params.notificationLevel>= 3) {
+ if (params.zParams.notificationLevel>= 3) {
U32 const nb = MIN(25, dictList[0].pos);
U32 const dictContentSize = ZDICT_dictSize(dictList);
U32 u;
dictSize = ZDICT_addEntropyTablesFromBuffer_advanced(dictBuffer, dictContentSize, maxDictSize,
samplesBuffer, samplesSizes, nbSamples,
- params);
+ params.zParams);
}
/* clean up */
/* issue : samplesBuffer need to be followed by a noisy guard band.
* work around : duplicate the buffer, and add the noise */
-size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_params_t params)
+size_t ZDICT_trainFromBuffer_legacy(void* dictBuffer, size_t dictBufferCapacity,
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+ ZDICT_legacy_params_t params)
{
size_t result;
void* newBuff;
memcpy(newBuff, samplesBuffer, sBuffSize);
ZDICT_fillNoise((char*)newBuff + sBuffSize, NOISELENGTH); /* guard band, for end of buffer condition */
- result = ZDICT_trainFromBuffer_unsafe(
- dictBuffer, dictBufferCapacity,
- newBuff, samplesSizes, nbSamples,
- params);
+ result =
+ ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, dictBufferCapacity, newBuff,
+ samplesSizes, nbSamples, params);
free(newBuff);
return result;
}
size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
{
- ZDICT_params_t params;
+ ZDICT_cover_params_t params;
memset(&params, 0, sizeof(params));
- return ZDICT_trainFromBuffer_advanced(dictBuffer, dictBufferCapacity,
- samplesBuffer, samplesSizes, nbSamples,
- params);
+ params.d = 8;
+ params.steps = 4;
+ return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
+ samplesBuffer, samplesSizes,
+ nbSamples, &params);
}
size_t ZDICT_addEntropyTablesFromBuffer(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
#endif
-/*! ZDICT_trainFromBuffer() :
- Train a dictionary from an array of samples.
- Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- The resulting dictionary will be saved into `dictBuffer`.
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- or an error code, which can be tested with ZDICT_isError().
- Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
- It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
- In general, it's recommended to provide a few thousands samples, but this can vary a lot.
- It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
-*/
+/*! ZDICT_trainFromBuffer():
+ * Train a dictionary from an array of samples.
+ * Uses ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ * or an error code, which can be tested with ZDICT_isError().
+ * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte.
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+ * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
+ */
ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
* ==================================================================================== */
typedef struct {
- unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
int compressionLevel; /* 0 means default; target a specific zstd compression level */
unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
- unsigned reserved[2]; /* reserved space for future parameters */
} ZDICT_params_t;
-
-/*! ZDICT_trainFromBuffer_advanced() :
- Same as ZDICT_trainFromBuffer() with control over more parameters.
- `parameters` is optional and can be provided with values set to 0 to mean "default".
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferSize`),
- or an error code, which can be tested by ZDICT_isError().
- note : ZDICT_trainFromBuffer_advanced() will send notifications into stderr if instructed to, using notificationLevel>0.
-*/
-ZDICTLIB_API size_t ZDICT_trainFromBuffer_advanced(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_params_t parameters);
-
-/*! COVER_params_t :
- For all values 0 means default.
- k and d are the only required parameters.
-*/
+/*! ZDICT_cover_params_t:
+ * For all values 0 means default.
+ * k and d are the only required parameters.
+ */
typedef struct {
unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
-
unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
- unsigned notificationLevel; /* Write to stderr; 0 = none (default); 1 = errors; 2 = progression; 3 = details; 4 = debug; */
- unsigned dictID; /* 0 means auto mode (32-bits random value); other : force dictID value */
- int compressionLevel; /* 0 means default; target a specific zstd compression level */
-} COVER_params_t;
-
-
-/*! COVER_trainFromBuffer() :
- Train a dictionary from an array of samples using the COVER algorithm.
- Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
- supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
- The resulting dictionary will be saved into `dictBuffer`.
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- or an error code, which can be tested with ZDICT_isError().
- Note : COVER_trainFromBuffer() requires about 9 bytes of memory for each input byte.
- Tips : In general, a reasonable dictionary has a size of ~ 100 KB.
- It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
- In general, it's recommended to provide a few thousands samples, but this can vary a lot.
- It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
-*/
-ZDICTLIB_API size_t COVER_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- COVER_params_t parameters);
-
-/*! COVER_optimizeTrainFromBuffer() :
- The same requirements as above hold for all the parameters except `parameters`.
- This function tries many parameter combinations and picks the best parameters.
- `*parameters` is filled with the best parameters found, and the dictionary
- constructed with those parameters is stored in `dictBuffer`.
-
- All of the parameters d, k, steps are optional.
- If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
- if steps is zero it defaults to its default value.
- If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
-
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
- or an error code, which can be tested with ZDICT_isError().
- On success `*parameters` contains the parameters selected.
- Note : COVER_optimizeTrainFromBuffer() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread.
-*/
-ZDICTLIB_API size_t COVER_optimizeTrainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t *samplesSizes, unsigned nbSamples,
- COVER_params_t *parameters);
-
-/*! ZDICT_finalizeDictionary() :
-
- Given a custom content as a basis for dictionary, and a set of samples,
- finalize dictionary by adding headers and statistics.
-
- Samples must be stored concatenated in a flat buffer `samplesBuffer`,
- supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
-
- dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
- maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
-
- @return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
- or an error code, which can be tested by ZDICT_isError().
- note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
- note 2 : dictBuffer and dictContent can overlap
-*/
+ ZDICT_params_t zParams;
+} ZDICT_cover_params_t;
+
+
+/*! ZDICT_trainFromBuffer_cover():
+ * Train a dictionary from an array of samples using the COVER algorithm.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ * or an error code, which can be tested with ZDICT_isError().
+ * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte.
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+ * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
+ */
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover(
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+ const size_t *samplesSizes, unsigned nbSamples,
+ ZDICT_cover_params_t parameters);
+
+/*! ZDICT_optimizeTrainFromBuffer_cover():
+ * The same requirements as above hold for all the parameters except `parameters`.
+ * This function tries many parameter combinations and picks the best parameters.
+ * `*parameters` is filled with the best parameters found, and the dictionary
+ * constructed with those parameters is stored in `dictBuffer`.
+ *
+ * All of the parameters d, k, steps are optional.
+ * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
+ * If steps is zero it defaults to its default value (32).
+ * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [16, 2048].
+ *
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ * or an error code, which can be tested with ZDICT_isError().
+ * On success `*parameters` contains the parameters selected.
+ * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte, plus about another 5 bytes of memory per input byte for each thread.
+ */
+ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover(
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+ const size_t *samplesSizes, unsigned nbSamples,
+ ZDICT_cover_params_t *parameters);
+
+/*! ZDICT_finalizeDictionary():
+ * Given a custom content as a basis for dictionary, and a set of samples,
+ * finalize dictionary by adding headers and statistics.
+ *
+ * Samples must be stored concatenated in a flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
+ *
+ * dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
+ * maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
+ *
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
+ * or an error code, which can be tested by ZDICT_isError().
+ * Note: ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
+ * Note 2: dictBuffer and dictContent can overlap
+ */
#define ZDICT_CONTENTSIZE_MIN 128
#define ZDICT_DICTSIZE_MIN 256
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);
-
+typedef struct {
+ unsigned selectivityLevel; /* 0 means default; larger => select more => larger dictionary */
+ ZDICT_params_t zParams;
+} ZDICT_legacy_params_t;
+
+/*! ZDICT_trainFromBuffer_legacy():
+ * Train a dictionary from an array of samples.
+ * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
+ * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
+ * The resulting dictionary will be saved into `dictBuffer`.
+ * `parameters` is optional and can be provided with values set to 0 to mean "default".
+ * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
+ * or an error code, which can be tested with ZDICT_isError().
+ * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
+ * It's obviously possible to target smaller or larger ones, just by specifying different `dictBufferCapacity`.
+ * In general, it's recommended to provide a few thousands samples, but this can vary a lot.
+ * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
+ * Note: ZDICT_trainFromBuffer_legacy() will send notifications into stderr if instructed to, using notificationLevel>0.
+ */
+ZDICTLIB_API size_t ZDICT_trainFromBuffer_legacy(
+ void *dictBuffer, size_t dictBufferCapacity, const void *samplesBuffer,
+ const size_t *samplesSizes, unsigned nbSamples, ZDICT_legacy_params_t parameters);
/* Deprecation warnings */
/* It is generally possible to disable deprecation warnings from compiler,
}
-/*! ZDICT_trainFromBuffer_unsafe() :
+/*! ZDICT_trainFromBuffer_unsafe_legacy() :
Strictly Internal use only !!
- Same as ZDICT_trainFromBuffer_advanced(), but does not control `samplesBuffer`.
+ Same as ZDICT_trainFromBuffer_legacy(), but does not control `samplesBuffer`.
`samplesBuffer` must be followed by noisy guard band to avoid out-of-buffer reads.
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
or an error code.
*/
-size_t ZDICT_trainFromBuffer_unsafe(void* dictBuffer, size_t dictBufferCapacity,
- const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
- ZDICT_params_t parameters);
+size_t ZDICT_trainFromBuffer_unsafe_legacy(void* dictBuffer, size_t dictBufferCapacity,
+ const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
+ ZDICT_legacy_params_t parameters);
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles,
- ZDICT_params_t *params, COVER_params_t *coverParams,
+ ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams,
int optimizeCover)
{
void* const dictBuffer = malloc(maxDictSize);
int result = 0;
/* Checks */
- if (params) g_displayLevel = params->notificationLevel;
- else if (coverParams) g_displayLevel = coverParams->notificationLevel;
+ if (params) g_displayLevel = params->zParams.notificationLevel;
+ else if (coverParams) g_displayLevel = coverParams->zParams.notificationLevel;
else EXM_THROW(13, "Neither dictionary algorith selected"); /* should not happen */
if ((!fileSizes) || (!srcBuffer) || (!dictBuffer)) EXM_THROW(12, "not enough memory for DiB_trainFiles"); /* should not happen */
if (g_tooLargeSamples) {
size_t dictSize;
if (params) {
DiB_fillNoise((char*)srcBuffer + benchedSize, NOISELENGTH); /* guard band, for end of buffer condition */
- dictSize = ZDICT_trainFromBuffer_unsafe(dictBuffer, maxDictSize,
- srcBuffer, fileSizes, nbFiles,
- *params);
+ dictSize = ZDICT_trainFromBuffer_unsafe_legacy(dictBuffer, maxDictSize,
+ srcBuffer, fileSizes, nbFiles,
+ *params);
} else if (optimizeCover) {
- dictSize = COVER_optimizeTrainFromBuffer(
- dictBuffer, maxDictSize, srcBuffer, fileSizes, nbFiles,
- coverParams);
+ dictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, maxDictSize,
+ srcBuffer, fileSizes, nbFiles,
+ coverParams);
if (!ZDICT_isError(dictSize)) {
- DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
+ DISPLAYLEVEL(2, "k=%u\nd=%u\nsteps=%u\n", coverParams->k, coverParams->d, coverParams->steps);
}
} else {
- dictSize = COVER_trainFromBuffer(dictBuffer, maxDictSize,
- srcBuffer, fileSizes, nbFiles,
- *coverParams);
+ dictSize =
+ ZDICT_trainFromBuffer_cover(dictBuffer, maxDictSize, srcBuffer,
+ fileSizes, nbFiles, *coverParams);
}
if (ZDICT_isError(dictSize)) {
DISPLAYLEVEL(1, "dictionary training failed : %s \n", ZDICT_getErrorName(dictSize)); /* should not happen */
*/
int DiB_trainFromFiles(const char* dictFileName, unsigned maxDictSize,
const char** fileNamesTable, unsigned nbFiles,
- ZDICT_params_t *params, COVER_params_t *coverParams,
+ ZDICT_legacy_params_t *params, ZDICT_cover_params_t *coverParams,
int optimizeCover);
#endif
* @return 1 means that cover parameters were correct
* @return 0 in case of malformed parameters
*/
-static unsigned parseCoverParameters(const char* stringPtr, COVER_params_t* params)
+static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
{
memset(params, 0, sizeof(*params));
for (; ;) {
return 1;
}
-static COVER_params_t defaultCoverParams(void)
+static ZDICT_cover_params_t defaultCoverParams(void)
{
- COVER_params_t params;
+ ZDICT_cover_params_t params;
memset(&params, 0, sizeof(params));
params.d = 8;
params.steps = 4;
unsigned fileNamesNb;
#endif
#ifndef ZSTD_NODICT
- COVER_params_t coverParams = defaultCoverParams();
+ ZDICT_cover_params_t coverParams = defaultCoverParams();
int cover = 1;
#endif
/* Check if dictionary builder is selected */
if (operation==zom_train) {
#ifndef ZSTD_NODICT
+ ZDICT_params_t zParams;
+ zParams.compressionLevel = dictCLevel;
+ zParams.notificationLevel = g_displayLevel;
+ zParams.dictID = dictID;
if (cover) {
int const optimize = !coverParams.k || !coverParams.d;
coverParams.nbThreads = nbThreads;
- coverParams.compressionLevel = dictCLevel;
- coverParams.notificationLevel = g_displayLevel;
- coverParams.dictID = dictID;
+ coverParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, NULL, &coverParams, optimize);
} else {
- ZDICT_params_t dictParams;
+ ZDICT_legacy_params_t dictParams;
memset(&dictParams, 0, sizeof(dictParams));
- dictParams.compressionLevel = dictCLevel;
dictParams.selectivityLevel = dictSelect;
- dictParams.notificationLevel = g_displayLevel;
- dictParams.dictID = dictID;
+ dictParams.zParams = zParams;
operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenameTable, filenameIdx, &dictParams, NULL, 0);
}
#endif
size_t const sampleUnitSize = 8 KB;
U32 const nbSamples = (U32)(totalSampleSize / sampleUnitSize);
size_t* const samplesSizes = (size_t*) malloc(nbSamples * sizeof(size_t));
- COVER_params_t params;
+ ZDICT_cover_params_t params;
U32 dictID;
if (dictBuffer==NULL || samplesSizes==NULL) {
goto _output_error;
}
- DISPLAYLEVEL(4, "test%3i : COVER_trainFromBuffer : ", testNb++);
+ DISPLAYLEVEL(4, "test%3i : ZDICT_trainFromBuffer_cover : ", testNb++);
{ U32 u; for (u=0; u<nbSamples; u++) samplesSizes[u] = sampleUnitSize; }
memset(&params, 0, sizeof(params));
params.d = 1 + (FUZ_rand(&seed) % 16);
params.k = params.d + (FUZ_rand(&seed) % 256);
- dictSize = COVER_trainFromBuffer(dictBuffer, dictSize,
- CNBuffer, samplesSizes, nbSamples,
- params);
+ dictSize = ZDICT_trainFromBuffer_cover(dictBuffer, dictSize,
+ CNBuffer, samplesSizes, nbSamples,
+ params);
if (ZDICT_isError(dictSize)) goto _output_error;
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)dictSize);
if (dictID==0) goto _output_error;
DISPLAYLEVEL(4, "OK : %u \n", dictID);
- DISPLAYLEVEL(4, "test%3i : COVER_optimizeTrainFromBuffer : ", testNb++);
+ DISPLAYLEVEL(4, "test%3i : ZDICT_optimizeTrainFromBuffer_cover : ", testNb++);
memset(&params, 0, sizeof(params));
params.steps = 4;
- optDictSize = COVER_optimizeTrainFromBuffer(dictBuffer, optDictSize,
- CNBuffer, samplesSizes, nbSamples / 4,
- &params);
+ optDictSize = ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, optDictSize,
+ CNBuffer, samplesSizes,
+ nbSamples / 4, &params);
if (ZDICT_isError(optDictSize)) goto _output_error;
DISPLAYLEVEL(4, "OK, created dictionary of size %u \n", (U32)optDictSize);
&ZDICT_isError,
&ZDICT_getErrorName,
/* zdict.h: advanced functions */
- &ZDICT_trainFromBuffer_advanced,
+ &ZDICT_trainFromBuffer_cover,
+ &ZDICT_optimizeTrainFromBuffer_cover,
+ &ZDICT_finalizeDictionary,
+ &ZDICT_trainFromBuffer_legacy,
&ZDICT_addEntropyTablesFromBuffer,
NULL,
};