/*-**************************************
* Tuning parameters
****************************************/
+#define MINRATIO 4 /* minimum nb of apparition to be selected in dictionary */
#define ZDICT_MAX_SAMPLES_SIZE (2000U << 20)
-#define ZDICT_MIN_SAMPLES_SIZE 512
+#define ZDICT_MIN_SAMPLES_SIZE (ZDICT_CONTENTSIZE_MIN * MINRATIO)
/*-**************************************
#define NOISELENGTH 32
-#define MINRATIO 4
static const int g_compressionLevel_default = 6;
static const U32 g_selectivity_default = 9;
-static const size_t g_provision_entropySize = 192;
-static const size_t g_min_fast_dictContent = 192;
-static const size_t g_dictContentSize_min = 32;
/*-*************************************
/* checks */
if (!dictList) return ERROR(memory_allocation);
- if (maxDictSize <= g_provision_entropySize + g_min_fast_dictContent) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */
+ if (maxDictSize < ZDICT_DICTSIZE_MIN) { free(dictList); return ERROR(dstSize_tooSmall); } /* requested dictionary size is too small */
if (samplesBuffSize < ZDICT_MIN_SAMPLES_SIZE) { free(dictList); return ERROR(dictionaryCreation_failed); } /* not enough source to create dictionary */
/* init */
/* create dictionary */
{ U32 dictContentSize = ZDICT_dictSize(dictList);
- if (dictContentSize < g_dictContentSize_min) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
- if (dictContentSize < targetDictSize/3) {
+ if (dictContentSize < ZDICT_CONTENTSIZE_MIN) { free(dictList); return ERROR(dictionaryCreation_failed); } /* dictionary content too small */
+ if (dictContentSize < targetDictSize/4) {
DISPLAYLEVEL(2, "! warning : selected content significantly smaller than requested (%u < %u) \n", dictContentSize, (U32)maxDictSize);
+ if (samplesBuffSize < 10 * targetDictSize)
+ DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
if (minRep > MINRATIO) {
DISPLAYLEVEL(2, "! consider increasing selectivity to produce larger dictionary (-s%u) \n", selectivity+1);
DISPLAYLEVEL(2, "! note : larger dictionaries are not necessarily better, test its efficiency on samples \n");
}
- if (samplesBuffSize < 10 * targetDictSize)
- DISPLAYLEVEL(2, "! consider increasing the number of samples (total size : %u MB)\n", (U32)(samplesBuffSize>>20));
}
if ((dictContentSize > targetDictSize*3) && (nbSamples > 2*MINRATIO) && (selectivity>1)) {
while ((nbSamples >> proposedSelectivity) <= MINRATIO) { proposedSelectivity--; }
DISPLAYLEVEL(2, "! note : calculated dictionary significantly larger than requested (%u > %u) \n", dictContentSize, (U32)maxDictSize);
DISPLAYLEVEL(2, "! consider increasing dictionary size, or produce denser dictionary (-s%u) \n", proposedSelectivity);
- DISPLAYLEVEL(2, "! always test dictionary efficiency on samples \n");
+ DISPLAYLEVEL(2, "! always test dictionary efficiency on real samples \n");
}
/* limit dictionary size */
Samples must be stored concatenated in a flat buffer `samplesBuffer`,
supplied with an array of sizes `samplesSizes`, providing the size of each sample in order.
- dictContentSize must be > ZDICT_CONTENTSIZE_MIN bytes.
- maxDictSize must be >= dictContentSize, and must be > ZDICT_DICTSIZE_MIN bytes.
+ dictContentSize must be >= ZDICT_CONTENTSIZE_MIN bytes.
+ maxDictSize must be >= dictContentSize, and must be >= ZDICT_DICTSIZE_MIN bytes.
@return : size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`),
or an error code, which can be tested by ZDICT_isError().
note : ZDICT_finalizeDictionary() will push notifications into stderr if instructed to, using notificationLevel>0.
- note 2 : dictBuffer and customDictContent can overlap
+ note 2 : dictBuffer and dictContent can overlap
*/
-#define ZDICT_CONTENTSIZE_MIN 256
-#define ZDICT_DICTSIZE_MIN 512
+#define ZDICT_CONTENTSIZE_MIN 128
+#define ZDICT_DICTSIZE_MIN 256
ZDICTLIB_API size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
- const void* customDictContent, size_t dictContentSize,
+ const void* dictContent, size_t dictContentSize,
const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
ZDICT_params_t parameters);