* Specify errors at a finer granularity than `ERROR(generic)`.
* Add tests for bad parameters in the dictionary builder.
.DS_Store
googletest/
*.d
+*.vscode
* Prepare a context for dictionary building.
* The context is only dependent on the parameter `d` and can used multiple
* times.
- * Returns 1 on success or zero on error.
+ * Returns 0 on success or error code on error.
* The context must be destroyed with `COVER_ctx_destroy()`.
*/
-static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
+static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
const size_t *samplesSizes, unsigned nbSamples,
unsigned d, double splitPoint) {
const BYTE *const samples = (const BYTE *)samplesBuffer;
totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
(unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
- return 0;
+ return ERROR(srcSize_wrong);
}
/* Check if there are at least 5 training samples */
if (nbTrainSamples < 5) {
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
- return 0;
+ return ERROR(srcSize_wrong);
}
/* Check if there's testing sample */
if (nbTestSamples < 1) {
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
- return 0;
+ return ERROR(srcSize_wrong);
}
/* Zero the context */
memset(ctx, 0, sizeof(*ctx));
if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) {
DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n");
COVER_ctx_destroy(ctx);
- return 0;
+ return ERROR(memory_allocation);
}
ctx->freqs = NULL;
ctx->d = d;
(ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group);
ctx->freqs = ctx->suffix;
ctx->suffix = NULL;
- return 1;
+ return 0;
}
void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel)
/* Checks */
if (!COVER_checkParameters(parameters, dictBufferCapacity)) {
DISPLAYLEVEL(1, "Cover parameters incorrect\n");
- return ERROR(GENERIC);
+ return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
- return ERROR(GENERIC);
+ return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
return ERROR(dstSize_tooSmall);
}
/* Initialize context and activeDmers */
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
- parameters.d, parameters.splitPoint)) {
- return ERROR(GENERIC);
+ {
+ size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+ parameters.d, parameters.splitPoint);
+ if (ZSTD_isError(initVal)) {
+ return initVal;
+ }
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel);
if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n");
COVER_ctx_destroy(&ctx);
- return ERROR(GENERIC);
+ return ERROR(memory_allocation);
}
DISPLAYLEVEL(2, "Building dictionary\n");
cctx, dst, dstCapacity, samples + offsets[i],
samplesSizes[i], cdict);
if (ZSTD_isError(size)) {
- totalCompressedSize = ERROR(GENERIC);
+ totalCompressedSize = size;
goto _compressCleanup;
}
totalCompressedSize += size;
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
- return ERROR(GENERIC);
+ return ERROR(parameter_outOfBound);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
- return ERROR(GENERIC);
+ return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "Cover must have at least one input file\n");
- return ERROR(GENERIC);
+ return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
/* Initialize the context for this value of d */
COVER_ctx_t ctx;
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
- if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
- COVER_best_destroy(&best);
- POOL_free(pool);
- return ERROR(GENERIC);
+ {
+ const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint);
+ if (ZSTD_isError(initVal)) {
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+ COVER_best_destroy(&best);
+ POOL_free(pool);
+ return initVal;
+ }
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel);
COVER_best_destroy(&best);
COVER_ctx_destroy(&ctx);
POOL_free(pool);
- return ERROR(GENERIC);
+ return ERROR(memory_allocation);
}
data->ctx = &ctx;
data->best = &best;
* Prepare a context for dictionary building.
* The context is only dependent on the parameter `d` and can used multiple
* times.
- * Returns 1 on success or zero on error.
+ * Returns 0 on success or error code on error.
* The context must be destroyed with `FASTCOVER_ctx_destroy()`.
*/
-static int
+static size_t
FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
const void* samplesBuffer,
const size_t* samplesSizes, unsigned nbSamples,
totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
(unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
- return 0;
+ return ERROR(srcSize_wrong);
}
/* Check if there are at least 5 training samples */
if (nbTrainSamples < 5) {
DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
- return 0;
+ return ERROR(srcSize_wrong);
}
/* Check if there's testing sample */
if (nbTestSamples < 1) {
DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
- return 0;
+ return ERROR(srcSize_wrong);
}
/* Zero the context */
if (ctx->offsets == NULL) {
DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
FASTCOVER_ctx_destroy(ctx);
- return 0;
+ return ERROR(memory_allocation);
}
/* Fill offsets from the samplesSizes */
if (ctx->freqs == NULL) {
DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
FASTCOVER_ctx_destroy(ctx);
- return 0;
+ return ERROR(memory_allocation);
}
DISPLAYLEVEL(2, "Computing frequencies\n");
FASTCOVER_computeFrequency(ctx->freqs, ctx);
- return 1;
+ return 0;
}
if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
parameters.accel)) {
DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
- return ERROR(GENERIC);
+ return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
- return ERROR(GENERIC);
+ return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
/* Assign corresponding FASTCOVER_accel_t to accelParams*/
accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
/* Initialize context */
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
+ {
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
coverParams.d, parameters.splitPoint, parameters.f,
- accelParams)) {
- DISPLAYLEVEL(1, "Failed to initialize context\n");
- return ERROR(GENERIC);
+ accelParams);
+ if (ZSTD_isError(initVal)) {
+ DISPLAYLEVEL(1, "Failed to initialize context\n");
+ return initVal;
+ }
}
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel);
/* Build the dictionary */
/* Checks */
if (splitPoint <= 0 || splitPoint > 1) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
- return ERROR(GENERIC);
+ return ERROR(parameter_outOfBound);
}
if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
- return ERROR(GENERIC);
+ return ERROR(parameter_outOfBound);
}
if (kMinK < kMaxD || kMaxK < kMinK) {
LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
- return ERROR(GENERIC);
+ return ERROR(parameter_outOfBound);
}
if (nbSamples == 0) {
LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
- return ERROR(GENERIC);
+ return ERROR(srcSize_wrong);
}
if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
/* Initialize the context for this value of d */
FASTCOVER_ctx_t ctx;
LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
- if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
- LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
- COVER_best_destroy(&best);
- POOL_free(pool);
- return ERROR(GENERIC);
+ {
+ size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams);
+ if (ZSTD_isError(initVal)) {
+ LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
+ COVER_best_destroy(&best);
+ POOL_free(pool);
+ return initVal;
+ }
}
if (!warned) {
COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel);
COVER_best_destroy(&best);
FASTCOVER_ctx_destroy(&ctx);
POOL_free(pool);
- return ERROR(GENERIC);
+ return ERROR(memory_allocation);
}
data->ctx = &ctx;
data->best = &best;
/* analyze, build stats, starting with literals */
{ size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog);
if (HUF_isError(maxNbBits)) {
- eSize = ERROR(GENERIC);
+ eSize = maxNbBits;
DISPLAYLEVEL(1, " HUF_buildCTable error \n");
goto _cleanup;
}
total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u];
errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax);
if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
+ eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n");
goto _cleanup;
}
total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u];
errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML);
if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
+ eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n");
goto _cleanup;
}
total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u];
errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL);
if (FSE_isError(errorCode)) {
- eSize = ERROR(GENERIC);
+ eSize = errorCode;
DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n");
goto _cleanup;
}
/* write result to buffer */
{ size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog);
if (HUF_isError(hhSize)) {
- eSize = ERROR(GENERIC);
+ eSize = hhSize;
DISPLAYLEVEL(1, "HUF_writeCTable error \n");
goto _cleanup;
}
{ size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog);
if (FSE_isError(ohSize)) {
- eSize = ERROR(GENERIC);
+ eSize = ohSize;
DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n");
goto _cleanup;
}
{ size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog);
if (FSE_isError(mhSize)) {
- eSize = ERROR(GENERIC);
+ eSize = mhSize;
DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n");
goto _cleanup;
}
{ size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog);
if (FSE_isError(lhSize)) {
- eSize = ERROR(GENERIC);
+ eSize = lhSize;
DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n");
goto _cleanup;
}
}
if (maxDstSize<12) {
- eSize = ERROR(GENERIC);
+ eSize = ERROR(dstSize_tooSmall);
DISPLAYLEVEL(1, "not enough space to write RepOffsets \n");
goto _cleanup;
}
println "\n===> fastCover dictionary builder : advanced options "
-
TESTFILE="$PRGDIR"/zstdcli.c
./datagen > tmpDict
println "- Create first dictionary"
println "- Create second (different) dictionary"
$ZSTD --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
+$ZSTD --train-fastcover=k=56,d=8 && die "Create dictionary without input file"
println "- Create dictionary with short dictID"
$ZSTD --train-fastcover=k=46,d=8,f=15,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1
cmp tmpDict tmpDict1 && die "dictionaries should have different ID !"
$ZSTD --train-fastcover=split=100 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
println "- Create dictionary using f=16"
$ZSTD --train-fastcover=f=16 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
+$ZSTD --train-fastcover=accel=15 -r "$TESTDIR"/*.c "$PRGDIR"/*.c && die "Created dictionary using accel=15"
println "- Create dictionary using accel=2"
$ZSTD --train-fastcover=accel=2 -r "$TESTDIR"/*.c "$PRGDIR"/*.c
println "- Create dictionary using accel=10"
$ZSTD -f tmp -D tmpDict
$ZSTD -d tmp.zst -D tmpDict -fo result
$DIFF "$TESTFILE" result
+$ZSTD --train-legacy=s=8 && die "Create dictionary without input files (should error)"
println "- Create second (different) dictionary"
$ZSTD --train-legacy=s=5 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"
$ZSTD -f tmp -D tmpDict
$ZSTD -d tmp.zst -D tmpDict -fo result
$DIFF "$TESTFILE" result
+$ZSTD --train-cover=k=56,d=8 && die "Create dictionary without input file (should error)"
println "- Create second (different) dictionary"
$ZSTD --train-cover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC
$ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"