From: Tyler-Tran <39778355+Tyler-Tran@users.noreply.github.com> Date: Thu, 23 May 2019 01:57:50 +0000 (-0700) Subject: [dictBuilder] Be more specific than ERROR(generic) (#1616) X-Git-Tag: v1.4.1^2~41 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=cb47871a0a1d5e5bae3e87b2bb6200d859e82c6b;p=thirdparty%2Fzstd.git [dictBuilder] Be more specific than ERROR(generic) (#1616) * Specify errors at a finer granularity than `ERROR(generic)`. * Add tests for bad parameters in the dictionary builder. --- diff --git a/.gitignore b/.gitignore index d28a51291..abdf3b633 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,4 @@ _zstdbench/ .DS_Store googletest/ *.d +*.vscode diff --git a/lib/dictBuilder/cover.c b/lib/dictBuilder/cover.c index 21464ad03..961e1cb9d 100644 --- a/lib/dictBuilder/cover.c +++ b/lib/dictBuilder/cover.c @@ -526,10 +526,10 @@ static void COVER_ctx_destroy(COVER_ctx_t *ctx) { * Prepare a context for dictionary building. * The context is only dependent on the parameter `d` and can used multiple * times. - * Returns 1 on success or zero on error. + * Returns 0 on success or error code on error. * The context must be destroyed with `COVER_ctx_destroy()`. */ -static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, +static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, const size_t *samplesSizes, unsigned nbSamples, unsigned d, double splitPoint) { const BYTE *const samples = (const BYTE *)samplesBuffer; @@ -544,17 +544,17 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); - return 0; + return ERROR(srcSize_wrong); } /* Check if there are at least 5 training samples */ if (nbTrainSamples < 5) { DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); - return 0; + return ERROR(srcSize_wrong); } /* Check if there's testing sample */ if (nbTestSamples < 1) { DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); - return 0; + return ERROR(srcSize_wrong); } /* Zero the context */ memset(ctx, 0, sizeof(*ctx)); @@ -577,7 +577,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); COVER_ctx_destroy(ctx); - return 0; + return ERROR(memory_allocation); } ctx->freqs = NULL; ctx->d = d; @@ -624,7 +624,7 @@ static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); ctx->freqs = ctx->suffix; ctx->suffix = NULL; - return 1; + return 0; } void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) @@ -729,11 +729,11 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( /* Checks */ if (!COVER_checkParameters(parameters, dictBufferCapacity)) { DISPLAYLEVEL(1, "Cover parameters incorrect\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { DISPLAYLEVEL(1, "Cover must have at least one input file\n"); - return ERROR(GENERIC); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", @@ -741,15 +741,18 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffer_cover( return ERROR(dstSize_tooSmall); } /* Initialize context and activeDmers */ - if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, - parameters.d, parameters.splitPoint)) { - return ERROR(GENERIC); + { + size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + parameters.d, parameters.splitPoint); + if (ZSTD_isError(initVal)) { + return initVal; + } } COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); COVER_ctx_destroy(&ctx); - return ERROR(GENERIC); + return ERROR(memory_allocation); } DISPLAYLEVEL(2, "Building dictionary\n"); @@ -810,7 +813,7 @@ size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, cctx, dst, dstCapacity, samples + offsets[i], samplesSizes[i], cdict); if (ZSTD_isError(size)) { - totalCompressedSize = ERROR(GENERIC); + totalCompressedSize = size; goto _compressCleanup; } totalCompressedSize += size; @@ -1022,15 +1025,15 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( /* Checks */ if (splitPoint <= 0 || splitPoint > 1) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (kMinK < kMaxD || kMaxK < kMinK) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { DISPLAYLEVEL(1, "Cover must have at least one input file\n"); - return ERROR(GENERIC); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", @@ -1054,11 +1057,14 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( /* Initialize the context for this value of d */ COVER_ctx_t ctx; LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); - if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); - COVER_best_destroy(&best); - POOL_free(pool); - return ERROR(GENERIC); + { + const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } } if (!warned) { COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); @@ -1075,7 +1081,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( COVER_best_destroy(&best); COVER_ctx_destroy(&ctx); POOL_free(pool); - return ERROR(GENERIC); + return ERROR(memory_allocation); } data->ctx = &ctx; data->best = &best; diff --git a/lib/dictBuilder/fastcover.c b/lib/dictBuilder/fastcover.c index 5b6b941a9..40131e693 100644 --- a/lib/dictBuilder/fastcover.c +++ b/lib/dictBuilder/fastcover.c @@ -287,10 +287,10 @@ FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx) * Prepare a context for dictionary building. * The context is only dependent on the parameter `d` and can used multiple * times. - * Returns 1 on success or zero on error. + * Returns 0 on success or error code on error. * The context must be destroyed with `FASTCOVER_ctx_destroy()`. */ -static int +static size_t FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples, @@ -310,19 +310,19 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); - return 0; + return ERROR(srcSize_wrong); } /* Check if there are at least 5 training samples */ if (nbTrainSamples < 5) { DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); - return 0; + return ERROR(srcSize_wrong); } /* Check if there's testing sample */ if (nbTestSamples < 1) { DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); - return 0; + return ERROR(srcSize_wrong); } /* Zero the context */ @@ -347,7 +347,7 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, if (ctx->offsets == NULL) { DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); FASTCOVER_ctx_destroy(ctx); - return 0; + return ERROR(memory_allocation); } /* Fill offsets from the samplesSizes */ @@ -364,13 +364,13 @@ FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, if (ctx->freqs == NULL) { DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); FASTCOVER_ctx_destroy(ctx); - return 0; + return ERROR(memory_allocation); } DISPLAYLEVEL(2, "Computing frequencies\n"); FASTCOVER_computeFrequency(ctx->freqs, ctx); - return 1; + return 0; } @@ -550,11 +550,11 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, parameters.accel)) { DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); - return ERROR(GENERIC); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", @@ -564,11 +564,14 @@ ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity, /* Assign corresponding FASTCOVER_accel_t to accelParams*/ accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; /* Initialize context */ - if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, coverParams.d, parameters.splitPoint, parameters.f, - accelParams)) { - DISPLAYLEVEL(1, "Failed to initialize context\n"); - return ERROR(GENERIC); + accelParams); + if (ZSTD_isError(initVal)) { + DISPLAYLEVEL(1, "Failed to initialize context\n"); + return initVal; + } } COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); /* Build the dictionary */ @@ -627,19 +630,19 @@ ZDICT_optimizeTrainFromBuffer_fastCover( /* Checks */ if (splitPoint <= 0 || splitPoint > 1) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (kMinK < kMaxD || kMaxK < kMinK) { LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); - return ERROR(GENERIC); + return ERROR(parameter_outOfBound); } if (nbSamples == 0) { LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); - return ERROR(GENERIC); + return ERROR(srcSize_wrong); } if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", @@ -666,11 +669,14 @@ ZDICT_optimizeTrainFromBuffer_fastCover( /* Initialize the context for this value of d */ FASTCOVER_ctx_t ctx; LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); - if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) { - LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); - COVER_best_destroy(&best); - POOL_free(pool); - return ERROR(GENERIC); + { + size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); + if (ZSTD_isError(initVal)) { + LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); + COVER_best_destroy(&best); + POOL_free(pool); + return initVal; + } } if (!warned) { COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); @@ -687,7 +693,7 @@ ZDICT_optimizeTrainFromBuffer_fastCover( COVER_best_destroy(&best); FASTCOVER_ctx_destroy(&ctx); POOL_free(pool); - return ERROR(GENERIC); + return ERROR(memory_allocation); } data->ctx = &ctx; data->best = &best; diff --git a/lib/dictBuilder/zdict.c b/lib/dictBuilder/zdict.c index c753da0db..ee21ee1a9 100644 --- a/lib/dictBuilder/zdict.c +++ b/lib/dictBuilder/zdict.c @@ -741,7 +741,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* analyze, build stats, starting with literals */ { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog); if (HUF_isError(maxNbBits)) { - eSize = ERROR(GENERIC); + eSize = maxNbBits; DISPLAYLEVEL(1, " HUF_buildCTable error \n"); goto _cleanup; } @@ -764,7 +764,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u]; errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax); if (FSE_isError(errorCode)) { - eSize = ERROR(GENERIC); + eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n"); goto _cleanup; } @@ -773,7 +773,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u]; errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML); if (FSE_isError(errorCode)) { - eSize = ERROR(GENERIC); + eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); goto _cleanup; } @@ -782,7 +782,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u]; errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL); if (FSE_isError(errorCode)) { - eSize = ERROR(GENERIC); + eSize = errorCode; DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); goto _cleanup; } @@ -791,7 +791,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, /* write result to buffer */ { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog); if (HUF_isError(hhSize)) { - eSize = ERROR(GENERIC); + eSize = hhSize; DISPLAYLEVEL(1, "HUF_writeCTable error \n"); goto _cleanup; } @@ -802,7 +802,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog); if (FSE_isError(ohSize)) { - eSize = ERROR(GENERIC); + eSize = ohSize; DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n"); goto _cleanup; } @@ -813,7 +813,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog); if (FSE_isError(mhSize)) { - eSize = ERROR(GENERIC); + eSize = mhSize; DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n"); goto _cleanup; } @@ -824,7 +824,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog); if (FSE_isError(lhSize)) { - eSize = ERROR(GENERIC); + eSize = lhSize; DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n"); goto _cleanup; } @@ -834,7 +834,7 @@ static size_t ZDICT_analyzeEntropy(void* dstBuffer, size_t maxDstSize, } if (maxDstSize<12) { - eSize = ERROR(GENERIC); + eSize = ERROR(dstSize_tooSmall); DISPLAYLEVEL(1, "not enough space to write RepOffsets \n"); goto _cleanup; } diff --git a/tests/playTests.sh b/tests/playTests.sh index afc21b7b9..df13a71f9 100755 --- a/tests/playTests.sh +++ b/tests/playTests.sh @@ -484,7 +484,6 @@ rm tmp* dictionary println "\n===> fastCover dictionary builder : advanced options " - TESTFILE="$PRGDIR"/zstdcli.c ./datagen > tmpDict println "- Create first dictionary" @@ -496,6 +495,7 @@ $DIFF "$TESTFILE" result println "- Create second (different) dictionary" $ZSTD --train-fastcover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" +$ZSTD --train-fastcover=k=56,d=8 && die "Create dictionary without input file" println "- Create dictionary with short dictID" $ZSTD --train-fastcover=k=46,d=8,f=15,split=80 "$TESTDIR"/*.c "$PRGDIR"/*.c --dictID=1 -o tmpDict1 cmp tmpDict tmpDict1 && die "dictionaries should have different ID !" @@ -508,6 +508,7 @@ println "- Create dictionary using all samples for both training and testing" $ZSTD --train-fastcover=split=100 -r "$TESTDIR"/*.c "$PRGDIR"/*.c println "- Create dictionary using f=16" $ZSTD --train-fastcover=f=16 -r "$TESTDIR"/*.c "$PRGDIR"/*.c +$ZSTD --train-fastcover=accel=15 -r "$TESTDIR"/*.c "$PRGDIR"/*.c && die "Created dictionary using accel=15" println "- Create dictionary using accel=2" $ZSTD --train-fastcover=accel=2 -r "$TESTDIR"/*.c "$PRGDIR"/*.c println "- Create dictionary using accel=10" @@ -533,6 +534,7 @@ cp "$TESTFILE" tmp $ZSTD -f tmp -D tmpDict $ZSTD -d tmp.zst -D tmpDict -fo result $DIFF "$TESTFILE" result +$ZSTD --train-legacy=s=8 && die "Create dictionary without input files (should error)" println "- Create second (different) dictionary" $ZSTD --train-legacy=s=5 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!" @@ -980,6 +982,7 @@ cp "$TESTFILE" tmp $ZSTD -f tmp -D tmpDict $ZSTD -d tmp.zst -D tmpDict -fo result $DIFF "$TESTFILE" result +$ZSTD --train-cover=k=56,d=8 && die "Create dictionary without input file (should error)" println "- Create second (different) dictionary" $ZSTD --train-cover=k=56,d=8 "$TESTDIR"/*.c "$PRGDIR"/*.c "$PRGDIR"/*.h -o tmpDictC $ZSTD -d tmp.zst -D tmpDictC -fo result && die "wrong dictionary not detected!"