git.ipfire.org Git - thirdparty/zstd.git/commitdiff
fullbench can run a verification function
author Yann Collet <yann.collet.73@gmail.com>
Tue, 7 Jan 2025 23:51:38 +0000 (15:51 -0800)
committer Yann Collet <cyan@fb.com>
Thu, 16 Jan 2025 01:11:27 +0000 (17:11 -0800)
compressSequencesAndLiterals: fixed long lengths in scalar mode

lib/compress/zstd_compress.c
tests/fullbench.c

index d91fae619ad240569b1474f8741f786d7e02f6d0..c8dc86ccf9468c7becdc17557c4f2baa0cf32222 100644 (file)
@@ -7205,7 +7205,7 @@ void convertSequences_noRepcodes(
          */
     }
 
-    /* Handle leftover if nbSequences is odd */
+    /* Handle leftover if @nbSequences is odd */
     if (i < nbSequences) {
         /* Fallback: process last sequence */
         assert(i == nbSequences - 1);
@@ -7301,16 +7301,37 @@ static void convertSequences_noRepcodes(SeqDef* dstSeqs,
 
 #else /* no SSE */
 
-FORCE_INLINE_TEMPLATE void convertSequences_noRepcodes(SeqDef* dstSeqs,
+/* convertSequences_noRepcodes() (scalar variant) :
+ * Converts @nbSequences entries of @inSeqs into SeqDef entries written to @dstSeqs.
+ * litLength and mlBase are cast to U16, so any length > 65535 gets truncated.
+ * Such a "long length" is therefore reported through the return value.
+ * @return : 0 when no long length was met,
+ *           n+1 when sequence n carries a long match length,
+ *           n+nbSequences+1 when sequence n carries a long literals length.
+ * At most one long length is expected per call (enforced by assert()).
+ */
+static size_t
+convertSequences_noRepcodes(SeqDef* dstSeqs,
                 const ZSTD_Sequence* const inSeqs, size_t nbSequences)
 {
+    size_t longLen = 0;
     size_t n;
     for (n=0; n<nbSequences; n++) {
         dstSeqs[n].offBase = OFFSET_TO_OFFBASE(inSeqs[n].offset);
-        /* note: doesn't work if one length is > 65535 */
         dstSeqs[n].litLength = (U16)inSeqs[n].litLength;
         dstSeqs[n].mlBase = (U16)(inSeqs[n].matchLength - MINMATCH);
+        /* a truncated mlBase must be signaled as a long match length */
+        if (UNLIKELY(inSeqs[n].matchLength > 65535+MINMATCH)) {
+            assert(longLen == 0);
+            longLen = n + 1;
+        }
+        /* long literals lengths are offset by nbSequences to tell them apart */
+        if (UNLIKELY(inSeqs[n].litLength > 65535)) {
+            assert(longLen == 0);
+            longLen = n + nbSequences + 1;
+        }
     }
+    return longLen;
 }
 
 #endif
@@ -7343,8 +7350,22 @@ static size_t ZSTD_convertBlockSequences_internal(ZSTD_CCtx* cctx,
 
     /* Convert Sequences from public format to internal format */
     if (!repcodeResolution) {
-        convertSequences_noRepcodes(cctx->seqStore.sequencesStart, inSeqs, nbSequences);
-        cctx->seqStore.sequences += nbSequences;
+        size_t const longl = convertSequences_noRepcodes(cctx->seqStore.sequencesStart, inSeqs, nbSequences-1);
+        cctx->seqStore.sequences = cctx->seqStore.sequencesStart + nbSequences-1;
+        if (longl) {
+            DEBUGLOG(5, "long length");
+            assert(cctx->seqStore.longLengthType == ZSTD_llt_none);
+            if (longl <= nbSequences-1) {
+                DEBUGLOG(5, "long match length detected at pos %zu", longl-1);
+                cctx->seqStore.longLengthType = ZSTD_llt_matchLength;
+                cctx->seqStore.longLengthPos = (U32)(longl-1);
+            } else {
+                DEBUGLOG(5, "long literals length detected at pos %zu", longl-nbSequences);
+                assert(longl <= 2* (nbSequences-1));
+                cctx->seqStore.longLengthType = ZSTD_llt_literalLength;
+                cctx->seqStore.longLengthPos = (U32)(longl-nbSequences);
+            }
+        }
     } else {
         for (seqNb = 0; seqNb < nbSequences - 1 ; seqNb++) {
             U32 const litLength = inSeqs[seqNb].litLength;
index 5683eca25a01595872cd977a456d60e5eda2f58f..739e55ef2eec833e12e16cf0abaddbba348c72ac 100644 (file)
@@ -687,6 +687,33 @@ local_convertSequences(const void* input, size_t inputSize,
     return nbSeqs;
 }
 
+/* check_compressedSequences() :
+ * Verification function : decompresses @compressed and compares the result with @orig.
+ * @return : 0 if the regenerated data is identical to @orig,
+ *           1 if decompression does not produce @origSize bytes or the content differs,
+ *           2 if the scratch buffer cannot be allocated.
+ */
+static size_t
+check_compressedSequences(const void* compressed, size_t cSize, const void* orig, size_t origSize)
+{
+    size_t result = 0;
+    size_t decSize;
+    /* malloc(0) may legitimately return NULL : always request at least 1 byte */
+    void* const decompressed = malloc(origSize ? origSize : 1);
+    if (decompressed == NULL) return 2;
+
+    decSize = ZSTD_decompress(decompressed, origSize, compressed, cSize);
+    if (decSize != origSize) {
+        /* size printed as (unsigned) with %u : %zu requires C99 printf support */
+        DISPLAY("ZSTD_decompress failed (%u) ", (unsigned)decSize);
+        result = 1;
+    } else if (memcmp(decompressed, orig, origSize) != 0) {
+        result = 1;
+    }
+
+    free(decompressed);
+    return result;
+}
 
 static PrepResult prepCopy(const void* src, size_t srcSize, int cLevel)
 {
@@ -714,40 +731,43 @@ static PrepResult prepShorterDstCapacity(const void* src, size_t srcSize, int cL
 *  List of Scenarios
 *********************************************************/
 
-/* if PrepFunction_f returns 0, benchmarking is cancelled */
+/* if PrepFunction_f returns PrepResult.prepBuffSize == 0, benchmarking is cancelled */
 typedef PrepResult (*PrepFunction_f)(const void* src, size_t srcSize, int cLevel);
 typedef size_t (*BenchedFunction_f)(const void* src, size_t srcSize, void* dst, size_t dstSize, void* opaque);
+/* optional check comparing a @processed buffer against the original @input : must return 0, otherwise verification is considered failed */
+typedef size_t (*VerifFunction_f)(const void* processed, size_t procSize, const void* input, size_t inputSize);
 
 typedef struct {
     const char* name;
     PrepFunction_f preparation_f;
-    BenchedFunction_f benchedFunction;
+    BenchedFunction_f benched_f; /* function handed to the benchmark */
+    VerifFunction_f verif_f; /* optional : when NULL, no verification is run */
 } BenchScenario;
 
 static BenchScenario kScenarios[] = {
-    { "compress", NULL, local_ZSTD_compress },
-    { "decompress", prepDecompress, local_ZSTD_decompress },
-    { "compress_freshCCtx", NULL, local_ZSTD_compress_freshCCtx },
-    { "decompressDCtx", prepDecompress, local_ZSTD_decompressDCtx },
-    { "compressContinue", NULL, local_ZSTD_compressContinue },
-    { "compressContinue_extDict", NULL, local_ZSTD_compressContinue_extDict },
-    { "decompressContinue", prepDecompress, local_ZSTD_decompressContinue },
-    { "compressStream", NULL, local_ZSTD_compressStream },
-    { "compressStream_freshCCtx", NULL, local_ZSTD_compressStream_freshCCtx },
-    { "decompressStream", prepDecompress, local_ZSTD_decompressStream },
-    { "compress2", NULL, local_ZSTD_compress2 },
-    { "compressStream2, end", NULL, local_ZSTD_compressStream2_end },
-    { "compressStream2, end & short", prepShorterDstCapacity, local_ZSTD_compressStream2_end },
-    { "compressStream2, continue", NULL, local_ZSTD_compressStream2_continue },
-    { "compressStream2, -T2, continue", NULL, local_ZSTD_compress_generic_T2_continue },
-    { "compressStream2, -T2, end", NULL, local_ZSTD_compress_generic_T2_end },
-    { "compressSequences", prepSequences, local_compressSequences },
-    { "compressSequencesAndLiterals", prepSequencesAndLiterals, local_compressSequencesAndLiterals },
-    { "convertSequences (1st block)", prepConvertSequences, local_convertSequences },
+    { "compress", NULL, local_ZSTD_compress, NULL },
+    { "decompress", prepDecompress, local_ZSTD_decompress, NULL },
+    { "compress_freshCCtx", NULL, local_ZSTD_compress_freshCCtx, NULL },
+    { "decompressDCtx", prepDecompress, local_ZSTD_decompressDCtx, NULL },
+    { "compressContinue", NULL, local_ZSTD_compressContinue, NULL },
+    { "compressContinue_extDict", NULL, local_ZSTD_compressContinue_extDict, NULL },
+    { "decompressContinue", prepDecompress, local_ZSTD_decompressContinue, NULL },
+    { "compressStream", NULL, local_ZSTD_compressStream, NULL },
+    { "compressStream_freshCCtx", NULL, local_ZSTD_compressStream_freshCCtx, NULL },
+    { "decompressStream", prepDecompress, local_ZSTD_decompressStream, NULL },
+    { "compress2", NULL, local_ZSTD_compress2, NULL },
+    { "compressStream2, end", NULL, local_ZSTD_compressStream2_end, NULL },
+    { "compressStream2, end & short", prepShorterDstCapacity, local_ZSTD_compressStream2_end, NULL },
+    { "compressStream2, continue", NULL, local_ZSTD_compressStream2_continue, NULL },
+    { "compressStream2, -T2, continue", NULL, local_ZSTD_compress_generic_T2_continue, NULL },
+    { "compressStream2, -T2, end", NULL, local_ZSTD_compress_generic_T2_end, NULL },
+    { "compressSequences", prepSequences, local_compressSequences, check_compressedSequences },
+    { "compressSequencesAndLiterals", prepSequencesAndLiterals, local_compressSequencesAndLiterals, check_compressedSequences },
+    { "convertSequences (1st block)", prepConvertSequences, local_convertSequences, NULL },
 #ifndef ZSTD_DLL_IMPORT
-    { "decodeLiteralsHeader (1st block)", prepLiterals, local_ZSTD_decodeLiteralsHeader },
-    { "decodeLiteralsBlock (1st block)", prepLiterals, local_ZSTD_decodeLiteralsBlock },
-    { "decodeSeqHeaders (1st block)", prepSequences1stBlock, local_ZSTD_decodeSeqHeaders },
+    { "decodeLiteralsHeader (1st block)", prepLiterals, local_ZSTD_decodeLiteralsHeader, NULL },
+    { "decodeLiteralsBlock (1st block)", prepLiterals, local_ZSTD_decodeLiteralsBlock, NULL },
+    { "decodeSeqHeaders (1st block)", prepSequences1stBlock, local_ZSTD_decodeSeqHeaders, NULL },
 #endif
 };
 #define NB_SCENARIOS (sizeof(kScenarios) / sizeof(kScenarios[0]))
@@ -767,13 +787,15 @@ static int benchMem(unsigned scenarioID,
     const char* benchName;
     BMK_benchFn_t benchFunction;
     PrepFunction_f prep_f;
+    VerifFunction_f verif_f;
     int errorcode = 0;
 
     if (scenarioID >= NB_SCENARIOS) return 0; /* scenario doesn't exist */
 
     benchName = kScenarios[scenarioID].name;
-    benchFunction = kScenarios[scenarioID].benchedFunction;
+    benchFunction = kScenarios[scenarioID].benched_f;
     prep_f = kScenarios[scenarioID].preparation_f;
+    verif_f = kScenarios[scenarioID].verif_f;
     if (prep_f == NULL) prep_f = prepCopy; /* default */
 
     /* Initialization */
@@ -857,6 +879,14 @@ static int benchMem(unsigned scenarioID,
                         scenarioID, benchName,
                         (double)origSrcSize * TIMELOOP_NANOSEC / bestResult.nanoSecPerRun / MB_UNIT,
                         (unsigned)newResult.sumOfReturn );
+
+                if (verif_f) {
+                    size_t const vRes = verif_f(dst, newResult.sumOfReturn, origSrc, origSrcSize);
+                    if (vRes) {
+                        DISPLAY(" validation failed ! (%zu)\n", vRes);
+                        break;
+                    }
+                }
             }
 
             if ( BMK_isCompleted_TimedFn(tfs) ) break;