git.ipfire.org Git - thirdparty/zstd.git/commitdiff
fixed decompression-only benchmark
author Yann Collet <cyan@fb.com>
Thu, 8 Nov 2018 20:36:39 +0000 (12:36 -0800)
committer Yann Collet <cyan@fb.com>
Thu, 8 Nov 2018 20:36:39 +0000 (12:36 -0800)
lib/decompress/zstd_decompress_block.c
programs/bench.c

index 652ddd19489245fe04df3c392ecf61e047c46407..68298772e38a472a805d84cc5329476db8387195 100644 (file)
@@ -1026,17 +1026,18 @@ ZSTD_decompressSequencesLong_body(
     /* Regen sequences */
     if (nbSeq) {
 #define STORED_SEQS 4
-#define STOSEQ_MASK (STORED_SEQS-1)
+#define STORED_SEQS_MASK (STORED_SEQS-1)
 #define ADVANCED_SEQS 4
         seq_t sequences[STORED_SEQS];
         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         seqState_t seqState;
         int seqNb;
         dctx->fseEntropy = 1;
-        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         seqState.prefixStart = prefixStart;
         seqState.pos = (size_t)(op-prefixStart);
         seqState.dictEnd = dictEnd;
+        assert(iend > ip);
         CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected);
         ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
         ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
@@ -1051,10 +1052,10 @@ ZSTD_decompressSequencesLong_body(
         /* decode and decompress */
         for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
             seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, isLongOffset);
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
             PREFETCH(sequence.match);  /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
-            sequences[seqNb&STOSEQ_MASK] = sequence;
+            sequences[seqNb & STORED_SEQS_MASK] = sequence;
             op += oneSeqSize;
         }
         if (seqNb<nbSeq) return ERROR(corruption_detected);
@@ -1062,7 +1063,7 @@ ZSTD_decompressSequencesLong_body(
         /* finish queue */
         seqNb -= seqAdvance;
         for ( ; seqNb<nbSeq ; seqNb++) {
-            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
             if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
             op += oneSeqSize;
         }
@@ -1070,7 +1071,7 @@ ZSTD_decompressSequencesLong_body(
         /* save reps for next block */
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
 #undef STORED_SEQS
-#undef STOSEQ_MASK
+#undef STORED_SEQS_MASK
 #undef ADVANCED_SEQS
     }
 
@@ -1118,9 +1119,10 @@ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
 #endif
 
 typedef size_t (*ZSTD_decompressSequences_t)(
-    ZSTD_DCtx *dctx, void *dst, size_t maxDstSize,
-    const void *seqStart, size_t seqSize, int nbSeq,
-    const ZSTD_longOffset_e isLongOffset);
+                            ZSTD_DCtx* dctx,
+                            void* dst, size_t maxDstSize,
+                            const void* seqStart, size_t seqSize, int nbSeq,
+                            const ZSTD_longOffset_e isLongOffset);
 
 static size_t
 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
@@ -1136,10 +1138,17 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
   return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 
-static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
-                                void* dst, size_t maxDstSize,
-                                const void* seqStart, size_t seqSize, int nbSeq,
-                                const ZSTD_longOffset_e isLongOffset)
+
+/* ZSTD_decompressSequencesLong() :
+ * decompression function triggered when a minimum share of offsets is considered "long",
+ * aka out of cache.
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
+ * This function will try to mitigate main memory latency through the use of prefetching */
+static size_t
+ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
+                             void* dst, size_t maxDstSize,
+                             const void* seqStart, size_t seqSize, int nbSeq,
+                             const ZSTD_longOffset_e isLongOffset)
 {
     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
 #if DYNAMIC_BMI2
@@ -1150,6 +1159,8 @@ static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
   return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 
+
+
 /* ZSTD_getLongOffsetsShare() :
  * condition : offTable must be valid
  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
@@ -1188,7 +1199,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
      * In block mode, window size is not known, so we have to be conservative.
      * (note: but it could be evaluated from current-lowLimit)
      */
-    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
+    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
     DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
 
     if (srcSize >= ZSTD_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);
@@ -1208,7 +1219,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
         ip += seqHSize;
         srcSize -= seqHSize;
 
-        if ( (!frame || dctx->fParams.windowSize > (1<<24))
+        if ( (!frame || (dctx->fParams.windowSize > (1<<24)))
           && (nbSeq>0) ) {  /* could probably use a larger nbSeq limit */
             U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
             U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
index 326c1c1c56e5973a59171e731469517d1b9b7872..caa803572e552a89cbd062aff6f1d3ab8d09bce5 100644 (file)
@@ -522,22 +522,24 @@ static BMK_benchOutcome_t BMK_benchOutcome_setValidResult(BMK_benchResult_t resu
 
 
 /* benchMem with no allocation */
-static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
-            const void** srcPtrs, size_t* srcSizes,
-            void** cPtrs, size_t* cCapacities, size_t* cSizes,
-            void** resPtrs, size_t* resSizes,
-            void** resultBufferPtr, void* compressedBuffer,
-            size_t maxCompressedSize,
-            BMK_timedFnState_t* timeStateCompress,
-            BMK_timedFnState_t* timeStateDecompress,
-
-            const void* srcBuffer, size_t srcSize,
-            const size_t* fileSizes, unsigned nbFiles,
-            const int cLevel, const ZSTD_compressionParameters* comprParams,
-            const void* dictBuffer, size_t dictBufferSize,
-            ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
-            int displayLevel, const char* displayName,
-            const BMK_advancedParams_t* adv)
+static BMK_benchOutcome_t
+BMK_benchMemAdvancedNoAlloc(
+                    const void** srcPtrs, size_t* srcSizes,
+                    void** cPtrs, size_t* cCapacities, size_t* cSizes,
+                    void** resPtrs, size_t* resSizes,
+                    void** resultBufferPtr, void* compressedBuffer,
+                    size_t maxCompressedSize,
+                    BMK_timedFnState_t* timeStateCompress,
+                    BMK_timedFnState_t* timeStateDecompress,
+
+                    const void* srcBuffer, size_t srcSize,
+                    const size_t* fileSizes, unsigned nbFiles,
+                    const int cLevel,
+                    const ZSTD_compressionParameters* comprParams,
+                    const void* dictBuffer, size_t dictBufferSize,
+                    ZSTD_CCtx* cctx, ZSTD_DCtx* dctx,
+                    int displayLevel, const char* displayName,
+                    const BMK_advancedParams_t* adv)
 {
     size_t const blockSize = ((adv->blockSize>=32 && (adv->mode != BMK_decodeOnly)) ? adv->blockSize : srcSize) + (!srcSize);  /* avoid div by 0 */
     BMK_benchResult_t benchResult;
@@ -599,6 +601,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
                 cPtr += cCapacities[nbBlocks];
                 resPtr += thisBlockSize;
                 remaining -= thisBlockSize;
+                if (adv->mode == BMK_decodeOnly) { assert(nbBlocks==0); cSizes[nbBlocks] = thisBlockSize; }  /* test the run-time mode, not the enum constant : only decode-only runs (single block) record the block size in cSizes here */
             }
         }
     }
@@ -633,7 +636,6 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
         DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
 
         while (!(compressionCompleted && decompressionCompleted)) {
-
             if (!compressionCompleted) {
                 BMK_runOutcome_t const cOutcome =
                         BMK_benchTimedFn( timeStateCompress,
@@ -659,7 +661,6 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
                 }   }
 
                 {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                    markNb = (markNb+1) % NB_MARKS;
                     DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
                             marks[markNb], displayName,
                             (U32)srcSize, (U32)cSize,
@@ -690,7 +691,6 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
                 }
 
                 {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                    markNb = (markNb+1) % NB_MARKS;
                     DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
                             marks[markNb], displayName,
                             (U32)srcSize, (U32)benchResult.cSize,
@@ -700,6 +700,7 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
                 }
                 decompressionCompleted = BMK_isCompleted_TimedFn(timeStateDecompress);
             }
+            markNb = (markNb+1) % NB_MARKS;
         }   /* while (!(compressionCompleted && decompressionCompleted)) */
 
         /* CRC Checking */
@@ -707,7 +708,8 @@ static BMK_benchOutcome_t BMK_benchMemAdvancedNoAlloc(
             U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
             if ((adv->mode == BMK_both) && (crcOrig!=crcCheck)) {
                 size_t u;
-                DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
+                DISPLAY("!!! WARNING !!! %14s : Invalid Checksum : %x != %x   \n",
+                        displayName, (unsigned)crcOrig, (unsigned)crcCheck);
                 for (u=0; u<srcSize; u++) {
                     if (((const BYTE*)srcBuffer)[u] != resultBuffer[u]) {
                         U32 segNb, bNb, pos;