git.ipfire.org Git - thirdparty/zstd.git/commitdiff
improve benchmark measurement for small inputs
author: Yann Collet <cyan@fb.com>
Tue, 20 Feb 2018 22:48:09 +0000 (14:48 -0800)
committer: Yann Collet <cyan@fb.com>
Tue, 20 Feb 2018 22:58:40 +0000 (14:58 -0800)
by invoking time() once per batch, instead of once per compression / decompression.
Batch is dynamically resized so that each round lasts approximately 1 second.

Also : increases time accuracy to nanosecond

programs/bench.c
programs/util.h

index bf3dcb47d571bb24de69009464843e4d75255137..e67c886b35207a06ee93fb24ff0c850c65c26c84 100644 (file)
@@ -22,7 +22,7 @@
 *  Compiler Warnings
 ****************************************/
 #ifdef _MSC_VER
-#  pragma warning(disable : 4127)                /* disable: C4127: conditional expression is constant */
+#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
 #endif
 
 
@@ -34,6 +34,7 @@
 #include <stdlib.h>      /* malloc, free */
 #include <string.h>      /* memset */
 #include <stdio.h>       /* fprintf, fopen */
+#include <assert.h>      /* assert */
 
 #include "mem.h"
 #define ZSTD_STATIC_LINKING_ONLY
@@ -51,8 +52,9 @@
 #  define ZSTD_GIT_COMMIT_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_GIT_COMMIT)
 #endif
 
-#define TIMELOOP_MICROSEC     1*1000000ULL /* 1 second */
-#define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */
+#define TIMELOOP_MICROSEC     (1*1000000ULL) /* 1 second */
+#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
+#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
 #define COOLPERIOD_SEC        10
 
 #define KB *(1 <<10)
@@ -264,7 +266,9 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
     {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);
         U64 const crcOrig = g_decodeOnly ? 0 : XXH64(srcBuffer, srcSize, 0);
         UTIL_time_t coolTime;
-        U64 const maxTime = (g_nbSeconds * TIMELOOP_MICROSEC) + 1;
+        U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 1;
+        U32 nbDecodeLoops = (U32)((100 MB) / (srcSize+1)) + 1;  /* initial conservative speed estimate */
+        U32 nbCompressionLoops = (U32)((2 MB) / (srcSize+1)) + 1;  /* initial conservative speed estimate */
         U64 totalCTime=0, totalDTime=0;
         U32 cCompleted=g_decodeOnly, dCompleted=0;
 #       define NB_MARKS 4
@@ -283,18 +287,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
             }
 
             if (!g_decodeOnly) {
-                UTIL_time_t clockStart;
                 /* Compression */
                 DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
                 if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */
 
-                UTIL_sleepMilli(1);  /* give processor time to other processes */
+                UTIL_sleepMilli(5);  /* give processor time to other processes */
                 UTIL_waitForNextTick();
-                clockStart = UTIL_getTime();
 
                 if (!cCompleted) {   /* still some time to do compression tests */
-                    U64 const clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
                     U32 nbLoops = 0;
+                    UTIL_time_t const clockStart = UTIL_getTime();
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_nbWorkers, g_nbWorkers);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionLevel, cLevel);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_enableLongDistanceMatching, g_ldmFlag);
@@ -314,7 +316,9 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_targetLength, comprParams->targetLength);
                     ZSTD_CCtx_setParameter(ctx, ZSTD_p_compressionStrategy, comprParams->strategy);
                     ZSTD_CCtx_loadDictionary(ctx, dictBuffer, dictBufferSize);
-                    do {
+
+                    if (!g_nbSeconds) nbCompressionLoops=1;
+                    for (nbLoops=0; nbLoops<nbCompressionLoops; nbLoops++) {
                         U32 blockNb;
                         for (blockNb=0; blockNb<nbBlocks; blockNb++) {
 #if 0   /* direct compression function, for occasional comparison */
@@ -343,12 +347,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                             }
                             blockTable[blockNb].cSize = out.pos;
 #endif
+                    }   }
+                    {   U64 const loopDuration = UTIL_clockSpanNano(clockStart);
+                        if (loopDuration > 0) {
+                            if (loopDuration < fastestC * nbCompressionLoops)
+                                fastestC = loopDuration / nbCompressionLoops;
+                            nbCompressionLoops = (1000000000 / fastestC) + 1;
+                        } else {
+                            assert(nbCompressionLoops < 40000000);  /* avoid overflow */
+                            nbCompressionLoops *= 100;
                         }
-                        nbLoops++;
-                    } while (UTIL_clockSpanMicro(clockStart) < clockLoop);
-                    {   U64 const loopDuration = UTIL_clockSpanMicro(clockStart);
-                        if (loopDuration < fastestC*nbLoops)
-                            fastestC = loopDuration / nbLoops;
                         totalCTime += loopDuration;
                         cCompleted = (totalCTime >= maxTime);  /* end compression tests */
                 }   }
@@ -358,7 +366,7 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                 ratio = (double)srcSize / (double)cSize;
                 markNb = (markNb+1) % NB_MARKS;
                 {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                    double const compressionSpeed = (double)srcSize / fastestC;
+                    double const compressionSpeed = ((double)srcSize / fastestC) * 1000;
                     int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
                     DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
                             marks[markNb], displayName, (U32)srcSize, (U32)cSize,
@@ -376,16 +384,16 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
             /* Decompression */
             if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */
 
-            UTIL_sleepMilli(1); /* give processor time to other processes */
+            UTIL_sleepMilli(5); /* give processor time to other processes */
             UTIL_waitForNextTick();
 
             if (!dCompleted) {
-                U64 clockLoop = g_nbSeconds ? TIMELOOP_MICROSEC : 1;
                 U32 nbLoops = 0;
                 ZSTD_DDict* const ddict = ZSTD_createDDict(dictBuffer, dictBufferSize);
                 UTIL_time_t const clockStart = UTIL_getTime();
                 if (!ddict) EXM_THROW(2, "ZSTD_createDDict() allocation failure");
-                do {
+                if (!g_nbSeconds) nbDecodeLoops = 1;
+                for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
                     U32 blockNb;
                     for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                         size_t const regenSize = ZSTD_decompress_usingDDict(dctx,
@@ -397,22 +405,26 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                                       blockNb, (U32)blockTable[blockNb].cSize, ZSTD_getErrorName(regenSize));
                         }
                         blockTable[blockNb].resSize = regenSize;
-                    }
-                    nbLoops++;
-                } while (UTIL_clockSpanMicro(clockStart) < clockLoop);
+                }   }
                 ZSTD_freeDDict(ddict);
-                {   U64 const loopDuration = UTIL_clockSpanMicro(clockStart);
-                    if (loopDuration < fastestD*nbLoops)
-                        fastestD = loopDuration / nbLoops;
+                {   U64 const loopDuration = UTIL_clockSpanNano(clockStart);
+                    if (loopDuration > 0) {
+                        if (loopDuration < fastestD * nbDecodeLoops)
+                            fastestD = loopDuration / nbDecodeLoops;
+                        nbDecodeLoops = (1000000000/*1sec*/ / fastestD) + 1;
+                    } else {
+                        assert(nbDecodeLoops < 40000000);  /* avoid overflow */
+                        nbDecodeLoops *= 100;
+                    }
                     totalDTime += loopDuration;
                     dCompleted = (totalDTime >= maxTime);
             }   }
 
             markNb = (markNb+1) % NB_MARKS;
             {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                double const compressionSpeed = (double)srcSize / fastestC;
+                double const compressionSpeed = ((double)srcSize / fastestC) * 1000;
                 int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
-                double const decompressionSpeed = (double)srcSize / fastestD;
+                double const decompressionSpeed = ((double)srcSize / fastestD) * 1000;
                 DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
                         marks[markNb], displayName, (U32)srcSize, (U32)cSize,
                         ratioAccuracy, ratio,
@@ -461,8 +473,8 @@ static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
         }   /* for (testNb = 1; testNb <= (g_nbSeconds + !g_nbSeconds); testNb++) */
 
         if (g_displayLevel == 1) {   /* hidden display mode -q, used by python speed benchmark */
-            double cSpeed = (double)srcSize / fastestC;
-            double dSpeed = (double)srcSize / fastestD;
+            double cSpeed = ((double)srcSize / fastestC) * 1000;
+            double dSpeed = ((double)srcSize / fastestD) * 1000;
             if (g_additionalParam)
                 DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
             else
@@ -634,7 +646,8 @@ static void BMK_benchFileTable(const char* const * const fileNamesTable, unsigne
 }
 
 
-static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility, const ZSTD_compressionParameters* compressionParams)
+static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility,
+                              const ZSTD_compressionParameters* compressionParams)
 {
     char name[20] = {0};
     size_t benchedSize = 10000000;
index 4f22236a91eccf774dc1d7ab82a6167fe1d2a2c9..3e69745793e4eb2c7e92ea65135c6c8a42db9df7 100644 (file)
@@ -142,7 +142,9 @@ static int g_utilDisplayLevel;
         }
         return 1000000000ULL*(clockEnd.QuadPart - clockStart.QuadPart)/ticksPerSecond.QuadPart;
     }
+
 #elif defined(__APPLE__) && defined(__MACH__)
+
     #include <mach/mach_time.h>
     #define UTIL_TIME_INITIALIZER 0
     typedef U64 UTIL_time_t;
@@ -167,7 +169,9 @@ static int g_utilDisplayLevel;
         }
         return ((clockEnd - clockStart) * (U64)rate.numer) / ((U64)rate.denom);
     }
+
 #elif (PLATFORM_POSIX_VERSION >= 200112L) && (defined __UCLIBC__ || ((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 17) || __GLIBC__ > 2))
+
     #define UTIL_TIME_INITIALIZER { 0, 0 }
     typedef struct timespec UTIL_freq_t;
     typedef struct timespec UTIL_time_t;
@@ -217,12 +221,18 @@ static int g_utilDisplayLevel;
 #define SEC_TO_MICRO 1000000
 
 /* returns time span in microseconds */
-UTIL_STATIC U64 UTIL_clockSpanMicro( UTIL_time_t clockStart )
+UTIL_STATIC U64 UTIL_clockSpanMicro(UTIL_time_t clockStart )
 {
     UTIL_time_t const clockEnd = UTIL_getTime();
     return UTIL_getSpanTimeMicro(clockStart, clockEnd);
 }
 
+/* returns time span in nanoseconds */
+UTIL_STATIC U64 UTIL_clockSpanNano(UTIL_time_t clockStart )
+{
+    UTIL_time_t const clockEnd = UTIL_getTime();
+    return UTIL_getSpanTimeNano(clockStart, clockEnd);
+}
 
 UTIL_STATIC void UTIL_waitForNextTick(void)
 {