git.ipfire.org Git - thirdparty/zstd.git/commitdiff
Change speed representation from floating point to integral
author: George Lu <gclu@fb.com>
Tue, 7 Aug 2018 00:13:36 +0000 (17:13 -0700)
committer: George Lu <gclu@fb.com>
Thu, 9 Aug 2018 17:42:58 +0000 (10:42 -0700)
programs/bench.c
programs/bench.h
tests/paramgrill.c

index b496caf27769e2656963219e23472fa2bb60e500..49b31787006a71a093028999256b8b568e90cce6 100644 (file)
@@ -555,9 +555,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
                         ratio = (double)(srcSize / intermediateResultCompress.result.result.sumOfReturn);
                         {   
                             int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                            double const compressionSpeed = ((double)srcSize / intermediateResultCompress.result.result.nanoSecPerRun) * 1000;
-                            int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
-                            results.result.cSpeed = compressionSpeed * 1000000;
+                            results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / intermediateResultCompress.result.result.nanoSecPerRun);
                             cSize = intermediateResultCompress.result.result.sumOfReturn;
                             results.result.cSize = cSize;
                             ratio = (double)srcSize / results.result.cSize;
@@ -565,7 +563,7 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
                             DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
                                     marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
                                     ratioAccuracy, ratio,
-                                    cSpeedAccuracy, compressionSpeed);
+                                    results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB));
                         }
                     }
 
@@ -579,16 +577,13 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
       
                         {   
                             int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                            double const compressionSpeed = results.result.cSpeed / 1000000;
-                            int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
-                            double const decompressionSpeed = ((double)srcSize / intermediateResultDecompress.result.result.nanoSecPerRun) * 1000;
-                            results.result.dSpeed = decompressionSpeed * 1000000;
+                            results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ intermediateResultDecompress.result.result.nanoSecPerRun);
                             markNb = (markNb+1) % NB_MARKS;
                             DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
                                     marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
                                     ratioAccuracy, ratio,
-                                    cSpeedAccuracy, compressionSpeed,
-                                    decompressionSpeed);
+                                    results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB),
+                                    (double)results.result.dSpeed / (1 MB));
                         }
                     }
                 }
@@ -605,19 +600,20 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
                     if(compressionResults.result.nanoSecPerRun == 0) {
                         results.result.cSpeed = 0;
                     } else {
-                        results.result.cSpeed = (double)srcSize / compressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC;
+                        results.result.cSpeed = srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun;
                     }
                     results.result.cSize = compressionResults.result.sumOfReturn;
                     {   
                         int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                        double const compressionSpeed = results.result.cSpeed / 1000000;
-                        int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
+                        results.result.cSpeed = (srcSize * TIMELOOP_NANOSEC / compressionResults.result.nanoSecPerRun);
+                        cSize = compressionResults.result.sumOfReturn;
+                        results.result.cSize = cSize;
                         ratio = (double)srcSize / results.result.cSize;
                         markNb = (markNb+1) % NB_MARKS;
                         DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s\r",
                                 marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
                                 ratioAccuracy, ratio,
-                                cSpeedAccuracy, compressionSpeed);
+                                results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB));
                     }
                 }
                 if(adv->mode != BMK_compressOnly) {
@@ -633,19 +629,18 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
                     if(decompressionResults.result.nanoSecPerRun == 0) {
                         results.result.dSpeed = 0;
                     } else {
-                        results.result.dSpeed = (double)srcSize / decompressionResults.result.nanoSecPerRun * TIMELOOP_NANOSEC;
+                        results.result.dSpeed = srcSize * TIMELOOP_NANOSEC / decompressionResults.result.nanoSecPerRun;
                     }
-                    {   int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
-                        double const compressionSpeed = results.result.cSpeed / 1000000;
-                        int const cSpeedAccuracy = (compressionSpeed < 10.) ? 2 : 1;
-                        double const decompressionSpeed = ((double)srcSize / decompressionResults.result.nanoSecPerRun) * 1000;
-                        results.result.dSpeed = decompressionSpeed * 1000000;
+
+                    {   
+                        int const ratioAccuracy = (ratio < 10.) ? 3 : 2;
+                        results.result.dSpeed = (srcSize * TIMELOOP_NANOSEC/ decompressionResults.result.nanoSecPerRun);
                         markNb = (markNb+1) % NB_MARKS;
                         DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.*f),%6.*f MB/s ,%6.1f MB/s \r",
                                 marks[markNb], displayName, (U32)srcSize, (U32)results.result.cSize,
                                 ratioAccuracy, ratio,
-                                cSpeedAccuracy, compressionSpeed,
-                                decompressionSpeed);
+                                results.result.cSpeed < (10 MB) ? 2 : 1, (double)results.result.cSpeed / (1 MB),
+                                (double)results.result.dSpeed / (1 MB));
                     }
                 }
             }
@@ -693,8 +688,8 @@ static BMK_return_t BMK_benchMemAdvancedNoAlloc(
         }   /* CRC Checking */
 
     if (displayLevel == 1) {   /* hidden display mode -q, used by python speed benchmark */
-        double const cSpeed = results.result.cSpeed / 1000000;
-        double const dSpeed = results.result.dSpeed / 1000000;
+        double const cSpeed = (double)results.result.cSpeed / (1 MB);
+        double const dSpeed = (double)results.result.dSpeed / (1 MB);
         if (adv->additionalParam) {
             DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, adv->additionalParam);
         } else {
index 8baf33a0a5949a262f4e9b29bb71ca20161f3b14..6247fa59668e9871bcc2ab1cfe80a59430977c97 100644 (file)
@@ -32,8 +32,8 @@ extern "C" {
 
 typedef struct {
     size_t cSize;
-    double cSpeed;   /* bytes / sec */
-    double dSpeed;
+    U64 cSpeed;   /* bytes / sec */
+    U64 dSpeed;
     size_t cMem;
 } BMK_result_t;
 
index b8396c72eb4080662a4a8f0ea759cb0a1f1ae772..5af3d88facecd396f3aa24c9771947df9dca4b30 100644 (file)
@@ -249,18 +249,6 @@ static int feasible(const BMK_result_t results, const constraint_t target) {
     return (results.cSpeed >= target.cSpeed) && (results.dSpeed >= target.dSpeed) && (results.cMem <= target.cMem);
 }
 
-#define EPSILON 0.001
-static int epsilonEqual(const double c1, const double c2) {
-    return MAX(c1/c2,c2/c1) < 1 + EPSILON;
-}
-
-/* checks exact equivalence to 0, to stop compiler complaining fpeq */
-static int eqZero(const double c1) {
-    const double z1 = 0.0;
-    const double z2 = -0.0;
-    return !(memcmp(&c1, &z1, sizeof(double))) || !(memcmp(&c1, &z2, sizeof(double)));
-}
-
 /* hill climbing value for part 1 */
 static double resultScore(const BMK_result_t res, const size_t srcSize, const constraint_t target) {
     double cs = 0., ds = 0., rt, cm = 0.;
@@ -280,7 +268,7 @@ static double resultScore(const BMK_result_t res, const size_t srcSize, const co
 static int compareResultLT(const BMK_result_t result1, const BMK_result_t result2, const constraint_t target, size_t srcSize) {
     if(feasible(result1, target) && feasible(result2, target)) {
         return (result1.cSize > result2.cSize) || (result1.cSize == result2.cSize && result2.cSpeed > result1.cSpeed)
-        || (result1.cSize == result2.cSize && epsilonEqual(result2.cSpeed, result1.cSpeed) && result2.dSpeed > result1.dSpeed);
+        || (result1.cSize == result2.cSize && result2.cSpeed == result1.cSpeed && result2.dSpeed > result1.dSpeed);
     }
     return feasible(result2, target) || (!feasible(result1, target) && (resultScore(result1, srcSize, target) < resultScore(result2, srcSize, target)));
 
@@ -661,7 +649,7 @@ static void BMK_printWinner(FILE* f, const U32 cLevel, const BMK_result_t result
 
         fprintf(f,
             "/* %s */   /* R:%5.3f at %5.1f MB/s - %5.1f MB/s */",
-            lvlstr, (double)srcSize / result.cSize, result.cSpeed / (1 << 20), result.dSpeed / (1 << 20));
+            lvlstr, (double)srcSize / result.cSize, (double)result.cSpeed / (1 << 20), (double)result.dSpeed / (1 << 20));
 
         if(TIMED) { fprintf(f, " - %1lu:%2lu:%05.2f", (unsigned long) minutes / 60,(unsigned long) minutes % 60, (double)(time - minutes * TIMELOOP_NANOSEC * 60ULL)/TIMELOOP_NANOSEC); }
         fprintf(f, "\n");
@@ -696,8 +684,8 @@ static void BMK_printWinners(FILE* f, const winnerInfo_t* winners, size_t srcSiz
 
 
 typedef struct {
-    double cSpeed_min;
-    double dSpeed_min;
+    U64 cSpeed_min;
+    U64 dSpeed_min;
     U32 windowLog_max;
     ZSTD_strategy strategy_max;
 } level_constraints_t;
@@ -794,16 +782,16 @@ static int BMK_seed(winnerInfo_t* winners, const ZSTD_compressionParameters para
                 /* too large compression speed difference for the compression benefit */
                 if (W_ratio > O_ratio)
                 DISPLAY ("Compression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
-                         W_ratio, testResult.cSpeed / 1000000,
-                         O_ratio, winners[cLevel].result.cSpeed / 1000000.,   cLevel);
+                         W_ratio, (double)testResult.cSpeed / 1000000,
+                         O_ratio, (double)winners[cLevel].result.cSpeed / 1000000.,   cLevel);
                 continue;
             }
             if (W_DSpeed_note   < O_DSpeed_note  ) {
                 /* too large decompression speed difference for the compression benefit */
                 if (W_ratio > O_ratio)
                 DISPLAY ("Decompression Speed : %5.3f @ %4.1f MB/s  vs  %5.3f @ %4.1f MB/s   : not enough for level %i\n",
-                         W_ratio, testResult.dSpeed / 1000000.,
-                         O_ratio, winners[cLevel].result.dSpeed / 1000000.,   cLevel);
+                         W_ratio, (double)testResult.dSpeed / 1000000.,
+                         O_ratio, (double)winners[cLevel].result.dSpeed / 1000000.,   cLevel);
                 continue;
             }
 
@@ -1173,7 +1161,7 @@ static void BMK_benchOnce(ZSTD_CCtx* cctx, ZSTD_DCtx* dctx, const void* srcBuffe
     g_params = ZSTD_adjustCParams(g_params, srcSize, 0);
     BMK_benchParam1(&testResult, srcBuffer, srcSize, cctx, dctx, g_params);
     DISPLAY("Compression Ratio: %.3f  Compress Speed: %.1f MB/s Decompress Speed: %.1f MB/s\n", (double)srcSize / testResult.cSize, 
-        testResult.cSpeed / 1000000, testResult.dSpeed / 1000000);
+        (double)testResult.cSpeed / 1000000, (double)testResult.dSpeed / 1000000);
     return;
 }
 
@@ -1355,20 +1343,20 @@ static int allBench(BMK_result_t* resultPtr,
     *resultPtr = benchres.result;
 
     /* calculate uncertainty in compression / decompression runs */
-    if(eqZero(benchres.result.cSpeed)) {
+    if(benchres.result.cSpeed) {
+        loopDurationC = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed); 
+        uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC) * VARIANCE; 
+    } else {
         loopDurationC = 0;
         uncertaintyConstantC = 3;
-    } else {
-        loopDurationC = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.cSpeed); 
-        uncertaintyConstantC = ((loopDurationC + (double)(2 * g_clockGranularity))/loopDurationC) * VARIANCE; 
     }
 
-    if(eqZero(benchres.result.dSpeed)) {
+    if(benchres.result.dSpeed) {
+        loopDurationD = ((buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed); 
+        uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD) * VARIANCE;  
+    } else {
         loopDurationD = 0;
         uncertaintyConstantD = 3;
-    } else {
-        loopDurationD = (U64)((double)(buf.srcSize * TIMELOOP_NANOSEC) / benchres.result.dSpeed); 
-        uncertaintyConstantD = ((loopDurationD + (double)(2 * g_clockGranularity))/loopDurationD) * VARIANCE;  
     }
 
     /* anything with worse ratio in feas is definitely worse, discard */