From: Yann Collet <cyan@fb.com>
Date: Tue, 13 Nov 2018 19:01:59 +0000 (-0800)
Subject: separated bench module into benchfn and benchzstd
X-Git-Tag: v1.3.8~47^2~7
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d38063f8ae0ef2e9834fe0b6b8f4cf0943093276;p=thirdparty%2Fzstd.git

separated bench module into benchfn and benchzstd

it shall be possible to use benchfn
without any dependency on zstd.
---

diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile
index 730250f96..541f3969b 100644
--- a/contrib/largeNbDicts/Makefile
+++ b/contrib/largeNbDicts/Makefile
@@ -28,14 +28,14 @@ default: largeNbDicts
 
 all : largeNbDicts
 
-largeNbDicts: util.o bench.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
+largeNbDicts: util.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD)
 	$(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@
 
 .PHONY: $(LIBZSTD)
 $(LIBZSTD):
 	$(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)"
 
-bench.o  : $(PROGDIR)/bench.c
+benchfn.o  : $(PROGDIR)/benchfn.c
 	$(CC) $(CPPFLAGS) $(CFLAGS) $^ -c
 
 datagen.o: $(PROGDIR)/datagen.c
diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c
index d7639fc40..2605c6003 100644
--- a/contrib/largeNbDicts/largeNbDicts.c
+++ b/contrib/largeNbDicts/largeNbDicts.c
@@ -24,7 +24,7 @@
 #include <assert.h>   /* assert */
 
 #include "util.h"
-#include "bench.h"
+#include "benchfn.h"
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
 #include "zdict.h"
@@ -543,6 +543,7 @@ static int benchMem(slice_collection_t dstBlocks,
         BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState,
                                 decompress, &di,
                                 NULL, NULL,
+                                ZSTD_isError,
                                 dstBlocks.nbSlices,
                                 (const void* const *)srcBlocks.slicePtrs, srcBlocks.capacities,
                                 dstBlocks.slicePtrs, dstBlocks.capacities,
diff --git a/programs/Makefile b/programs/Makefile
index 0bfb8b9a6..77c1d6a2d 100644
--- a/programs/Makefile
+++ b/programs/Makefile
@@ -160,7 +160,7 @@ $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP)
 zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP)
 zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD)
 zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
-zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o bench.o datagen.o dibio.o
+zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o
 	@echo "$(THREAD_MSG)"
 	@echo "$(ZLIB_MSG)"
 	@echo "$(LZMA_MSG)"
@@ -178,13 +178,13 @@ zstd-release: zstd
 zstd32 : CPPFLAGS += $(THREAD_CPP)
 zstd32 : LDFLAGS  += $(THREAD_LD)
 zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT)
-zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c bench.c datagen.c dibio.c
+zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c
 ifneq (,$(filter Windows%,$(OS)))
 	windres/generate_res.bat
 endif
 	$(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT)
 
-zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c bench.o datagen.o dibio.o
+zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o datagen.o dibio.o
 	$(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS)
 
 zstd-nomt : THREAD_CPP :=
diff --git a/programs/benchfn.c b/programs/benchfn.c
new file mode 100644
index 000000000..5ba0c96c2
--- /dev/null
+++ b/programs/benchfn.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+
+/* *************************************
+*  Includes
+***************************************/
+#include "platform.h"    /* Large Files support */
+#include "util.h"        /* UTIL_getFileSize, UTIL_sleep */
+#include <stdlib.h>      /* malloc, free */
+#include <string.h>      /* memset */
+#include <stdio.h>       /* fprintf, fopen */
+#include <assert.h>      /* assert */
+
+#include "mem.h"
+#include "benchfn.h"
+
+
+/* *************************************
+*  Constants
+***************************************/
+#define TIMELOOP_MICROSEC     (1*1000000ULL) /* 1 second */
+#define TIMELOOP_NANOSEC      (1*1000000000ULL) /* 1 second */
+#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */
+#define COOLPERIOD_SEC        10
+
+#define KB *(1 <<10)
+#define MB *(1 <<20)
+#define GB *(1U<<30)
+
+
+/* *************************************
+*  Errors
+***************************************/
+#ifndef DEBUG
+#  define DEBUG 0
+#endif
+
+#define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
+#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); }
+
+/* error without displaying */
+#define RETURN_QUIET_ERROR(errorNum, retValue, ...) { \
+    DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__);    \
+    DEBUGOUTPUT("Error %i : ", errorNum);             \
+    DEBUGOUTPUT(__VA_ARGS__);                         \
+    DEBUGOUTPUT(" \n");                               \
+    return retValue;                                  \
+}
+
+
+/* *************************************
+*  Benchmarking an arbitrary function
+***************************************/
+
+int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
+{
+    return outcome.tag == 0;   /* tag 0 marks a valid result; error outcomes carry a non-zero tag */
+}
+
+/* warning : this function will stop program execution (through assert()) if outcome is invalid !
+ *           check outcome validity first, using BMK_isSuccessful_runOutcome() */
+BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
+{
+    assert(outcome.tag == 0);
+    return outcome.internal_never_use_directly;
+}
+
+static BMK_runOutcome_t BMK_runOutcome_error(void)
+{
+    BMK_runOutcome_t b;
+    memset(&b, 0, sizeof(b));   /* zero the whole variant, payload included */
+    b.tag = 1;                  /* non-zero tag : reported as failure by BMK_isSuccessful_runOutcome() */
+    return b;
+}
+
+static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
+{
+    BMK_runOutcome_t outcome;
+    outcome.tag = 0;   /* 0 == valid result, as tested by BMK_isSuccessful_runOutcome() */
+    outcome.internal_never_use_directly = runTime;
+    return outcome;
+}
+
+
+/* BMK_benchFunction() :
+ * runs initFn once (optional, NULL to skip), then benchFn on every block, `nbLoops` times.
+ * Return values are checked with errorFn (optional, NULL to skip) during the first loop only;
+ * on error, an error outcome is returned, with the faulty return value stored in sumOfReturn.
+ * First-loop return values are summed, and copied into blockResults when it is not NULL.
+ * An error outcome is also returned when nbLoops == 0.
+ * note : time per loop could be zero if run time < timer resolution */
+BMK_runOutcome_t BMK_benchFunction(
+            BMK_benchFn_t benchFn, void* benchPayload,
+            BMK_initFn_t initFn, void* initPayload,
+            BMK_errorFn_t errorFn,
+            size_t blockCount,
+            const void* const * srcBlockBuffers, const size_t* srcBlockSizes,
+            void* const * dstBlockBuffers, const size_t* dstBlockCapacities,
+            size_t* blockResults,
+            unsigned nbLoops)
+{
+    size_t dstSize = 0;
+
+    if(!nbLoops) {
+        RETURN_QUIET_ERROR(2, BMK_runOutcome_error(), "nbLoops must be nonzero ");
+    }
+
+    /* init */
+    {   size_t i;
+        for(i = 0; i < blockCount; i++) {
+            memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]);  /* warm up and erase result buffer */
+        }
+#if 0
+        /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
+         * (Makes former slower)
+         */
+        UTIL_sleepMilli(5);  /* give processor time to other processes */
+        UTIL_waitForNextTick();
+#endif
+    }
+
+    /* benchmark */
+    {   UTIL_time_t const clockStart = UTIL_getTime();   /* started before initFn : its duration is measured too */
+        unsigned loopNb, blockNb;
+        if (initFn != NULL) initFn(initPayload);
+        for (loopNb = 0; loopNb < nbLoops; loopNb++) {
+            for (blockNb = 0; blockNb < blockCount; blockNb++) {
+                size_t const res = benchFn(srcBlockBuffers[blockNb], srcBlockSizes[blockNb],
+                                    dstBlockBuffers[blockNb], dstBlockCapacities[blockNb],
+                                    benchPayload);
+                if (loopNb == 0) {   /* return values are only checked and collected during first loop */
+                    if (errorFn != NULL)
+                    if (errorFn(res)) {
+                        BMK_runOutcome_t ro = BMK_runOutcome_error();
+                        ro.internal_never_use_directly.sumOfReturn = res;
+                        RETURN_QUIET_ERROR(2, ro,
+                            "Function benchmark failed on block %u (of size %u) with error %i",
+                            blockNb, (U32)srcBlockSizes[blockNb], (int)res);
+                    }
+                    dstSize += res;
+                    if (blockResults != NULL) blockResults[blockNb] = res;
+            }   }
+        }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
+
+        {   U64 const totalTime = UTIL_clockSpanNano(clockStart);
+            BMK_runTime_t rt;
+            rt.nanoSecPerRun = totalTime / nbLoops;
+            rt.sumOfReturn = dstSize;
+            return BMK_setValid_runTime(rt);
+    }   }
+}
+
+
+/* ====  Benchmarking any function, providing intermediate results  ==== */
+
+struct BMK_timedFnState_s {
+    U64 timeSpent_ns;          /* sum of the durations of all runs measured so far */
+    U64 timeBudget_ns;         /* total_ms, converted to nanoseconds */
+    U64 runBudget_ns;          /* run_ms, converted to nanoseconds */
+    BMK_runTime_t fastestRun;  /* set to maximum values at reset; initial best for BMK_benchTimedFn() */
+    unsigned nbLoops;          /* nb of loops requested for next BMK_benchFunction() call */
+    UTIL_time_t coolTime;      /* timestamp of last reset or last cool-down pause */
+};  /* typedef'd to BMK_timedFnState_t within benchfn.h */
+
+BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
+{
+    BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
+    if (r == NULL) return NULL;   /* malloc() error */
+    BMK_resetTimedFnState(r, total_ms, run_ms);   /* initializes every field; budgets are adjusted there */
+    return r;   /* to be released with BMK_freeTimedFnState() */
+}
+
+void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
+    free(state);   /* NULL is accepted : free(NULL) is a no-op */
+}
+
+void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
+{
+    if (!total_ms) total_ms = 1 ;   /* enforce non-zero budgets */
+    if (!run_ms) run_ms = 1;
+    if (run_ms > total_ms) run_ms = total_ms;   /* a single run is never budgeted more than the whole session */
+    timedFnState->timeSpent_ns = 0;
+    timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;   /* ms => ns */
+    timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;   /* ms => ns */
+    timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);   /* maximum value : no measured run can be slower */
+    timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
+    timedFnState->nbLoops = 1;   /* first run starts with a single loop */
+    timedFnState->coolTime = UTIL_getTime();
+}
+
+/* Tells if the total time budget set in timedFnState has been spent.
+ * note : only successful runs add to the time spent : BMK_benchTimedFn() returns early on error. */
+int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
+{
+    return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
+}
+
+
+#undef MIN
+#define MIN(a,b)   ( (a) < (b) ? (a) : (b) )
+
+#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */
+
+/* BMK_benchTimedFn() : repeats BMK_benchFunction(), adjusting cont->nbLoops after each run,
+ * until one run lasts at least half of cont->runBudget_ns; returns its timing, or cont->fastestRun if faster.
+ * On error, the failed outcome is returned as is, so the error code stored in sumOfReturn is preserved. */
+BMK_runOutcome_t BMK_benchTimedFn(
+            BMK_timedFnState_t* cont,
+            BMK_benchFn_t benchFn, void* benchPayload, BMK_initFn_t initFn, void* initPayload,
+            BMK_errorFn_t errorFn, size_t blockCount,
+            const void* const* srcBlockBuffers, const size_t* srcBlockSizes,
+            void * const * dstBlockBuffers, const size_t * dstBlockCapacities, size_t* blockResults)
+{
+    U64 const runBudget_ns = cont->runBudget_ns;
+    U64 const runTimeMin_ns = runBudget_ns / 2;
+    int completed = 0;
+    BMK_runTime_t bestRunTime = cont->fastestRun;
+
+    while (!completed) {
+        BMK_runOutcome_t runResult;
+
+        /* Overheat protection */
+        if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) {
+            DEBUGOUTPUT("\rcooling down ...    \r");
+            UTIL_sleep(COOLPERIOD_SEC);
+            cont->coolTime = UTIL_getTime();
+        }
+
+        /* measure cont->nbLoops loops over all blocks */
+        runResult = BMK_benchFunction(benchFn, benchPayload,
+                                    initFn, initPayload,
+                                    errorFn,
+                                    blockCount,
+                                    srcBlockBuffers, srcBlockSizes,
+                                    dstBlockBuffers, dstBlockCapacities,
+                                    blockResults,
+                                    cont->nbLoops);
+
+        if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
+            return runResult;   /* forward the failed outcome : it carries the triggering return value */
+        }
+
+        {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
+            U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
+
+            cont->timeSpent_ns += loopDuration_ns;
+
+            /* estimate nbLoops for next run to last approximately runBudget_ns */
+            if (loopDuration_ns > (runBudget_ns / 50)) {
+                U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
+                cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
+            } else {
+                /* previous run was too short : blindly increase workload by x multiplier */
+                const unsigned multiplier = 10;
+                assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
+                cont->nbLoops *= multiplier;
+            }
+
+            if(loopDuration_ns < runTimeMin_ns) {
+                /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
+                assert(completed == 0);
+                continue;
+            } else {
+                if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
+                    bestRunTime = newRunTime;
+                }
+                completed = 1;
+            }
+        }
+    }   /* while (!completed) */
+
+    return BMK_setValid_runTime(bestRunTime);
+}
diff --git a/programs/benchfn.h b/programs/benchfn.h
new file mode 100644
index 000000000..3aff676d4
--- /dev/null
+++ b/programs/benchfn.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+
+/* benchfn :
+ * benchmark any function on a set of inputs
+ * providing result in nanoSecPerRun
+ * or detecting and returning an error
+ */
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+#ifndef BENCH_FN_H_23876
+#define BENCH_FN_H_23876
+
+/* ===  Dependencies  === */
+#include <stddef.h>   /* size_t */
+
+
+/* ===  Variant  === */
+
+/* Creates a variant `typeName`, able to express "error or valid result".
+ * Functions with return type `typeName`
+ * must first check if result is valid, using BMK_isSuccessful_*(),
+ * and only then can extract `baseType`.
+ */
+#define VARIANT_ERROR_RESULT(baseType, variantName)  \
+                                             \
+typedef struct {                             \
+    baseType internal_never_use_directly;    \
+    int tag;                                 \
+} variantName
+
+
+/* ====  Benchmarking any function, iterated on a set of blocks  ==== */
+
+typedef struct {
+    unsigned long long nanoSecPerRun;  /* time per iteration */
+    size_t sumOfReturn;       /* sum of return values */
+} BMK_runTime_t;
+
+VARIANT_ERROR_RESULT(BMK_runTime_t, BMK_runOutcome_t);  /* declares BMK_runOutcome_t */
+
+/* check first if the return structure represents an error or a valid result */
+int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome);
+
+/* extract result from variant type.
+ * note : this function will abort() program execution if result is not valid.
+ *        check result validity first, by using BMK_isSuccessful_runOutcome()
+ */
+BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome);
+
+
+
+typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload);
+typedef size_t (*BMK_initFn_t)(void* initPayload);
+typedef unsigned (*BMK_errorFn_t)(size_t);
+
+
+/* BMK_benchFunction() :
+ * This function times the execution of 2 argument functions, benchFn and initFn  */
+
+/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload)
+ *      is run nbLoops times
+ * initFn - (*initFn)(initPayload) is run once per benchmark, at the beginning.
+ *      This argument can be NULL, in which case nothing is run.
+ * errorFn - is a function run on each return value of benchFn.
+ *      Argument errorFn can be NULL, in which case nothing is run.
+ *      Otherwise, it must return 0 when benchFn was successful, and >= 1 if it detects an error.
+ *      Execution is stopped as soon as an error is detected, and the triggering return value is stored into sumOfReturn.
+ * blockCount - number of blocks. Size of all array parameters : srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults
+ * srcBuffers - an array of buffers to be operated on by benchFn
+ * srcSizes - an array of the sizes of above buffers
+ * dstBuffers - an array of buffers to be written into by benchFn
+ * dstCapacities - an array of the capacities of above buffers
+ * blockResults - Optional: store the return value of benchFn for each block. Use NULL if this result is not requested.
+ * nbLoops - defines number of times benchFn is run.
+ * @return: a variant, which either expresses an error, or carries a valid BMK_runTime_t result.
+ *          Use BMK_isSuccessful_runOutcome() to check if function was successful.
+ *          If yes, extract the result with BMK_extract_runTime(),
+ *          it will contain :
+ *              .sumOfReturn : the sum of all return values of benchFn through all of blocks
+ *              .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops)
+ *          .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer,
+ *              in which case, this value will be the total amount of bytes written into dstBuffer.
+ */
+BMK_runOutcome_t BMK_benchFunction(
+                        BMK_benchFn_t benchFn, void* benchPayload,
+                        BMK_initFn_t initFn, void* initPayload,
+                        BMK_errorFn_t errorFn,
+                        size_t blockCount,
+                        const void *const * srcBuffers, const size_t* srcSizes,
+                        void *const * dstBuffers, const size_t* dstCapacities,
+                        size_t* blockResults,
+                        unsigned nbLoops);
+
+
+
+/* ====  Benchmark any function, returning intermediate results  ==== */
+
+/* state information tracking benchmark session */
+typedef struct BMK_timedFnState_s BMK_timedFnState_t;
+
+/* BMK_createTimedFnState() and BMK_resetTimedFnState() :
+ * Create/Set BMK_timedFnState_t for next benchmark session,
+ * which shall last a minimum of total_ms milliseconds,
+ * producing intermediate results, paced at interval of (approximately) run_ms.
+ */
+BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms);
+void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms);
+void BMK_freeTimedFnState(BMK_timedFnState_t* state);
+
+
+/* Tells if duration of all benchmark runs has exceeded total_ms
+ */
+int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState);
+
+
+/* BMK_benchTimedFn() :
+ * Similar to BMK_benchFunction(), most arguments being identical.
+ * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms.
+ * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms.
+ * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms)
+ *         call BMK_benchTimedFn() repetitively, each measurement is supposed to last about run_ms
+ *         Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn()
+ */
+BMK_runOutcome_t BMK_benchTimedFn(
+                    BMK_timedFnState_t* timedFnState,
+                    BMK_benchFn_t benchFn, void* benchPayload,
+                    BMK_initFn_t initFn, void* initPayload,
+                    BMK_errorFn_t errorFn,
+                    size_t blockCount,
+                    const void *const * srcBlockBuffers, const size_t* srcBlockSizes,
+                    void *const * dstBlockBuffers, const size_t* dstBlockCapacities,
+                    size_t* blockResults);
+
+
+
+#endif   /* BENCH_FN_H_23876 */
+
+#if defined (__cplusplus)
+}
+#endif
diff --git a/programs/bench.c b/programs/benchzstd.c
similarity index 80%
rename from programs/bench.c
rename to programs/benchzstd.c
index 2bbaa9d0a..6738fe952 100644
--- a/programs/bench.c
+++ b/programs/benchzstd.c
@@ -9,7 +9,6 @@
  */
 
 
-
 /* **************************************
 *  Tuning parameters
 ****************************************/
@@ -18,14 +17,6 @@
 #endif
 
 
-/* **************************************
-*  Compiler Warnings
-****************************************/
-#ifdef _MSC_VER
-#  pragma warning(disable : 4127)   /* disable: C4127: conditional expression is constant */
-#endif
-
-
 /* *************************************
 *  Includes
 ***************************************/
@@ -36,12 +27,13 @@
 #include <stdio.h>       /* fprintf, fopen */
 #include <assert.h>      /* assert */
 
+#include "benchfn.h"
 #include "mem.h"
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
 #include "datagen.h"     /* RDG_genBuffer */
 #include "xxhash.h"
-#include "bench.h"
+#include "benchzstd.h"
 #include "zstd_errors.h"
 
 
@@ -276,219 +268,6 @@ static size_t local_defaultDecompress(
 }
 
 
-/*===  Benchmarking an arbitrary function  ===*/
-
-int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome)
-{
-    return outcome.tag == 0;
-}
-
-/* warning : this function will stop program execution if outcome is invalid !
- *           check outcome validity first, using BMK_isValid_runResult() */
-BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome)
-{
-    assert(outcome.tag == 0);
-    return outcome.internal_never_use_directly;
-}
-
-static BMK_runOutcome_t BMK_runOutcome_error(void)
-{
-    BMK_runOutcome_t b;
-    memset(&b, 0, sizeof(b));
-    b.tag = 1;
-    return b;
-}
-
-static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime)
-{
-    BMK_runOutcome_t outcome;
-    outcome.tag = 0;
-    outcome.internal_never_use_directly = runTime;
-    return outcome;
-}
-
-
-/* initFn will be measured once, benchFn will be measured `nbLoops` times */
-/* initFn is optional, provide NULL if none */
-/* benchFn must return size_t field compliant with ZSTD_isError for error valuee */
-/* takes # of blocks and list of size & stuff for each. */
-/* can report result of benchFn for each block into blockResult. */
-/* blockResult is optional, provide NULL if this information is not required */
-/* note : time per loop could be zero if run time < timer resolution */
-BMK_runOutcome_t BMK_benchFunction(
-            BMK_benchFn_t benchFn, void* benchPayload,
-            BMK_initFn_t initFn, void* initPayload,
-            size_t blockCount,
-            const void* const * srcBlockBuffers, const size_t* srcBlockSizes,
-            void* const * dstBlockBuffers, const size_t* dstBlockCapacities,
-            size_t* blockResults,
-            unsigned nbLoops)
-{
-    size_t dstSize = 0;
-
-    if(!nbLoops) {
-        RETURN_QUIET_ERROR(2, BMK_runOutcome_t, "nbLoops must be nonzero ");
-    }
-
-    /* init */
-    {   size_t i;
-        for(i = 0; i < blockCount; i++) {
-            memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]);  /* warm up and erase result buffer */
-        }
-#if 0
-        /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops
-         * (Makes former slower)
-         */
-        UTIL_sleepMilli(5);  /* give processor time to other processes */
-        UTIL_waitForNextTick();
-#endif
-    }
-
-    /* benchmark */
-    {   UTIL_time_t const clockStart = UTIL_getTime();
-        unsigned loopNb, blockNb;
-        if (initFn != NULL) initFn(initPayload);
-        for (loopNb = 0; loopNb < nbLoops; loopNb++) {
-            for (blockNb = 0; blockNb < blockCount; blockNb++) {
-                size_t const res = benchFn(srcBlockBuffers[blockNb], srcBlockSizes[blockNb],
-                                    dstBlockBuffers[blockNb], dstBlockCapacities[blockNb],
-                                    benchPayload);
-                if(ZSTD_isError(res)) {
-                    RETURN_QUIET_ERROR(2, BMK_runOutcome_t,
-                        "Function benchmark failed on block %u of size %u : %s",
-                        blockNb, (U32)dstBlockCapacities[blockNb], ZSTD_getErrorName(res));
-                } else if (loopNb == 0) {
-                    dstSize += res;
-                    if (blockResults != NULL) blockResults[blockNb] = res;
-            }   }
-        }  /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */
-
-        {   U64 const totalTime = UTIL_clockSpanNano(clockStart);
-            BMK_runTime_t rt;
-            rt.nanoSecPerRun = totalTime / nbLoops;
-            rt.sumOfReturn = dstSize;
-            return BMK_setValid_runTime(rt);
-    }   }
-}
-
-
-/* ====  Benchmarking any function, providing intermediate results  ==== */
-
-struct BMK_timedFnState_s {
-    U64 timeSpent_ns;
-    U64 timeBudget_ns;
-    U64 runBudget_ns;
-    BMK_runTime_t fastestRun;
-    unsigned nbLoops;
-    UTIL_time_t coolTime;
-};  /* typedef'd to BMK_timedFnState_t within bench.h */
-
-BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms)
-{
-    BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r));
-    if (r == NULL) return NULL;   /* malloc() error */
-    BMK_resetTimedFnState(r, total_ms, run_ms);
-    return r;
-}
-
-void BMK_freeTimedFnState(BMK_timedFnState_t* state) {
-    free(state);
-}
-
-void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms)
-{
-    if (!total_ms) total_ms = 1 ;
-    if (!run_ms) run_ms = 1;
-    if (run_ms > total_ms) run_ms = total_ms;
-    timedFnState->timeSpent_ns = 0;
-    timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000;
-    timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000;
-    timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL);
-    timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL);
-    timedFnState->nbLoops = 1;
-    timedFnState->coolTime = UTIL_getTime();
-}
-
-/* Tells if nb of seconds set in timedFnState for all runs is spent.
- * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */
-int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState)
-{
-    return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns);
-}
-
-
-#define MINUSABLETIME  (TIMELOOP_NANOSEC / 2)  /* 0.5 seconds */
-
-BMK_runOutcome_t BMK_benchTimedFn(
-            BMK_timedFnState_t* cont,
-            BMK_benchFn_t benchFn, void* benchPayload,
-            BMK_initFn_t initFn, void* initPayload,
-            size_t blockCount,
-            const void* const* srcBlockBuffers, const size_t* srcBlockSizes,
-            void * const * dstBlockBuffers, const size_t * dstBlockCapacities,
-            size_t* blockResults)
-{
-    U64 const runBudget_ns = cont->runBudget_ns;
-    U64 const runTimeMin_ns = runBudget_ns / 2;
-    int completed = 0;
-    BMK_runTime_t bestRunTime = cont->fastestRun;
-
-    while (!completed) {
-        BMK_runOutcome_t runResult;
-
-        /* Overheat protection */
-        if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) {
-            DEBUGOUTPUT("\rcooling down ...    \r");
-            UTIL_sleep(COOLPERIOD_SEC);
-            cont->coolTime = UTIL_getTime();
-        }
-
-        /* reinitialize capacity */
-        runResult = BMK_benchFunction(benchFn, benchPayload,
-                                    initFn, initPayload,
-                                    blockCount,
-                                    srcBlockBuffers, srcBlockSizes,
-                                    dstBlockBuffers, dstBlockCapacities,
-                                    blockResults,
-                                    cont->nbLoops);
-
-        if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */
-            return BMK_runOutcome_error();
-        }
-
-        {   BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult);
-            U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops;
-
-            cont->timeSpent_ns += loopDuration_ns;
-
-            /* estimate nbLoops for next run to last approximately 1 second */
-            if (loopDuration_ns > (runBudget_ns / 50)) {
-                U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun);
-                cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1;
-            } else {
-                /* previous run was too short : blindly increase workload by x multiplier */
-                const unsigned multiplier = 10;
-                assert(cont->nbLoops < ((unsigned)-1) / multiplier);  /* avoid overflow */
-                cont->nbLoops *= multiplier;
-            }
-
-            if(loopDuration_ns < runTimeMin_ns) {
-                /* don't report results for which benchmark run time was too small : increased risks of rounding errors */
-                assert(completed == 0);
-                continue;
-            } else {
-                if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) {
-                    bestRunTime = newRunTime;
-                }
-                completed = 1;
-            }
-        }
-    }   /* while (!completed) */
-
-    return BMK_setValid_runTime(bestRunTime);
-}
-
-
 /* ================================================================= */
 /*      Benchmark Zstandard, mem-to-mem scenarios                    */
 /* ================================================================= */
@@ -645,6 +424,7 @@ BMK_benchMemAdvancedNoAlloc(
                         BMK_benchTimedFn( timeStateCompress,
                                         &local_defaultCompress, cctx,
                                         &local_initCCtx, &cctxprep,
+                                        ZSTD_isError,
                                         nbBlocks,
                                         srcPtrs, srcSizes,
                                         cPtrs, cCapacities,
@@ -679,6 +459,7 @@ BMK_benchMemAdvancedNoAlloc(
                         BMK_benchTimedFn(timeStateDecompress,
                                         &local_defaultDecompress, dctx,
                                         &local_initDCtx, &dctxprep,
+                                        ZSTD_isError,
                                         nbBlocks,
                                         (const void *const *)cPtrs, cSizes,
                                         resPtrs, resSizes,
diff --git a/programs/bench.h b/programs/benchzstd.h
similarity index 61%
rename from programs/bench.h
rename to programs/benchzstd.h
index 13ca5b50b..9860adf16 100644
--- a/programs/bench.h
+++ b/programs/benchzstd.h
@@ -8,12 +8,18 @@
  * You may select, at your option, one of the above-listed licenses.
  */
 
+ /* benchzstd :
+  * benchmark Zstandard compression / decompression
+  * over a set of files or buffers
+  * and display progress result and final summary
+  */
+
 #if defined (__cplusplus)
 extern "C" {
 #endif
 
-#ifndef BENCH_H_121279284357
-#define BENCH_H_121279284357
+#ifndef BENCH_ZSTD_H_3242387
+#define BENCH_ZSTD_H_3242387
 
 /* ===  Dependencies  === */
 #include <stddef.h>   /* size_t */
@@ -142,9 +148,9 @@ BMK_benchOutcome_t BMK_benchFilesAdvanced(
  *          .cMem  : memory budget required for the compression context
  */
 BMK_benchOutcome_t BMK_syntheticTest(
-                              int cLevel, double compressibility,
-                              const ZSTD_compressionParameters* compressionParams,
-                              int displayLevel, const BMK_advancedParams_t* adv);
+                          int cLevel, double compressibility,
+                          const ZSTD_compressionParameters* compressionParams,
+                          int displayLevel, const BMK_advancedParams_t* adv);
 
 
 
@@ -181,6 +187,7 @@ BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize,
                         const void* dictBuffer, size_t dictBufferSize,
                         int displayLevel, const char* displayName);
 
+
 /* BMK_benchMemAdvanced() : same as BMK_benchMem()
  * with following additional options :
  * dstBuffer - destination buffer to write compressed output in, NULL if none provided.
@@ -197,106 +204,8 @@ BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize,
 
 
 
-/* ====  Benchmarking any function, iterated on a set of blocks  ==== */
-
-typedef struct {
-    unsigned long long nanoSecPerRun;  /* time per iteration */
-    size_t sumOfReturn;       /* sum of return values */
-} BMK_runTime_t;
-
-VARIANT_ERROR_RESULT(BMK_runTime_t, BMK_runOutcome_t);
-
-/* check first if the return structure represents an error or a valid result */
-int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome);
-
-/* extract result from variant type.
- * note : this function will abort() program execution if result is not valid
- *        check result validity first, by using BMK_isSuccessful_runOutcome()
- */
-BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome);
-
-
-
-typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload);
-typedef size_t (*BMK_initFn_t)(void* initPayload);
-
-
-/* BMK_benchFunction() :
- * This function times the execution of 2 argument functions, benchFn and initFn  */
-
-/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload)
- *      is run nbLoops times
- * initFn - (*initFn)(initPayload) is run once per benchmark, at the beginning.
- *      This argument can be NULL, in which case nothing is run.
- * blockCount - number of blocks. Size of all array parameters : srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults
- * srcBuffers - an array of buffers to be operated on by benchFn
- * srcSizes - an array of the sizes of above buffers
- * dstBuffers - an array of buffers to be written into by benchFn
- * dstCapacities - an array of the capacities of above buffers
- * blockResults - Optional: store the return value of benchFn for each block. Use NULL if this result is not requested.
- * nbLoops - defines number of times benchFn is run.
- * @return: a variant, which express either an error, or can generate a valid BMK_runTime_t result.
- *          Use BMK_isSuccessful_runOutcome() to check if function was successful.
- *          If yes, extract the result with BMK_extract_runTime(),
- *          it will contain :
- *              .sumOfReturn : the sum of all return values of benchFn through all of blocks
- *              .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops)
- *          .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer,
- *              in which case, this value will be the total amount of bytes written into dstBuffer.
- */
-BMK_runOutcome_t BMK_benchFunction(
-                        BMK_benchFn_t benchFn, void* benchPayload,
-                        BMK_initFn_t initFn, void* initPayload,
-                        size_t blockCount,
-                        const void *const * srcBuffers, const size_t* srcSizes,
-                        void *const * dstBuffers, const size_t* dstCapacities,
-                        size_t* blockResults,
-                        unsigned nbLoops);
-
-
-
-/* ====  Benchmark any function, providing intermediate results  ==== */
-
-/* state information tracking benchmark session */
-typedef struct BMK_timedFnState_s BMK_timedFnState_t;
-
-/* BMK_createTimedFnState() and BMK_resetTimedFnState() :
- * Create/Set BMK_timedFnState_t for next benchmark session,
- * which shall last a minimum of total_ms milliseconds,
- * producing intermediate results, paced at interval of (approximately) run_ms.
- */
-BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms);
-void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms);
-void BMK_freeTimedFnState(BMK_timedFnState_t* state);
-
-
-/* Tells if duration of all benchmark runs has exceeded total_ms
- */
-int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState);
-
-
-/* BMK_benchTimedFn() :
- * Similar to BMK_benchFunction(), most arguments being identical.
- * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms.
- * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms.
- * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms)
- *         call BMK_benchTimedFn() repetitively, each measurement is supposed to last about run_ms
- *         Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn()
- */
-BMK_runOutcome_t BMK_benchTimedFn(
-                    BMK_timedFnState_t* timedFnState,
-                    BMK_benchFn_t benchFn, void* benchPayload,
-                    BMK_initFn_t initFn, void* initPayload,
-                    size_t blockCount,
-                    const void *const * srcBlockBuffers, const size_t* srcBlockSizes,
-                    void *const * dstBlockBuffers, const size_t* dstBlockCapacities,
-                    size_t* blockResults);
-
-
-
-
 
-#endif   /* BENCH_H_121279284357 */
+#endif   /* BENCH_ZSTD_H_3242387 */
 
 #if defined (__cplusplus)
 }
diff --git a/programs/zstdcli.c b/programs/zstdcli.c
index 1545d1cac..153de961d 100644
--- a/programs/zstdcli.c
+++ b/programs/zstdcli.c
@@ -32,7 +32,7 @@
 #include <errno.h>    /* errno */
 #include "fileio.h"   /* stdinmark, stdoutmark, ZSTD_EXTENSION */
 #ifndef ZSTD_NOBENCH
-#  include "bench.h"  /* BMK_benchFiles */
+#  include "benchzstd.h"  /* BMK_benchFiles */
 #endif
 #ifndef ZSTD_NODICT
 #  include "dibio.h"  /* ZDICT_cover_params_t, DiB_trainFromFiles() */
diff --git a/tests/Makefile b/tests/Makefile
index da68bddcf..f363001b1 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -132,18 +132,18 @@ fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP)
 fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD)
 fullbench fullbench32 : DEBUGFLAGS = -DNDEBUG  # turn off assert() for speed measurements
 fullbench fullbench32 : $(ZSTD_FILES)
-fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/bench.c fullbench.c
+fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c
 	$(CC) $(FLAGS) $^ -o $@$(EXT)
 
 fullbench-lib : CPPFLAGS += -DXXH_NAMESPACE=ZSTD_
 fullbench-lib : zstd-staticLib
-fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/bench.c fullbench.c
+fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c
 	$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) $(ZSTDDIR)/libzstd.a
 
 # note : broken : requires unavailable symbols
 fullbench-dll : zstd-dll
 fullbench-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd
-fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c fullbench.c
+fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c
 #	$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll
 	$(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT)
 
@@ -203,7 +203,7 @@ zstreamtest-dll : $(ZSTREAM_LOCAL_FILES)
 	$(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT)
 
 paramgrill : DEBUGFLAGS =  # turn off assert() by default for speed measurements
-paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/bench.c $(PRGDIR)/datagen.c paramgrill.c
+paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c paramgrill.c
 	$(CC) $(FLAGS) $^ -lm -o $@$(EXT)
 
 datagen : $(PRGDIR)/datagen.c datagencli.c
diff --git a/tests/fullbench.c b/tests/fullbench.c
index faf8fe759..bd4b116d9 100644
--- a/tests/fullbench.c
+++ b/tests/fullbench.c
@@ -30,7 +30,8 @@
 #include "zstd.h"        /* ZSTD_versionString */
 #include "util.h"        /* time functions */
 #include "datagen.h"
-#include "bench.h"       /* CustomBench*/
+#include "benchfn.h"       /* CustomBench*/
+#include "benchzstd.h"     /* MB_UNIT */
 
 
 /*_************************************
@@ -524,6 +525,7 @@ static size_t benchMem(U32 benchNb,
                     BMK_benchTimedFn( tfs,
                             benchFunction, buff2,
                             NULL, NULL,   /* initFn */
+                            ZSTD_isError,
                             1,  /* blockCount */
                             &src, &srcSize,
                             &dstBuffv, &dstBuffSize,
diff --git a/tests/paramgrill.c b/tests/paramgrill.c
index 7a4be854a..1b33c086d 100644
--- a/tests/paramgrill.c
+++ b/tests/paramgrill.c
@@ -25,7 +25,8 @@
 #include "datagen.h"
 #include "xxhash.h"
 #include "util.h"
-#include "bench.h"
+#include "benchfn.h"
+#include "benchzstd.h"
 #include "zstd_errors.h"
 #include "zstd_internal.h"     /* should not be needed */
 
@@ -1455,6 +1456,7 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
             BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress,
                                             &local_defaultCompress, cctx,
                                             &local_initCCtx, &cctxprep,
+                                            ZSTD_isError,
                                             nbBlocks,
                                             srcPtrs, srcSizes,
                                             dstPtrs, dstCapacities,
@@ -1479,6 +1481,7 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx,
             BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress,
                                         &local_defaultDecompress, dctx,
                                         &local_initDCtx, &dctxprep,
+                                        ZSTD_isError,
                                         nbBlocks,
                                         (const void* const*)dstPtrs, dstSizes,
                                         resPtrs, resSizes,