From: Yann Collet Date: Tue, 13 Nov 2018 19:01:59 +0000 (-0800) Subject: separated bench module into benchfn and benchzstd X-Git-Tag: v1.3.8~47^2~7 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d38063f8ae0ef2e9834fe0b6b8f4cf0943093276;p=thirdparty%2Fzstd.git separated bench module into benchfn and benchzstd it shall be possible to use benchfn without any dependency on zstd. --- diff --git a/contrib/largeNbDicts/Makefile b/contrib/largeNbDicts/Makefile index 730250f96..541f3969b 100644 --- a/contrib/largeNbDicts/Makefile +++ b/contrib/largeNbDicts/Makefile @@ -28,14 +28,14 @@ default: largeNbDicts all : largeNbDicts -largeNbDicts: util.o bench.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD) +largeNbDicts: util.o benchfn.o datagen.o xxhash.o largeNbDicts.c $(LIBZSTD) $(CC) $(CPPFLAGS) $(CFLAGS) $^ $(LDFLAGS) -o $@ .PHONY: $(LIBZSTD) $(LIBZSTD): $(MAKE) -C $(LIBDIR) libzstd.a CFLAGS="$(CFLAGS)" -bench.o : $(PROGDIR)/bench.c +benchfn.o : $(PROGDIR)/benchfn.c $(CC) $(CPPFLAGS) $(CFLAGS) $^ -c datagen.o: $(PROGDIR)/datagen.c diff --git a/contrib/largeNbDicts/largeNbDicts.c b/contrib/largeNbDicts/largeNbDicts.c index d7639fc40..2605c6003 100644 --- a/contrib/largeNbDicts/largeNbDicts.c +++ b/contrib/largeNbDicts/largeNbDicts.c @@ -24,7 +24,7 @@ #include /* assert */ #include "util.h" -#include "bench.h" +#include "benchfn.h" #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "zdict.h" @@ -543,6 +543,7 @@ static int benchMem(slice_collection_t dstBlocks, BMK_runOutcome_t const outcome = BMK_benchTimedFn(benchState, decompress, &di, NULL, NULL, + ZSTD_isError, dstBlocks.nbSlices, (const void* const *)srcBlocks.slicePtrs, srcBlocks.capacities, dstBlocks.slicePtrs, dstBlocks.capacities, diff --git a/programs/Makefile b/programs/Makefile index 0bfb8b9a6..77c1d6a2d 100644 --- a/programs/Makefile +++ b/programs/Makefile @@ -160,7 +160,7 @@ $(ZSTDDECOMP_O): CFLAGS += $(ALIGN_LOOP) zstd : CPPFLAGS += $(THREAD_CPP) $(ZLIBCPP) $(LZMACPP) $(LZ4CPP) 
zstd : LDFLAGS += $(THREAD_LD) $(ZLIBLD) $(LZMALD) $(LZ4LD) $(DEBUGFLAGS_LD) zstd : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o bench.o datagen.o dibio.o +zstd : $(ZSTDLIB_FILES) zstdcli.o util.o fileio.o benchfn.o benchzstd.o datagen.o dibio.o @echo "$(THREAD_MSG)" @echo "$(ZLIB_MSG)" @echo "$(LZMA_MSG)" @@ -178,13 +178,13 @@ zstd-release: zstd zstd32 : CPPFLAGS += $(THREAD_CPP) zstd32 : LDFLAGS += $(THREAD_LD) zstd32 : CPPFLAGS += -DZSTD_LEGACY_SUPPORT=$(ZSTD_LEGACY_SUPPORT) -zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c bench.c datagen.c dibio.c +zstd32 : $(ZSTDLIB_FILES) zstdcli.c util.c fileio.c benchfn.c benchzstd.c datagen.c dibio.c ifneq (,$(filter Windows%,$(OS))) windres/generate_res.bat endif $(CC) -m32 $(FLAGS) $^ $(RES32_FILE) -o $@$(EXT) -zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c bench.o datagen.o dibio.o +zstd-nolegacy : $(ZSTD_FILES) $(ZDICT_FILES) zstdcli.o util.o fileio.c benchfn.o benchzstd.o datagen.o dibio.o $(CC) $(FLAGS) $^ -o $@$(EXT) $(LDFLAGS) zstd-nomt : THREAD_CPP := diff --git a/programs/benchfn.c b/programs/benchfn.c new file mode 100644 index 000000000..5ba0c96c2 --- /dev/null +++ b/programs/benchfn.c @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. 
+ */ + + + +/* ************************************* +* Includes +***************************************/ +#include "platform.h" /* Large Files support */ +#include "util.h" /* UTIL_time_t, UTIL_getTime, UTIL_clockSpanNano, UTIL_clockSpanMicro, UTIL_sleep */ +#include <stdlib.h> /* malloc, free */ +#include <string.h> /* memset */ +#include <stdio.h> /* fprintf */ +#include <assert.h> /* assert */ + +#include "mem.h" +#include "benchfn.h" + + +/* ************************************* +* Constants +***************************************/ +#define TIMELOOP_MICROSEC (1*1000000ULL) /* 1 second */ +#define TIMELOOP_NANOSEC (1*1000000000ULL) /* 1 second */ +#define ACTIVEPERIOD_MICROSEC (70*TIMELOOP_MICROSEC) /* 70 seconds */ +#define COOLPERIOD_SEC 10 + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + + +/* ************************************* +* Errors +***************************************/ +#ifndef DEBUG +# define DEBUG 0 +#endif + +#define DISPLAY(...) fprintf(stderr, __VA_ARGS__) +#define DEBUGOUTPUT(...) { if (DEBUG) DISPLAY(__VA_ARGS__); } + +/* error without displaying : + * traces location, error number and message on stderr only when DEBUG is non-zero, + * then makes the calling function return retValue */ +#define RETURN_QUIET_ERROR(errorNum, retValue, ...) { \ + DEBUGOUTPUT("%s: %i: \n", __FILE__, __LINE__); \ + DEBUGOUTPUT("Error %i : ", errorNum); \ + DEBUGOUTPUT(__VA_ARGS__); \ + DEBUGOUTPUT(" \n"); \ + return retValue; \ +} + + +/* ************************************* +* Benchmarking an arbitrary function +***************************************/ + +/* BMK_isSuccessful_runOutcome() : + * @return 1 if `outcome` holds a valid result (tag == 0), 0 if it represents an error */ +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) +{ + return outcome.tag == 0; +} + +/* warning : this function will stop program execution if outcome is invalid ! 
+ * check outcome validity first, using BMK_isSuccessful_runOutcome() */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) +{ + assert(outcome.tag == 0); + return outcome.internal_never_use_directly; +} + +/* builds an error outcome : payload is zeroed, tag is set to 1 */ +static BMK_runOutcome_t BMK_runOutcome_error(void) +{ + BMK_runOutcome_t b; + memset(&b, 0, sizeof(b)); + b.tag = 1; + return b; +} + +/* wraps a valid runTime into an outcome (tag == 0) */ +static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) +{ + BMK_runOutcome_t outcome; + outcome.tag = 0; + outcome.internal_never_use_directly = runTime; + return outcome; +} + + +/* initFn will be measured once, benchFn will be measured `nbLoops` times */ +/* initFn is optional, provide NULL if none */ +/* benchFn must return a size_t value compliant with errorFn */ +/* takes # of blocks and list of size & stuff for each. */ +/* can report result of benchFn for each block into blockResults. */ +/* blockResults is optional, provide NULL if this information is not required */ +/* note : errorFn, blockResults and sumOfReturn only consider the first loop (loopNb == 0) */ +/* note : nbLoops must be >= 1, otherwise an error outcome is returned */ +/* note : time per loop could be zero if run time < timer resolution */ +BMK_runOutcome_t BMK_benchFunction( + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + BMK_errorFn_t errorFn, + size_t blockCount, + const void* const * srcBlockBuffers, const size_t* srcBlockSizes, + void* const * dstBlockBuffers, const size_t* dstBlockCapacities, + size_t* blockResults, + unsigned nbLoops) +{ + size_t dstSize = 0; + + if(!nbLoops) { + RETURN_QUIET_ERROR(2, BMK_runOutcome_error(), "nbLoops must be nonzero "); + } + + /* init */ + { size_t i; + for(i = 0; i < blockCount; i++) { + memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ + } +#if 0 + /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops + * (Makes former slower) + */ + UTIL_sleepMilli(5); /* give processor time to other processes */ + UTIL_waitForNextTick(); +#endif + } + + /* benchmark */ + { UTIL_time_t const clockStart = UTIL_getTime(); + unsigned loopNb, 
blockNb; + if (initFn != NULL) initFn(initPayload); + for (loopNb = 0; loopNb < nbLoops; loopNb++) { + for (blockNb = 0; blockNb < blockCount; blockNb++) { + size_t const res = benchFn(srcBlockBuffers[blockNb], srcBlockSizes[blockNb], + dstBlockBuffers[blockNb], dstBlockCapacities[blockNb], + benchPayload); + if (loopNb == 0) { + if (errorFn != NULL) + if (errorFn(res)) { + BMK_runOutcome_t ro = BMK_runOutcome_error(); + ro.internal_never_use_directly.sumOfReturn = res; /* hand the failing return value back to the caller */ + RETURN_QUIET_ERROR(2, ro, + "Function benchmark failed on block %u (of size %u) with error %i", + blockNb, (U32)srcBlockSizes[blockNb], (int)res); + } + dstSize += res; + if (blockResults != NULL) blockResults[blockNb] = res; + } } + } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ + + { U64 const totalTime = UTIL_clockSpanNano(clockStart); + BMK_runTime_t rt; + rt.nanoSecPerRun = totalTime / nbLoops; + rt.sumOfReturn = dstSize; + return BMK_setValid_runTime(rt); + } } +} + + +/* ==== Benchmarking any function, providing intermediate results ==== */ + +struct BMK_timedFnState_s { + U64 timeSpent_ns; /* cumulated duration of successful runs */ + U64 timeBudget_ns; /* total_ms, converted to nanoseconds */ + U64 runBudget_ns; /* run_ms, converted to nanoseconds */ + BMK_runTime_t fastestRun; + unsigned nbLoops; /* nb of loops requested at next BMK_benchFunction() call */ + UTIL_time_t coolTime; /* start of current activity period, for overheat protection */ +}; /* typedef'd to BMK_timedFnState_t within benchfn.h */ + +/* BMK_createTimedFnState() : + * @return a newly allocated and initialized state, or NULL if allocation fails. + * Release it with BMK_freeTimedFnState() */ +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) +{ + BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); + if (r == NULL) return NULL; /* malloc() error */ + BMK_resetTimedFnState(r, total_ms, run_ms); + return r; +} + +void BMK_freeTimedFnState(BMK_timedFnState_t* state) { + free(state); +} + +/* BMK_resetTimedFnState() : + * a value of 0 for total_ms or run_ms is replaced by 1, and run_ms is capped to total_ms */ +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) +{ + if (!total_ms) total_ms = 1 ; + if (!run_ms) run_ms = 1; + if (run_ms > total_ms) run_ms = total_ms; + timedFnState->timeSpent_ns = 0; + timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000; + timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000; +
timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL); + timedFnState->fastestRun.sumOfReturn = (size_t)(-1LL); + timedFnState->nbLoops = 1; + timedFnState->coolTime = UTIL_getTime(); +} + +/* Tells if the total time budget (total_ms) set in timedFnState has been spent. + * note : a failed BMK_benchTimedFn() call returns before updating timeSpent_ns, + * so an error does not, by itself, mark the session as completed. */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) +{ + return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); +} + + +#undef MIN +#define MIN(a,b) ( (a) < (b) ? (a) : (b) ) + +#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds (not referenced in this file) */ + +BMK_runOutcome_t BMK_benchTimedFn( + BMK_timedFnState_t* cont, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + BMK_errorFn_t errorFn, + size_t blockCount, + const void* const* srcBlockBuffers, const size_t* srcBlockSizes, + void * const * dstBlockBuffers, const size_t * dstBlockCapacities, + size_t* blockResults) +{ + U64 const runBudget_ns = cont->runBudget_ns; + U64 const runTimeMin_ns = runBudget_ns / 2; + int completed = 0; + BMK_runTime_t bestRunTime = cont->fastestRun; + + while (!completed) { + BMK_runOutcome_t runResult; + + /* Overheat protection : after ACTIVEPERIOD_MICROSEC of activity, sleep for COOLPERIOD_SEC seconds */ + if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) { + DEBUGOUTPUT("\rcooling down ... 
\r"); + UTIL_sleep(COOLPERIOD_SEC); + cont->coolTime = UTIL_getTime(); + } + + /* run benchFn over all blocks, cont->nbLoops times */ + runResult = BMK_benchFunction(benchFn, benchPayload, + initFn, initPayload, + errorFn, + blockCount, + srcBlockBuffers, srcBlockSizes, + dstBlockBuffers, dstBlockCapacities, + blockResults, + cont->nbLoops); + + if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out, a fresh error outcome is returned */ + return BMK_runOutcome_error(); + } + + { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); + U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; + + cont->timeSpent_ns += loopDuration_ns; + + /* estimate nbLoops so that next run lasts approximately runBudget_ns */ + if (loopDuration_ns > (runBudget_ns / 50)) { + U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); + cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1; + } else { + /* previous run was too short : blindly increase workload by x multiplier */ + const unsigned multiplier = 10; + assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ + cont->nbLoops *= multiplier; + } + + if(loopDuration_ns < runTimeMin_ns) { + /* don't report results for which benchmark run time was too small (less than half of runBudget_ns) : increased risks of rounding errors */ + assert(completed == 0); + continue; + } else { + if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { + bestRunTime = newRunTime; + } + completed = 1; + } + } + } /* while (!completed) */ + + return BMK_setValid_runTime(bestRunTime); +} diff --git a/programs/benchfn.h b/programs/benchfn.h new file mode 100644 index 000000000..3aff676d4 --- /dev/null +++ b/programs/benchfn.h @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). 
+ * You may select, at your option, one of the above-listed licenses. + */ + + +/* benchfn : + * benchmark any function on a set of inputs + * providing result in nanoSecPerRun + * or detecting and returning an error + */ + +#if defined (__cplusplus) +extern "C" { +#endif + +#ifndef BENCH_FN_H_23876 +#define BENCH_FN_H_23876 + +/* === Dependencies === */ +#include <stddef.h> /* size_t */ + + +/* === Variant === */ + +/* Creates a variant `variantName`, able to express "error or valid result". + * Users of a function returning `variantName` + * must first check if result is valid, using BMK_isSuccessful_*(), + * and only then can extract `baseType`. + */ +#define VARIANT_ERROR_RESULT(baseType, variantName) \ + \ +typedef struct { \ + baseType internal_never_use_directly; \ + int tag; \ +} variantName + + +/* ==== Benchmarking any function, iterated on a set of blocks ==== */ + +typedef struct { + unsigned long long nanoSecPerRun; /* time per iteration */ + size_t sumOfReturn; /* sum of return values */ +} BMK_runTime_t; + +VARIANT_ERROR_RESULT(BMK_runTime_t, BMK_runOutcome_t); /* declares BMK_runOutcome_t */ + +/* check first if the return structure represents an error or a valid result */ +int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome); + +/* extract result from variant type. + * note : this function will abort() program execution (through assert()) if result is not valid. 
+ * check result validity first, by using BMK_isSuccessful_runOutcome() + */ +BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome); + + + +typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload); +typedef size_t (*BMK_initFn_t)(void* initPayload); +typedef unsigned (*BMK_errorFn_t)(size_t); + + +/* BMK_benchFunction() : + * This function times the execution of 2 functions passed as arguments, benchFn and initFn */ + +/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) + * is run nbLoops times on each of the blockCount blocks + * initFn - (*initFn)(initPayload) is run once per benchmark, at the beginning. + * This argument can be NULL, in which case nothing is run. + * errorFn - is a function run on the return values of benchFn produced during the first loop + * (return values of subsequent loops are not checked). + * Argument errorFn can be NULL, in which case nothing is run. + * Otherwise, it must return 0 when benchFn was successful, and >= 1 if it detects an error. + * Execution is stopped as soon as an error is detected, and the triggering return value is stored into sumOfReturn. + * blockCount - number of blocks. Size of all array parameters : srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults + * srcBuffers - an array of buffers to be operated on by benchFn + * srcSizes - an array of the sizes of above buffers + * dstBuffers - an array of buffers to be written into by benchFn + * dstCapacities - an array of the capacities of above buffers + * blockResults - Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. + * nbLoops - defines number of times benchFn is run. Must be >= 1, otherwise an error is returned. + * @return: a variant, which expresses either an error, or can generate a valid BMK_runTime_t result. + * Use BMK_isSuccessful_runOutcome() to check if function was successful. 
+ * If yes, extract the result with BMK_extract_runTime(), + * it will contain : + * .sumOfReturn : the sum of the return values of benchFn over all blocks, collected during the first loop + * .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops) + * .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer, + * in which case, this value will be the total amount of bytes written into dstBuffer. + */ +BMK_runOutcome_t BMK_benchFunction( + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + BMK_errorFn_t errorFn, + size_t blockCount, + const void *const * srcBuffers, const size_t* srcSizes, + void *const * dstBuffers, const size_t* dstCapacities, + size_t* blockResults, + unsigned nbLoops); + + + +/* ==== Benchmark any function, returning intermediate results ==== */ + +/* state information tracking benchmark session */ +typedef struct BMK_timedFnState_s BMK_timedFnState_t; + +/* BMK_createTimedFnState() and BMK_resetTimedFnState() : + * Create/Set BMK_timedFnState_t for next benchmark session, + * which shall last a minimum of total_ms milliseconds, + * producing intermediate results, paced at interval of (approximately) run_ms. + * A value of 0 for total_ms or run_ms is replaced by 1, and run_ms is capped to total_ms. + * BMK_createTimedFnState() returns NULL if allocation fails. + */ +BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms); +void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms); +void BMK_freeTimedFnState(BMK_timedFnState_t* state); + + +/* Tells if duration of all benchmark runs has reached or exceeded total_ms + */ +int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState); + + +/* BMK_benchTimedFn() : + * Similar to BMK_benchFunction(), most arguments being identical. + * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms. + * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms. 
+ * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms) + * call BMK_benchTimedFn() repetitively, each measurement is supposed to last about run_ms + * Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn() + */ +BMK_runOutcome_t BMK_benchTimedFn( + BMK_timedFnState_t* timedFnState, + BMK_benchFn_t benchFn, void* benchPayload, + BMK_initFn_t initFn, void* initPayload, + BMK_errorFn_t errorFn, + size_t blockCount, + const void *const * srcBlockBuffers, const size_t* srcBlockSizes, + void *const * dstBlockBuffers, const size_t* dstBlockCapacities, + size_t* blockResults); + + + +#endif /* BENCH_FN_H_23876 */ + +#if defined (__cplusplus) +} +#endif diff --git a/programs/bench.c b/programs/benchzstd.c similarity index 80% rename from programs/bench.c rename to programs/benchzstd.c index 2bbaa9d0a..6738fe952 100644 --- a/programs/bench.c +++ b/programs/benchzstd.c @@ -9,7 +9,6 @@ */ - /* ************************************** * Tuning parameters ****************************************/ @@ -18,14 +17,6 @@ #endif -/* ************************************** -* Compiler Warnings -****************************************/ -#ifdef _MSC_VER -# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */ -#endif - - /* ************************************* * Includes ***************************************/ @@ -36,12 +27,13 @@ #include /* fprintf, fopen */ #include /* assert */ +#include "benchfn.h" #include "mem.h" #define ZSTD_STATIC_LINKING_ONLY #include "zstd.h" #include "datagen.h" /* RDG_genBuffer */ #include "xxhash.h" -#include "bench.h" +#include "benchzstd.h" #include "zstd_errors.h" @@ -276,219 +268,6 @@ static size_t local_defaultDecompress( } -/*=== Benchmarking an arbitrary function ===*/ - -int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome) -{ - return outcome.tag == 0; -} - -/* warning : this function will stop program execution if outcome is 
invalid ! - * check outcome validity first, using BMK_isValid_runResult() */ -BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome) -{ - assert(outcome.tag == 0); - return outcome.internal_never_use_directly; -} - -static BMK_runOutcome_t BMK_runOutcome_error(void) -{ - BMK_runOutcome_t b; - memset(&b, 0, sizeof(b)); - b.tag = 1; - return b; -} - -static BMK_runOutcome_t BMK_setValid_runTime(BMK_runTime_t runTime) -{ - BMK_runOutcome_t outcome; - outcome.tag = 0; - outcome.internal_never_use_directly = runTime; - return outcome; -} - - -/* initFn will be measured once, benchFn will be measured `nbLoops` times */ -/* initFn is optional, provide NULL if none */ -/* benchFn must return size_t field compliant with ZSTD_isError for error valuee */ -/* takes # of blocks and list of size & stuff for each. */ -/* can report result of benchFn for each block into blockResult. */ -/* blockResult is optional, provide NULL if this information is not required */ -/* note : time per loop could be zero if run time < timer resolution */ -BMK_runOutcome_t BMK_benchFunction( - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void* const * srcBlockBuffers, const size_t* srcBlockSizes, - void* const * dstBlockBuffers, const size_t* dstBlockCapacities, - size_t* blockResults, - unsigned nbLoops) -{ - size_t dstSize = 0; - - if(!nbLoops) { - RETURN_QUIET_ERROR(2, BMK_runOutcome_t, "nbLoops must be nonzero "); - } - - /* init */ - { size_t i; - for(i = 0; i < blockCount; i++) { - memset(dstBlockBuffers[i], 0xE5, dstBlockCapacities[i]); /* warm up and erase result buffer */ - } -#if 0 - /* based on testing these seem to lower accuracy of multiple calls of 1 nbLoops vs 1 call of multiple nbLoops - * (Makes former slower) - */ - UTIL_sleepMilli(5); /* give processor time to other processes */ - UTIL_waitForNextTick(); -#endif - } - - /* benchmark */ - { UTIL_time_t const clockStart = UTIL_getTime(); - unsigned loopNb, 
blockNb; - if (initFn != NULL) initFn(initPayload); - for (loopNb = 0; loopNb < nbLoops; loopNb++) { - for (blockNb = 0; blockNb < blockCount; blockNb++) { - size_t const res = benchFn(srcBlockBuffers[blockNb], srcBlockSizes[blockNb], - dstBlockBuffers[blockNb], dstBlockCapacities[blockNb], - benchPayload); - if(ZSTD_isError(res)) { - RETURN_QUIET_ERROR(2, BMK_runOutcome_t, - "Function benchmark failed on block %u of size %u : %s", - blockNb, (U32)dstBlockCapacities[blockNb], ZSTD_getErrorName(res)); - } else if (loopNb == 0) { - dstSize += res; - if (blockResults != NULL) blockResults[blockNb] = res; - } } - } /* for (loopNb = 0; loopNb < nbLoops; loopNb++) */ - - { U64 const totalTime = UTIL_clockSpanNano(clockStart); - BMK_runTime_t rt; - rt.nanoSecPerRun = totalTime / nbLoops; - rt.sumOfReturn = dstSize; - return BMK_setValid_runTime(rt); - } } -} - - -/* ==== Benchmarking any function, providing intermediate results ==== */ - -struct BMK_timedFnState_s { - U64 timeSpent_ns; - U64 timeBudget_ns; - U64 runBudget_ns; - BMK_runTime_t fastestRun; - unsigned nbLoops; - UTIL_time_t coolTime; -}; /* typedef'd to BMK_timedFnState_t within bench.h */ - -BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms) -{ - BMK_timedFnState_t* const r = (BMK_timedFnState_t*)malloc(sizeof(*r)); - if (r == NULL) return NULL; /* malloc() error */ - BMK_resetTimedFnState(r, total_ms, run_ms); - return r; -} - -void BMK_freeTimedFnState(BMK_timedFnState_t* state) { - free(state); -} - -void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms) -{ - if (!total_ms) total_ms = 1 ; - if (!run_ms) run_ms = 1; - if (run_ms > total_ms) run_ms = total_ms; - timedFnState->timeSpent_ns = 0; - timedFnState->timeBudget_ns = (U64)total_ms * TIMELOOP_NANOSEC / 1000; - timedFnState->runBudget_ns = (U64)run_ms * TIMELOOP_NANOSEC / 1000; - timedFnState->fastestRun.nanoSecPerRun = (U64)(-1LL); - timedFnState->fastestRun.sumOfReturn = 
(size_t)(-1LL); - timedFnState->nbLoops = 1; - timedFnState->coolTime = UTIL_getTime(); -} - -/* Tells if nb of seconds set in timedFnState for all runs is spent. - * note : this function will return 1 if BMK_benchFunctionTimed() has actually errored. */ -int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState) -{ - return (timedFnState->timeSpent_ns >= timedFnState->timeBudget_ns); -} - - -#define MINUSABLETIME (TIMELOOP_NANOSEC / 2) /* 0.5 seconds */ - -BMK_runOutcome_t BMK_benchTimedFn( - BMK_timedFnState_t* cont, - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void* const* srcBlockBuffers, const size_t* srcBlockSizes, - void * const * dstBlockBuffers, const size_t * dstBlockCapacities, - size_t* blockResults) -{ - U64 const runBudget_ns = cont->runBudget_ns; - U64 const runTimeMin_ns = runBudget_ns / 2; - int completed = 0; - BMK_runTime_t bestRunTime = cont->fastestRun; - - while (!completed) { - BMK_runOutcome_t runResult; - - /* Overheat protection */ - if (UTIL_clockSpanMicro(cont->coolTime) > ACTIVEPERIOD_MICROSEC) { - DEBUGOUTPUT("\rcooling down ... 
\r"); - UTIL_sleep(COOLPERIOD_SEC); - cont->coolTime = UTIL_getTime(); - } - - /* reinitialize capacity */ - runResult = BMK_benchFunction(benchFn, benchPayload, - initFn, initPayload, - blockCount, - srcBlockBuffers, srcBlockSizes, - dstBlockBuffers, dstBlockCapacities, - blockResults, - cont->nbLoops); - - if(!BMK_isSuccessful_runOutcome(runResult)) { /* error : move out */ - return BMK_runOutcome_error(); - } - - { BMK_runTime_t const newRunTime = BMK_extract_runTime(runResult); - U64 const loopDuration_ns = newRunTime.nanoSecPerRun * cont->nbLoops; - - cont->timeSpent_ns += loopDuration_ns; - - /* estimate nbLoops for next run to last approximately 1 second */ - if (loopDuration_ns > (runBudget_ns / 50)) { - U64 const fastestRun_ns = MIN(bestRunTime.nanoSecPerRun, newRunTime.nanoSecPerRun); - cont->nbLoops = (U32)(runBudget_ns / fastestRun_ns) + 1; - } else { - /* previous run was too short : blindly increase workload by x multiplier */ - const unsigned multiplier = 10; - assert(cont->nbLoops < ((unsigned)-1) / multiplier); /* avoid overflow */ - cont->nbLoops *= multiplier; - } - - if(loopDuration_ns < runTimeMin_ns) { - /* don't report results for which benchmark run time was too small : increased risks of rounding errors */ - assert(completed == 0); - continue; - } else { - if(newRunTime.nanoSecPerRun < bestRunTime.nanoSecPerRun) { - bestRunTime = newRunTime; - } - completed = 1; - } - } - } /* while (!completed) */ - - return BMK_setValid_runTime(bestRunTime); -} - - /* ================================================================= */ /* Benchmark Zstandard, mem-to-mem scenarios */ /* ================================================================= */ @@ -645,6 +424,7 @@ BMK_benchMemAdvancedNoAlloc( BMK_benchTimedFn( timeStateCompress, &local_defaultCompress, cctx, &local_initCCtx, &cctxprep, + ZSTD_isError, nbBlocks, srcPtrs, srcSizes, cPtrs, cCapacities, @@ -679,6 +459,7 @@ BMK_benchMemAdvancedNoAlloc( BMK_benchTimedFn(timeStateDecompress, 
&local_defaultDecompress, dctx, &local_initDCtx, &dctxprep, + ZSTD_isError, nbBlocks, (const void *const *)cPtrs, cSizes, resPtrs, resSizes, diff --git a/programs/bench.h b/programs/benchzstd.h similarity index 61% rename from programs/bench.h rename to programs/benchzstd.h index 13ca5b50b..9860adf16 100644 --- a/programs/bench.h +++ b/programs/benchzstd.h @@ -8,12 +8,18 @@ * You may select, at your option, one of the above-listed licenses. */ + /* benchzstd : + * benchmark Zstandard compression / decompression + * over a set of files or buffers + * and display progress result and final summary + */ + #if defined (__cplusplus) extern "C" { #endif -#ifndef BENCH_H_121279284357 -#define BENCH_H_121279284357 +#ifndef BENCH_ZSTD_H_3242387 +#define BENCH_ZSTD_H_3242387 /* === Dependencies === */ #include /* size_t */ @@ -142,9 +148,9 @@ BMK_benchOutcome_t BMK_benchFilesAdvanced( * .cMem : memory budget required for the compression context */ BMK_benchOutcome_t BMK_syntheticTest( - int cLevel, double compressibility, - const ZSTD_compressionParameters* compressionParams, - int displayLevel, const BMK_advancedParams_t* adv); + int cLevel, double compressibility, + const ZSTD_compressionParameters* compressionParams, + int displayLevel, const BMK_advancedParams_t* adv); @@ -181,6 +187,7 @@ BMK_benchOutcome_t BMK_benchMem(const void* srcBuffer, size_t srcSize, const void* dictBuffer, size_t dictBufferSize, int displayLevel, const char* displayName); + /* BMK_benchMemAdvanced() : same as BMK_benchMem() * with following additional options : * dstBuffer - destination buffer to write compressed output in, NULL if none provided. 
@@ -197,106 +204,8 @@ BMK_benchOutcome_t BMK_benchMemAdvanced(const void* srcBuffer, size_t srcSize, -/* ==== Benchmarking any function, iterated on a set of blocks ==== */ - -typedef struct { - unsigned long long nanoSecPerRun; /* time per iteration */ - size_t sumOfReturn; /* sum of return values */ -} BMK_runTime_t; - -VARIANT_ERROR_RESULT(BMK_runTime_t, BMK_runOutcome_t); - -/* check first if the return structure represents an error or a valid result */ -int BMK_isSuccessful_runOutcome(BMK_runOutcome_t outcome); - -/* extract result from variant type. - * note : this function will abort() program execution if result is not valid - * check result validity first, by using BMK_isSuccessful_runOutcome() - */ -BMK_runTime_t BMK_extract_runTime(BMK_runOutcome_t outcome); - - - -typedef size_t (*BMK_benchFn_t)(const void* src, size_t srcSize, void* dst, size_t dstCapacity, void* customPayload); -typedef size_t (*BMK_initFn_t)(void* initPayload); - - -/* BMK_benchFunction() : - * This function times the execution of 2 argument functions, benchFn and initFn */ - -/* benchFn - (*benchFn)(srcBuffers[i], srcSizes[i], dstBuffers[i], dstCapacities[i], benchPayload) - * is run nbLoops times - * initFn - (*initFn)(initPayload) is run once per benchmark, at the beginning. - * This argument can be NULL, in which case nothing is run. - * blockCount - number of blocks. Size of all array parameters : srcBuffers, srcSizes, dstBuffers, dstCapacities, blockResults - * srcBuffers - an array of buffers to be operated on by benchFn - * srcSizes - an array of the sizes of above buffers - * dstBuffers - an array of buffers to be written into by benchFn - * dstCapacities - an array of the capacities of above buffers - * blockResults - Optional: store the return value of benchFn for each block. Use NULL if this result is not requested. - * nbLoops - defines number of times benchFn is run. - * @return: a variant, which express either an error, or can generate a valid BMK_runTime_t result. 
- * Use BMK_isSuccessful_runOutcome() to check if function was successful. - * If yes, extract the result with BMK_extract_runTime(), - * it will contain : - * .sumOfReturn : the sum of all return values of benchFn through all of blocks - * .nanoSecPerRun : time per run of benchFn + (time for initFn / nbLoops) - * .sumOfReturn is generally intended for functions which return a # of bytes written into dstBuffer, - * in which case, this value will be the total amount of bytes written into dstBuffer. - */ -BMK_runOutcome_t BMK_benchFunction( - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void *const * srcBuffers, const size_t* srcSizes, - void *const * dstBuffers, const size_t* dstCapacities, - size_t* blockResults, - unsigned nbLoops); - - - -/* ==== Benchmark any function, providing intermediate results ==== */ - -/* state information tracking benchmark session */ -typedef struct BMK_timedFnState_s BMK_timedFnState_t; - -/* BMK_createTimedFnState() and BMK_resetTimedFnState() : - * Create/Set BMK_timedFnState_t for next benchmark session, - * which shall last a minimum of total_ms milliseconds, - * producing intermediate results, paced at interval of (approximately) run_ms. - */ -BMK_timedFnState_t* BMK_createTimedFnState(unsigned total_ms, unsigned run_ms); -void BMK_resetTimedFnState(BMK_timedFnState_t* timedFnState, unsigned total_ms, unsigned run_ms); -void BMK_freeTimedFnState(BMK_timedFnState_t* state); - - -/* Tells if duration of all benchmark runs has exceeded total_ms - */ -int BMK_isCompleted_TimedFn(const BMK_timedFnState_t* timedFnState); - - -/* BMK_benchTimedFn() : - * Similar to BMK_benchFunction(), most arguments being identical. - * Automatically determines `nbLoops` so that each result is regularly produced at interval of about run_ms. - * Note : minimum `nbLoops` is 1, therefore a run may last more than run_ms, and possibly even more than total_ms. 
- * Usage - initialize timedFnState, select benchmark duration (total_ms) and each measurement duration (run_ms) - * call BMK_benchTimedFn() repetitively, each measurement is supposed to last about run_ms - * Check if total time budget is spent or exceeded, using BMK_isCompleted_TimedFn() - */ -BMK_runOutcome_t BMK_benchTimedFn( - BMK_timedFnState_t* timedFnState, - BMK_benchFn_t benchFn, void* benchPayload, - BMK_initFn_t initFn, void* initPayload, - size_t blockCount, - const void *const * srcBlockBuffers, const size_t* srcBlockSizes, - void *const * dstBlockBuffers, const size_t* dstBlockCapacities, - size_t* blockResults); - - - - -#endif /* BENCH_H_121279284357 */ +#endif /* BENCH_ZSTD_H_3242387 */ #if defined (__cplusplus) } diff --git a/programs/zstdcli.c b/programs/zstdcli.c index 1545d1cac..153de961d 100644 --- a/programs/zstdcli.c +++ b/programs/zstdcli.c @@ -32,7 +32,7 @@ #include /* errno */ #include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */ #ifndef ZSTD_NOBENCH -# include "bench.h" /* BMK_benchFiles */ +# include "benchzstd.h" /* BMK_benchFiles */ #endif #ifndef ZSTD_NODICT # include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */ diff --git a/tests/Makefile b/tests/Makefile index da68bddcf..f363001b1 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -132,18 +132,18 @@ fullbench fullbench32 : CPPFLAGS += $(MULTITHREAD_CPP) fullbench fullbench32 : LDFLAGS += $(MULTITHREAD_LD) fullbench fullbench32 : DEBUGFLAGS = -DNDEBUG # turn off assert() for speed measurements fullbench fullbench32 : $(ZSTD_FILES) -fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/bench.c fullbench.c +fullbench fullbench32 : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c $(CC) $(FLAGS) $^ -o $@$(EXT) fullbench-lib : CPPFLAGS += -DXXH_NAMESPACE=ZSTD_ fullbench-lib : zstd-staticLib -fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/bench.c fullbench.c +fullbench-lib : $(PRGDIR)/datagen.c $(PRGDIR)/util.c 
$(PRGDIR)/benchfn.c fullbench.c $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) $(ZSTDDIR)/libzstd.a # note : broken : requires unavailable symbols fullbench-dll : zstd-dll fullbench-dll : LDFLAGS+= -L$(ZSTDDIR) -lzstd -fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c fullbench.c +fullbench-dll: $(PRGDIR)/datagen.c $(PRGDIR)/util.c $(PRGDIR)/benchfn.c fullbench.c # $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) -DZSTD_DLL_IMPORT=1 $(ZSTDDIR)/dll/libzstd.dll $(CC) $(FLAGS) $(filter %.c,$^) -o $@$(EXT) @@ -203,7 +203,7 @@ zstreamtest-dll : $(ZSTREAM_LOCAL_FILES) $(CC) $(CPPFLAGS) $(CFLAGS) $(filter %.c,$^) $(LDFLAGS) -o $@$(EXT) paramgrill : DEBUGFLAGS = # turn off assert() by default for speed measurements -paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/bench.c $(PRGDIR)/datagen.c paramgrill.c +paramgrill : $(ZSTD_FILES) $(PRGDIR)/util.c $(PRGDIR)/benchfn.c $(PRGDIR)/benchzstd.c $(PRGDIR)/datagen.c paramgrill.c $(CC) $(FLAGS) $^ -lm -o $@$(EXT) datagen : $(PRGDIR)/datagen.c datagencli.c diff --git a/tests/fullbench.c b/tests/fullbench.c index faf8fe759..bd4b116d9 100644 --- a/tests/fullbench.c +++ b/tests/fullbench.c @@ -30,7 +30,8 @@ #include "zstd.h" /* ZSTD_versionString */ #include "util.h" /* time functions */ #include "datagen.h" -#include "bench.h" /* CustomBench*/ +#include "benchfn.h" /* CustomBench*/ +#include "benchzstd.h" /* MB_UNIT */ /*_************************************ @@ -524,6 +525,7 @@ static size_t benchMem(U32 benchNb, BMK_benchTimedFn( tfs, benchFunction, buff2, NULL, NULL, /* initFn */ + ZSTD_isError, 1, /* blockCount */ &src, &srcSize, &dstBuffv, &dstBuffSize, diff --git a/tests/paramgrill.c b/tests/paramgrill.c index 7a4be854a..1b33c086d 100644 --- a/tests/paramgrill.c +++ b/tests/paramgrill.c @@ -25,7 +25,8 @@ #include "datagen.h" #include "xxhash.h" #include "util.h" -#include "bench.h" +#include "benchfn.h" +#include "benchzstd.h" #include "zstd_errors.h" #include "zstd_internal.h" /* should not be needed */ @@ -1455,6 +1456,7 @@ 
BMK_benchMemInvertible( buffers_t buf, contexts_t ctx, BMK_runOutcome_t const cOutcome = BMK_benchTimedFn(timeStateCompress, &local_defaultCompress, cctx, &local_initCCtx, &cctxprep, + ZSTD_isError, nbBlocks, srcPtrs, srcSizes, dstPtrs, dstCapacities, @@ -1479,6 +1481,7 @@ BMK_benchMemInvertible( buffers_t buf, contexts_t ctx, BMK_runOutcome_t const dOutcome = BMK_benchTimedFn(timeStateDecompress, &local_defaultDecompress, dctx, &local_initDCtx, &dctxprep, + ZSTD_isError, nbBlocks, (const void* const*)dstPtrs, dstSizes, resPtrs, resSizes,