From: Tony Finch Date: Mon, 20 Mar 2023 15:05:36 +0000 (+0000) Subject: Simplify histogram quantiles X-Git-Tag: v9.19.12~33^2~3 X-Git-Url: http://git.ipfire.org/gitweb/index.cgi?a=commitdiff_plain;h=cd0e7f853af175d9b1fae8abeb29ea3ca4d33481;p=thirdparty%2Fbind9.git Simplify histogram quantiles The `isc_histosummary_t` functions were written in the early days of `hg64` and carried over when I brought `hg64` into BIND. They were intended to be useful for graphing cumulative frequency distributions and the like, but in practice whatever draws charts is better off with a raw histogram export. Especially because of the poor performance of the old functions. The replacement `isc_histo_quantiles()` function is intended for providing a few quantile values in BIND's stats channel, when the user does not want the full histogram. Unlike the old functions, the caller provides all the query fractions up-front, so that the values can be found in a single scan instead of a scan per value. The scan is from larger values to smaller, since larger quantiles are usually more interesting, so the scan can bail out early. --- diff --git a/lib/isc/histo.c b/lib/isc/histo.c index a0b94c9b2a7..b7e1b23fdf6 100644 --- a/lib/isc/histo.c +++ b/lib/isc/histo.c @@ -28,12 +28,22 @@ #include /* - * XXXFANF to be added to isc/util.h by a commmit in a qp-trie + * XXXFANF to be added to by a commmit in a qp-trie * feature branch */ #define STRUCT_FLEX_SIZE(pointer, member, count) \ (sizeof(*(pointer)) + sizeof(*(pointer)->member) * (count)) +/* + * XXXFANF this should probably be in too + */ +#define OUTARG(ptr, val) \ + ({ \ + if ((ptr) != NULL) { \ + *(ptr) = (val); \ + } \ + }) + #define HISTO_MAGIC ISC_MAGIC('H', 's', 't', 'o') #define HISTO_VALID(p) ISC_MAGIC_VALID(p, HISTO_MAGIC) #define HISTOMULTI_MAGIC ISC_MAGIC('H', 'g', 'M', 't') @@ -72,38 +82,13 @@ typedef atomic_uint_fast64_t hg_bucket_t; typedef atomic_ptr(hg_bucket_t) hg_chunk_t; -#define ISC_HISTO_FIELDS \ - uint magic; \ - uint sigbits; \ - isc_mem_t *mctx - struct isc_histo { - ISC_HISTO_FIELDS; - /* chunk array must be first after common fields */ + uint magic; + uint sigbits; + isc_mem_t *mctx; hg_chunk_t chunk[CHUNKS]; }; -/* - * To convert between ranks and values, we scan the histogram to find the - * required rank. Each per-chunk total contains the sum of all the buckets - * in that chunk, so we can scan a chunk at a time rather than a bucket at - * a time. - * - * XXXFANF When `sigbits` is large, the chunks get large and slow to scan. - * If this turns out to be a problem, we could store ranks as well as - * values in the summary, and use a binary search. - */ -struct isc_histosummary { - ISC_HISTO_FIELDS; - /* chunk array must be first after common fields */ - uint64_t *chunk[CHUNKS]; - uint64_t total[CHUNKS]; - uint64_t population; - uint64_t maximum; - size_t size; - uint64_t buckets[]; -}; - struct isc_histomulti { uint magic; uint size; @@ -112,21 +97,6 @@ struct isc_histomulti { /**********************************************************************/ -#define OUTARG(ptr, val) \ - ({ \ - if ((ptr) != NULL) { \ - *(ptr) = (val); \ - } \ - }) - -static inline uint64_t -interpolate(uint64_t span, uint64_t mul, uint64_t div) { - double frac = div > 0 ? (double)mul / (double)div : mul > 0 ? 1 : 0; - return ((uint64_t)round(span * frac)); -} - -/**********************************************************************/ - void isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp) { REQUIRE(sigbits >= ISC_HISTO_MINBITS); @@ -163,9 +133,9 @@ isc_histo_destroy(isc_histo_t **hgp) { /**********************************************************************/ uint -isc_histo_sigbits(isc_historead_t hr) { - REQUIRE(HISTO_VALID(hr.hg)); - return (hr.hg->sigbits); +isc_histo_sigbits(isc_histo_t *hg) { + REQUIRE(HISTO_VALID(hg)); + return (hg->sigbits); } /* @@ -221,11 +191,11 @@ isc_histo_digits_to_bits(uint digits) { * * This branchless conversion is due to Paul Khuong: see bin_down_of() in * https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/ + * + * This function is in the `isc_histo_inc()` fast path. */ static inline uint -value_to_key(isc_historead_t hr, uint64_t value) { - /* fast path */ - const isc_histo_t *hg = hr.hg; +value_to_key(const isc_histo_t *hg, uint64_t value) { /* ensure that denormal numbers are all in chunk zero */ uint64_t chunked = value | CHUNKSIZE(hg); int clz = __builtin_clzll((unsigned long long)(chunked)); @@ -259,16 +229,16 @@ value_to_key(isc_historead_t hr, uint64_t value) { */ static inline uint64_t -key_to_minval(isc_historead_t hr, uint key) { - uint chunksize = CHUNKSIZE(hr.hg); +key_to_minval(const isc_histo_t *hg, uint key) { + uint chunksize = CHUNKSIZE(hg); uint exponent = (key / chunksize) - 1; uint64_t mantissa = (key % chunksize) + chunksize; return (key < chunksize ? key : mantissa << exponent); } static inline uint64_t -key_to_maxval(isc_historead_t hr, uint key) { - return (key_to_minval(hr, key + 1) - 1); +key_to_maxval(const isc_histo_t *hg, uint key) { + return (key_to_minval(hg, key + 1) - 1); } /**********************************************************************/ @@ -293,29 +263,33 @@ key_to_new_bucket(isc_histo_t *hg, uint key) { } static hg_bucket_t * -get_chunk(isc_historead_t hr, uint chunk) { - const hg_chunk_t *cpp = &hr.hg->chunk[chunk]; - return (atomic_load_acquire(cpp)); +get_chunk(const isc_histo_t *hg, uint chunk) { + return (atomic_load_acquire(&hg->chunk[chunk])); } static inline hg_bucket_t * -key_to_bucket(isc_historead_t hr, uint key) { +key_to_bucket(const isc_histo_t *hg, uint key) { /* fast path */ - uint chunksize = CHUNKSIZE(hr.hg); + uint chunksize = CHUNKSIZE(hg); uint chunk = key / chunksize; uint bucket = key % chunksize; - hg_bucket_t *cp = get_chunk(hr, chunk); + hg_bucket_t *cp = get_chunk(hg, chunk); return (cp == NULL ? NULL : &cp[bucket]); } static inline uint64_t -get_key_count(isc_historead_t hr, uint key) { - hg_bucket_t *bp = key_to_bucket(hr, key); +bucket_count(const hg_bucket_t *bp) { return (bp == NULL ? 0 : atomic_load_relaxed(bp)); } +static inline uint64_t +get_key_count(const isc_histo_t *hg, uint key) { + return (bucket_count(key_to_bucket(hg, key))); +} + static inline void add_key_count(isc_histo_t *hg, uint key, uint64_t inc) { + /* fast path */ if (inc > 0) { hg_bucket_t *bp = key_to_bucket(hg, key); bp = bp != NULL ? bp : key_to_new_bucket(hg, key); @@ -355,14 +329,14 @@ isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count) { } isc_result_t -isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp, +isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp, uint64_t *countp) { - REQUIRE(HISTO_VALID(hr.hg)); + REQUIRE(HISTO_VALID(hg)); - if (key < BUCKETS(hr.hg)) { - OUTARG(minp, key_to_minval(hr, key)); - OUTARG(maxp, key_to_maxval(hr, key)); - OUTARG(countp, get_key_count(hr, key)); + if (key < BUCKETS(hg)) { + OUTARG(minp, key_to_minval(hg, key)); + OUTARG(maxp, key_to_maxval(hg, key)); + OUTARG(countp, get_key_count(hg, key)); return (ISC_R_SUCCESS); } else { return (ISC_R_RANGE); @@ -370,9 +344,7 @@ isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp, } void -isc_histo_next(isc_historead_t hr, uint *keyp) { - const isc_histo_t *hg = hr.hg; - +isc_histo_next(const isc_histo_t *hg, uint *keyp) { REQUIRE(HISTO_VALID(hg)); REQUIRE(keyp != NULL); @@ -382,7 +354,7 @@ isc_histo_next(isc_historead_t hr, uint *keyp) { key++; while (key < buckets && key % chunksize == 0 && - key_to_bucket(hr, key) == NULL) + key_to_bucket(hg, key) == NULL) { key += chunksize; } @@ -390,14 +362,14 @@ isc_histo_next(isc_historead_t hr, uint *keyp) { } void -isc_histo_merge(isc_histo_t **targetp, isc_historead_t source) { - REQUIRE(HISTO_VALID(source.hg)); +isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source) { + REQUIRE(HISTO_VALID(source)); REQUIRE(targetp != NULL); if (*targetp != NULL) { REQUIRE(HISTO_VALID(*targetp)); } else { - isc_histo_create(source.hg->mctx, source.hg->sigbits, targetp); + isc_histo_create(source->mctx, source->sigbits, targetp); } uint64_t min, max, count; @@ -450,7 +422,7 @@ isc_histomulti_destroy(isc_histomulti_t **hmp) { } void -isc_histomulti_merge(isc_histo_t **hgp, isc_histomulti_t *hm) { +isc_histomulti_merge(isc_histo_t **hgp, const isc_histomulti_t *hm) { REQUIRE(HISTOMULTI_VALID(hm)); for (uint i = 0; i < hm->size; i++) { @@ -477,17 +449,18 @@ isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) { * equation 4 (incremental mean) and equation 44 (incremental variance) */ void -isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2) { - REQUIRE(HISTO_VALID(hr.hg)); +isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1, + double *pm2) { + REQUIRE(HISTO_VALID(hg)); - double pop = 0.0; + uint64_t pop = 0; double mean = 0.0; double sigma = 0.0; uint64_t min, max, count; for (uint key = 0; - isc_histo_get(hr, key, &min, &max, &count) == ISC_R_SUCCESS; - isc_histo_next(hr, &key)) + isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS; + isc_histo_next(hg, &key)) { if (count == 0) { /* avoid division by zero */ continue; @@ -504,166 +477,112 @@ isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2) { OUTARG(pm2, sqrt(sigma / pop)); } -/**********************************************************************/ +/* + * Clamped linear interpolation + * + * `outrange` should be `((1 << n) - 1)` for some `n`; when `n` is larger + * than 53, `outrange` can get rounded up to a power of 2, so we clamp the + * result to keep within bounds (extra important when `max == UINT64_MAX`) + */ +static inline uint64_t +lerp(uint64_t min, uint64_t max, uint64_t lo, uint64_t in, uint64_t hi) { + double inrange = (double)(hi - lo); + double inpart = (double)(in - lo); + double outrange = (double)(max - min); + double outpart = round(outrange * inpart / inrange); + return (min + ISC_MIN((uint64_t)outpart, max - min)); +} -void -isc_histosummary_create(isc_historead_t hr, isc_histosummary_t **hsp) { - const isc_histo_t *hg = hr.hg; +/* + * There is non-zero space for the inner value, and it is inside the bounds + */ +static inline bool +inside(uint64_t lo, uint64_t in, uint64_t hi) { + return (lo < hi && lo <= in && in <= hi); +} + +isc_result_t +isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction, + uint64_t *value) { + hg_bucket_t *chunk[CHUNKS]; + uint64_t total[CHUNKS]; + uint64_t rank[ISC_HISTO_MAXQUANTILES]; REQUIRE(HISTO_VALID(hg)); - REQUIRE(hsp != NULL); - REQUIRE(*hsp == NULL); + REQUIRE(0 < size && size <= ISC_HISTO_MAXQUANTILES); + REQUIRE(fraction != NULL); + REQUIRE(value != NULL); - uint chunksize = CHUNKSIZE(hg); - hg_bucket_t *chunk[CHUNKS] = { NULL }; + const uint maxchunk = MAXCHUNK(hg); + const uint chunksize = CHUNKSIZE(hg); /* - * First, find out which chunks we will copy across and how much - * space they need. We take a copy of the chunk pointers because - * concurrent threads may add new chunks before we have finished. + * Find out which chunks exist and what their totals are. We take a + * copy of the chunk pointers to reduce the need for atomic ops + * later on. Scan from low to high so that higher buckets are more + * likely to be in the CPU cache when we scan from high to low. */ - uint size = 0; - for (uint c = 0; c < CHUNKS; c++) { + uint64_t population = 0; + for (uint c = 0; c < maxchunk; c++) { chunk[c] = get_chunk(hg, c); + total[c] = 0; if (chunk[c] != NULL) { - size += chunksize; + for (uint b = chunksize; b-- > 0;) { + total[c] += bucket_count(&chunk[c][b]); + } + population += total[c]; } } - isc_histosummary_t *hs = - isc_mem_get(hg->mctx, STRUCT_FLEX_SIZE(hs, buckets, size)); - *hs = (isc_histosummary_t){ - .magic = HISTO_MAGIC, - .sigbits = hg->sigbits, - .size = size, - }; - isc_mem_attach(hg->mctx, &hs->mctx); - /* - * Second, copy the contents of the buckets. The copied pointers - * are faster than get_key_count() because get_chunk()'s atomics - * would require re-fetching the chunk pointer for every bucket. + * Now we know the population, we can convert fractions to ranks. + * Also ensure they are within bounds and in decreasing order. */ - uint maxkey = 0; - uint chunkbase = 0; - for (uint c = 0; c < CHUNKS; c++) { - if (chunk[c] == NULL) { - continue; - } - hs->chunk[c] = &hs->buckets[chunkbase]; - chunkbase += chunksize; - for (uint b = 0; b < chunksize; b++) { - uint64_t count = atomic_load_relaxed(&chunk[c][b]); - hs->chunk[c][b] = count; - hs->total[c] += count; - hs->population += count; - maxkey = (count == 0) ? maxkey : chunksize * c + b; - } + for (uint i = 0; i < size; i++) { + REQUIRE(0.0 <= fraction[i] && fraction[i] <= 1.0); + REQUIRE(i == 0 || fraction[i - 1] > fraction[i]); + rank[i] = round(fraction[i] * population); } - hs->maximum = key_to_maxval(hs, maxkey); - *hsp = hs; -} - -void -isc_histosummary_destroy(isc_histosummary_t **hsp) { - REQUIRE(hsp != NULL); - REQUIRE(HISTO_VALID(*hsp)); - - isc_histosummary_t *hs = *hsp; - *hsp = NULL; - - isc_mem_putanddetach(&hs->mctx, hs, - STRUCT_FLEX_SIZE(hs, buckets, hs->size)); -} - -/**********************************************************************/ - -isc_result_t -isc_histo_value_at_rank(const isc_histosummary_t *hs, uint64_t rank, - uint64_t *valuep) { - REQUIRE(HISTO_VALID(hs)); - REQUIRE(valuep != NULL); - - uint maxchunk = MAXCHUNK(hs); - uint chunksize = CHUNKSIZE(hs); - uint64_t count = 0; - uint b, c; - - if (rank > hs->population) { - return (ISC_R_RANGE); - } - if (rank == hs->population) { - *valuep = hs->maximum; - return (ISC_R_SUCCESS); - } - - for (c = 0; c < maxchunk; c++) { - count = hs->total[c]; - if (rank < count) { - break; - } - rank -= count; - } - INSIST(c < maxchunk); - - for (b = 0; b < chunksize; b++) { - count = hs->chunk[c][b]; - if (rank < count) { - break; + /* + * Scan chunks from high to low, keeping track of the bounds on + * each chunk's ranks. Each time we match `rank[i]`, move on to the + * next rank and continue the scan from the same place. + */ + uint i = 0; + uint64_t chunk_lo = population; + for (uint c = maxchunk; c-- > 0;) { + uint64_t chunk_hi = chunk_lo; + chunk_lo = chunk_hi - total[c]; + + /* + * Scan buckets backwards within this chunk, in a similar + * manner to the chunk scan. Skip all or part of the loop + * if the current rank is not in the chunk. + */ + uint64_t bucket_lo = chunk_hi; + for (uint b = chunksize; + b-- > 0 && inside(chunk_lo, rank[i], chunk_hi);) + { + uint64_t bucket_hi = bucket_lo; + bucket_lo = bucket_hi - bucket_count(&chunk[c][b]); + + /* + * Convert all ranks that fall in this bucket. + */ + while (inside(bucket_lo, rank[i], bucket_hi)) { + uint key = chunksize * c + b; + value[i] = lerp(key_to_minval(hg, key), + key_to_maxval(hg, key), + bucket_lo, rank[i], bucket_hi); + if (++i == size) { + return (ISC_R_SUCCESS); + } + } } - rank -= count; } - INSIST(b < chunksize); - - uint key = chunksize * c + b; - uint64_t min = key_to_minval(hs, key); - uint64_t max = key_to_maxval(hs, key); - *valuep = min + interpolate(max - min, rank, count); - return (ISC_R_SUCCESS); -} - -void -isc_histo_rank_of_value(const isc_histosummary_t *hs, uint64_t value, - uint64_t *rankp) { - REQUIRE(HISTO_VALID(hs)); - REQUIRE(rankp != NULL); - - uint key = value_to_key(hs, value); - uint chunksize = CHUNKSIZE(hs); - uint kc = key / chunksize; - uint kb = key % chunksize; - uint64_t rank = 0; - - for (uint c = 0; c < kc; c++) { - rank += hs->total[c]; - } - for (uint b = 0; b < kb; b++) { - rank += hs->chunk[kc][b]; - } - - uint64_t count = hs->chunk[kc][kb]; - uint64_t min = key_to_minval(hs, key); - uint64_t max = key_to_maxval(hs, key); - - *rankp = rank + interpolate(count, value - min, max - min); -} - -isc_result_t -isc_histo_quantile(const isc_histosummary_t *hs, double p, uint64_t *valuep) { - if (p < 0.0 || p > 1.0) { - return (ISC_R_RANGE); - } - double rank = round(hs->population * p); - return (isc_histo_value_at_rank(hs, (uint64_t)rank, valuep)); -} - -void -isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value, double *pp) { - uint64_t rank; - isc_histo_rank_of_value(hs, value, &rank); - *pp = (double)rank / (double)hs->population; + return (ISC_R_UNSET); } /**********************************************************************/ diff --git a/lib/isc/include/isc/histo.h b/lib/isc/include/isc/histo.h index 461dbb75178..4c9123fbbc6 100644 --- a/lib/isc/include/isc/histo.h +++ b/lib/isc/include/isc/histo.h @@ -28,17 +28,16 @@ * The bits <-> digits functions convert betwen decimal significant * digits (as in scientific notation) and binary significant bits. * - * At the low end (near zero) there is one value per bucket, then two - * values, four, etc. - * * You can use the `isc_histo_get()` function to export data from the * histogram. The range of a bucket is returned as its minimum and - * maximum values, inclusive, i.e. a closed interval. Closed intervals - * are more inconvenient than half-open intervals, and half-open - * intervals are more common in C. We use closed intervals so we are - * able to express the maximum of the last bucket, UINT64_MAX, and - * because OpenMetrics histograms describe buckets as - * less-than-or-equal to a particular value. + * maximum values, inclusive, i.e. a closed interval. We use closed + * intervals so we are able to express the maximum of the last bucket, + * UINT64_MAX, although half-open intervals are more common in C. + * + * You can calculate some basic statistics directly from a histogram. + * The `isc_histo_quantiles()` function can get a histogram's median, + * 99th percentile, etc. The `isc_histo_moments()` function gets a + * histogram's population, mean, and standard deviation. * * The size of a histogram depends on the range of values in the * stream of samples, not the number of samples. Bucket counters are @@ -47,45 +46,32 @@ * most 64 chunks, one for each bit of a 64 bit value. Histograms with * greater precision have larger chunks. * - * The number of values that map to a bucket (1, 2, 4, 8, ...) is the - * same in each chunk. Chunks 0 and 1 have one value per bucket, (see - * `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2 values per bucket, - * and they increase by a factor of 2 in each successive bucket. + * At the low end (values near zero) there is one value per bucket, + * then two values, four, eight, etc. The number of values that map to + * a bucket is the same in each chunk. Chunks 0 and 1 have one value + * per bucket, (see `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2 + * values per bucket, chunk 3 has 4, etc. * * The update cost is roughly constant and very small (not much more * than an atomic increment). It mostly depends on cache locality and * thread contention. * - * To get statistical properties of a histogram (population, mean, - * standard deviation, CDF, quantiles, ranks) you must first construct - * an `isc_histosummary_t`. A summary is a read-only snapshot of a - * histogram augmented with information for calculating statistics - * more efficiently. - * - * There is no overflow checking for bucket counters. It takes a few - * nanoseconds to add a sample to the histogram, so it would take at - * least a few CPU-centuries to cause an overflow. Aggregate - * statistics from a quarter of a million CPUs might overflow in a - * day. (Provided that in both examples the CPUs are doing nothing - * apart from repeatedly adding 1 to histogram buckets.) + * There is no overflow checking for the 64 bit bucket counters. It + * takes a few nanoseconds to add a sample to the histogram, so it + * would take at least a few CPU-centuries to cause an overflow. + * Aggregate statistics from a quarter of a million CPUs might + * overflow in a day. (Provided that in both examples the CPUs are + * doing nothing apart from repeatedly adding 1 to histogram buckets.) */ -typedef struct isc_histo isc_histo_t; -typedef struct isc_histosummary isc_histosummary_t; -typedef struct isc_histomulti isc_histomulti_t; - -/* - * For functions that can take either type. - */ -typedef union isc_historead { - const isc_histo_t *hg; - const isc_histosummary_t *hs; -} isc_historead_t __attribute__((__transparent_union__)); +typedef struct isc_histo isc_histo_t; +typedef struct isc_histomulti isc_histomulti_t; -#define ISC_HISTO_MINBITS 1 -#define ISC_HISTO_MAXBITS 18 -#define ISC_HISTO_MINDIGITS 1 -#define ISC_HISTO_MAXDIGITS 6 +#define ISC_HISTO_MINBITS 1 +#define ISC_HISTO_MAXBITS 18 +#define ISC_HISTO_MINDIGITS 1 +#define ISC_HISTO_MAXDIGITS 6 +#define ISC_HISTO_MAXQUANTILES 101 /* enough for all the percentiles */ /* * How many values map 1:1 to buckets for a given number of sigbits? @@ -126,7 +112,7 @@ isc_histo_destroy(isc_histo_t **hgp); */ uint -isc_histo_sigbits(isc_historead_t hr); +isc_histo_sigbits(isc_histo_t *hg); /*%< * Get the histogram's `sigbits` setting * @@ -195,7 +181,7 @@ isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count); */ isc_result_t -isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp, +isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp, uint64_t *countp); /*%< * Export information about a bucket. @@ -218,7 +204,7 @@ isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp, * which can be zero. * * Requires: - *\li `hr` is a pointer to a valid histogram or summary + *\li `hg` is a pointer to a valid histogram * * Returns: *\li ISC_R_SUCCESS, if `key` is valid @@ -226,7 +212,7 @@ isc_histo_get(isc_historead_t hr, uint key, uint64_t *minp, uint64_t *maxp, */ void -isc_histo_next(isc_historead_t hr, uint *keyp); +isc_histo_next(const isc_histo_t *hg, uint *keyp); /*%< * Skip to the next key, omitting chunks of unallocated buckets. * @@ -245,12 +231,12 @@ isc_histo_next(isc_historead_t hr, uint *keyp); * } * * Requires: - *\li `hr` is a pointer to a valid histogram or summary + *\li `hg` is a pointer to a valid histogram *\li `keyp != NULL` */ void -isc_histo_merge(isc_histo_t **targetp, isc_historead_t source); +isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source); /*%< * Increase the counts in `*ptarget` by the counts recorded in `source` * @@ -264,7 +250,7 @@ isc_histo_merge(isc_histo_t **targetp, isc_historead_t source); * Requires: *\li `targetp != NULL` *\li `*targetp` is NULL or a pointer to a valid histogram - *\li `source` is a pointer to a valid histogram or summary + *\li `source` is a pointer to a valid histogram * * Ensures: *\li `*targetp` is a pointer to a valid histogram @@ -307,7 +293,7 @@ isc_histomulti_destroy(isc_histomulti_t **hmp); */ void -isc_histomulti_merge(isc_histo_t **targetp, isc_histomulti_t *source); +isc_histomulti_merge(isc_histo_t **targetp, const isc_histomulti_t *source); /*%< * Increase the counts in `*targetp` by the counts recorded in `source` * @@ -343,37 +329,9 @@ isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc); /**********************************************************************/ void -isc_histosummary_create(const isc_histo_t *hg, isc_histosummary_t **hsp); +isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1, double *pm2); /*%< - * Summarize a histogram for rank and quantile calculations. - * - * Requires: - *\li `hg` is a pointer to a valid histogram - *\li `hsp != NULL` - *\li `*hsp == NULL` - * - * Ensures: - *\li `*hsp` is a pointer to a histogram summary - */ - -void -isc_histosummary_destroy(isc_histosummary_t **hsp); -/*%< - * Destroy a histogram summary - * - * Requires: - *\li `hsp != NULL` - *\li `*hsp` is a pointer to a valid histogram summary - * - * Ensures: - *\li all memory allocated by the summary has been released - *\li `*hsp == NULL` - */ - -void -isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2); -/*%< - * Get the population, mean, and standard deviation of a histogram + * Get the population, mean, and standard deviation of a histogram. * * If `pm0` is non-NULL it is set to the population of the histogram. * (Strictly speaking, the zeroth moment is `pop / pop == 1`.) @@ -385,99 +343,49 @@ isc_histo_moments(isc_historead_t hr, double *pm0, double *pm1, double *pm2); * recorded data. The standard deviation is the square root of the * variance, which is the second moment about the mean. * - * Requires: - *\li `hr` is a pointer to a valid histogram or summary - */ - -isc_result_t -isc_histo_value_at_rank(const isc_histosummary_t *hs, uint64_t rank, - uint64_t *valuep); -/*%< - * Get the approximate value at a given rank in the recorded data. - * - * The value at rank 0 is the minimum of the bucket containing the - * smallest value added to the histogram. - * - * The value at rank equal to the population is the maximum of the - * bucket containing the largest value added to the histogram. - * - * Greater ranks return a range error. - * - * Note: this function is slow for high-precision histograms - * (more than 3 significant digits). - * - * Requires: - *\li `hs` is a pointer to a valid histogram summary - *\li `valuep != NULL` - * - * Returns: - *\li ISC_R_SUCCESS, if `rank` is within bounds - *\li ISC_R_RANGE, otherwise - */ - -void -isc_histo_rank_of_value(const isc_histosummary_t *hs, uint64_t value, - uint64_t *rankp); -/*%< - * Get the approximate rank of a value in the recorded data. - * - * You can query the rank of any value. - * - * Note: this function is slow for high-precision histograms - * (more than 3 significant digits). + * It is safe if the histogram is concurrently modified. * * Requires: - *\li `hs` is a pointer to a valid histogram summary - *\li `rankp != NULL` + *\li `hg` is a pointer to a valid histogram */ isc_result_t -isc_histo_quantile(const isc_histosummary_t *hs, double proportion, - uint64_t *valuep); +isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction, + uint64_t *value); /*%< * The quantile function (aka inverse cumulative distribution function) - * of the histogram. What value is greater than the given proportion of + * of the histogram. What value is greater than the given fraction of * the population? * - * A proportion of 0.5 gets the median value: it is greater than half + * A fraction of 0.5 gets the median value: it is greater than half * the population. 0.75 gets the third quartile value, and 0.99 gets - * the 99th percentile value. The proportion should be between 0.0 and - * 1.0 inclusive. + * the 99th percentile value. The fraction must be between 0.0 and 1.0 + * inclusive. * * https://enwp.org/Quantile_function * - * Note: this function is slow for high-precision histograms - * (more than 3 significant digits). + * This implementation allows you to query quantile values for + * multiple fractions in one function call. Internally, it makes one + * linear scan over the histogram's buckets to find all the fractions. + * Buckets are scanned from high to low, so that querying large + * quantiles is more efficient. The `fraction` array must be sorted in + * decreasing order. The results are stored in the `value` array. Both + * arrays have `size` elements. + * + * The results may be nonsense if the histogram is concurrently + * modified. To get a stable copy you can call `isc_histo_merge()`. * * Requires: - *\li `hs` is a pointer to a valid histogram summary - *\li `valuep != NULL` + *\li `hg` is a pointer to a valid histogram + *\li `0 < size && size <= ISC_HISTO_MAXQUANTILES` + *\li `fraction != NULL` + *\li `value != NULL` + *\li `0.0 <= fraction[i] && fraction[i] <= 1.0` for every element + *\li `fraction[i - 1] > fraction[i]` for every pair of elements * * Returns: - *\li ISC_R_SUCCESS, if the proportion is in bounds - *\li ISC_R_RANGE, otherwise - */ - -void -isc_histo_cdf(const isc_histosummary_t *hs, uint64_t value, - double *proportionp); -/*%< - * The cumulative distribution function of the histogram. Given a - * value, what proportion of the population have smaller values? - * You can query any value. - * - * If the value is the median, the proportion is 0.5. The proportion - * of the third quartile value is 0.75, and the proportion of the 99th - * percentile value is 0.99. - * - * https://enwp.org/Cumulative_distribution_function - * - * Note: this function is slow for high-precision histograms - * (more than 3 significant digits). - * - * Requires: - *\li `hs` is a pointer to a valid histogram summary - *\li `proportionp != NULL` + *\li ISC_R_SUCCESS, if results were stored in the `value` array + *\li ISC_R_UNSET, if the histogram is empty */ /**********************************************************************/ diff --git a/tests/isc/histo_test.c b/tests/isc/histo_test.c index 2355ab1f32a..79224170b95 100644 --- a/tests/isc/histo_test.c +++ b/tests/isc/histo_test.c @@ -32,6 +32,8 @@ #define TIME_LIMIT (123 * NS_PER_MS) +#define SUBRANGE 69 + #if VERBOSE #define TRACE(fmt, ...) \ @@ -117,18 +119,106 @@ ISC_RUN_TEST_IMPL(basics) { prev_max = max; key++; result = isc_histo_get(hg, key, &min, &max, &count); + + /* these tests can be slow */ + if (isc_time_monotonic() > start + TIME_LIMIT) { + break; + } + } + + /* if we did not stop early */ + if (result != ISC_R_SUCCESS) { + /* last bucket goes up to last possible value */ + assert_int_equal(max, UINT64_MAX); + + double pop; + isc_histo_moments(hg, &pop, NULL, NULL); + assert_int_equal((uint64_t)pop, key * 8); } - /* last bucket goes up to last possible value */ - assert_int_equal(max, UINT64_MAX); + isc_histo_destroy(&hg); + + TRACETIME("%u keys", key); + } +} +ISC_RUN_TEST_IMPL(quantiles) { + for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) { + isc_result_t result; + uint64_t min, max, count; double pop; + uint key; + + isc_nanosecs_t start = isc_time_monotonic(); + + isc_histo_t *hg = NULL; + isc_histo_create(mctx, bits, &hg); + + for (key = 0; isc_histo_get(hg, key, &min, &max, &count) == + ISC_R_SUCCESS; + key++) + { + /* inc twice so we can check bucket's midpoint */ + assert_int_equal(count, 0); + isc_histo_inc(hg, min); + isc_histo_inc(hg, max); + } + + const uint buckets = key; + + /* no incs were lost */ isc_histo_moments(hg, &pop, NULL, NULL); - assert_int_equal((uint64_t)pop, key * 8); + assert_float_equal(pop, buckets * 2, 0.5); + + /* two ranks per bucket */ + const uint quantum = ISC_HISTO_MAXQUANTILES / 2 - 1; + uint64_t value[ISC_HISTO_MAXQUANTILES]; + double frac[ISC_HISTO_MAXQUANTILES]; + uint base = 0; + + for (key = 0; key < buckets; key++) { + /* fill in the values one quantum at a time */ + if (key == 0 || key % quantum == buckets % quantum) { + base = key; + for (uint k = 0; k < quantum; k++) { + double rank = (base + k) * 2; + uint i = (quantum - k) * 2; + frac[i - 1] = (rank + 1.0) / pop; + frac[i - 0] = rank / pop; + } + frac[0] = (base + quantum) * 2 / pop; + result = isc_histo_quantiles( + hg, quantum * 2 + 1, frac, value); + assert_int_equal(result, ISC_R_SUCCESS); + } + + result = isc_histo_get(hg, key, &min, &max, &count); + assert_int_equal(result, ISC_R_SUCCESS); + assert_int_equal(count, 2); + + uint64_t lomin = min == 0 ? min : min - 1; + uint64_t himin = min; + uint64_t lomid = floor(min / 2.0 + max / 2.0); + uint64_t himid = ceil(min / 2.0 + max / 2.0); + uint64_t lomax = max; + uint64_t himax = max == UINT64_MAX ? max : max + 1; + + uint i = (quantum + base - key) * 2; + + /* check fenceposts */ + assert_in_range(value[i - 0], lomin, himin); + assert_in_range(value[i - 1], lomid, himid); + assert_in_range(value[i - 2], lomax, himax); + + /* these tests can be slow */ + if (isc_time_monotonic() > start + TIME_LIMIT) { + break; + } + } isc_histo_destroy(&hg); - TRACETIME("%u keys", key); + TRACETIME(""); } } @@ -183,106 +273,26 @@ ISC_RUN_TEST_IMPL(sigfigs) { } } -ISC_RUN_TEST_IMPL(summary) { +ISC_RUN_TEST_IMPL(subrange) { for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) { isc_result_t result; - uint64_t min, max, count, value, rank, lorank, hirank; - double pop; - uint key; + uint64_t min, max, count; isc_nanosecs_t start = isc_time_monotonic(); isc_histo_t *hg = NULL; isc_histo_create(mctx, bits, &hg); - for (key = 0; isc_histo_get(hg, key, &min, &max, &count) == - ISC_R_SUCCESS; - key++) - { - /* inc twice so we can check bucket's midpoint */ - assert_int_equal(count, 0); - isc_histo_inc(hg, min); - isc_histo_inc(hg, max); - } - - isc_histosummary_t *hs = NULL; - isc_histosummary_create(hg, &hs); - - /* no incs were lost */ - isc_histo_moments(hg, &pop, NULL, NULL); - assert_float_equal(pop, 2 * key, 0.5); - - isc_histo_destroy(&hg); - - for (key = 0; isc_histo_get(hs, key, &min, &max, &count) == - ISC_R_SUCCESS; - isc_histo_next(hs, &key)) - { - uint64_t lomin = min == 0 ? min : min - 1; - uint64_t himin = min; - uint64_t lomid = floor(min / 2.0 + max / 2.0); - uint64_t himid = ceil(min / 2.0 + max / 2.0); - uint64_t lomax = max; - uint64_t himax = max == UINT64_MAX ? max : max + 1; - - assert_int_equal(count, 2); - - rank = key * 2; - - /* check fenceposts */ - result = isc_histo_value_at_rank(hs, rank, &value); - assert_int_equal(result, ISC_R_SUCCESS); - assert_in_range(value, lomin, himin); - result = isc_histo_value_at_rank(hs, rank + 1, &value); - assert_int_equal(result, ISC_R_SUCCESS); - assert_in_range(value, lomid, himid); - result = isc_histo_value_at_rank(hs, rank + 2, &value); - assert_int_equal(result, ISC_R_SUCCESS); - assert_in_range(value, lomax, himax); - - isc_histo_rank_of_value(hs, min, &rank); - assert_int_equal(rank, key * 2); - - /* only if the bucket covers enough distinct values */ - - if (min < lomid) { - rank = key * 2 + 1; - isc_histo_rank_of_value(hs, lomid, &lorank); - isc_histo_rank_of_value(hs, himid, &hirank); - assert_in_range(rank, lorank, hirank); - } - - if (himid < max) { - rank = key * 2 + 2; - isc_histo_rank_of_value(hs, lomax, &lorank); - isc_histo_rank_of_value(hs, himax, &hirank); - assert_in_range(rank, lorank, hirank); - } - - /* these tests can be slow */ - if (isc_time_monotonic() > start + TIME_LIMIT) { - break; - } + uint64_t value[SUBRANGE + 1]; + double frac[SUBRANGE + 1]; + for (uint i = 0; i <= SUBRANGE; i++) { + frac[i] = (double)(SUBRANGE - i) / (double)(SUBRANGE); } - isc_histosummary_destroy(&hs); + result = isc_histo_quantiles(hg, ARRAY_SIZE(frac), frac, value); + assert_int_equal(result, ISC_R_UNSET); - TRACETIME(""); - } -} - -ISC_RUN_TEST_IMPL(subrange) { - for (uint bits = ISC_HISTO_MINBITS; bits <= ISC_HISTO_MAXBITS; bits++) { - isc_result_t result; - uint64_t min, max, count, value; - - isc_nanosecs_t start = isc_time_monotonic(); - - isc_histo_t *hg = NULL; - isc_histo_create(mctx, bits, &hg); - - uint buckets = 64; - for (uint key = 0, top = buckets - 1;; key++, top++) { + for (uint key = 0, top = SUBRANGE - 1;; key++, top++) { if (isc_histo_get(hg, key, &min, NULL, NULL) != ISC_R_SUCCESS) { @@ -293,27 +303,28 @@ ISC_RUN_TEST_IMPL(subrange) { { break; } - isc_histo_put(hg, min, max, buckets); - - isc_histosummary_t *hs = NULL; - isc_histosummary_create(hg, &hs); + /* + * If we try adding more than one sample per bucket + * here, the test fails when buckets have different + * sizes because [min,max] spans multiple chunks. + */ + isc_histo_put(hg, min, max, SUBRANGE); + + result = isc_histo_quantiles(hg, ARRAY_SIZE(frac), frac, + value); + assert_int_equal(result, ISC_R_SUCCESS); - for (uint bucket = 0; bucket < buckets; bucket++) { + for (uint bucket = 0; bucket < SUBRANGE; bucket++) { result = isc_histo_get(hg, key + bucket, &min, &max, &count); assert_int_equal(result, ISC_R_SUCCESS); /* did isc_histo_put() spread evenly? */ assert_int_equal(count, 1); - result = isc_histo_value_at_rank(hs, bucket, - &value); - assert_int_equal(result, ISC_R_SUCCESS); - assert_int_equal(value, min); + /* do the quantile values match? */ + assert_int_equal(value[SUBRANGE - bucket], min); } - result = isc_histo_value_at_rank(hs, buckets, &value); - assert_int_equal(result, ISC_R_SUCCESS); - assert_int_equal(value, max); + assert_int_equal(value[0], max); - isc_histosummary_destroy(&hs); isc_histo_destroy(&hg); isc_histo_create(mctx, bits, &hg); @@ -331,8 +342,8 @@ ISC_RUN_TEST_IMPL(subrange) { ISC_TEST_LIST_START ISC_TEST_ENTRY(basics) +ISC_TEST_ENTRY(quantiles) ISC_TEST_ENTRY(sigfigs) -ISC_TEST_ENTRY(summary) ISC_TEST_ENTRY(subrange) ISC_TEST_LIST_END