From: Willy Tarreau
Date: Mon, 24 Jul 2023 14:18:25 +0000 (+0200)
Subject: MEDIUM: pools: move the needed_avg counter over a few buckets
X-Git-Tag: v2.9-dev3~5
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=da6999f83901ed176d3da14d75bc782353a4faa3;p=thirdparty%2Fhaproxy.git

MEDIUM: pools: move the needed_avg counter over a few buckets

That's the same principle as for ->allocated and ->used. Here we return
the sum of the raw values, so the result still needs to be fed to
swrate_avg(). It also means that we now use the local ->used instead of
the global one for the calculations, and that we no longer need to call
pool_used() on fast paths. The number of samples should likely be
divided by the number of buckets, but that's not done yet (better to
observe first).

A function pool_needed_avg() was added to report aggregated values for
the "show pools" command.

With this change, an h2load test made of 5 * 160 conn * 40 streams on
80 threads rose from 1.5M RPS to 6.7M RPS.
---

diff --git a/include/haproxy/pool-t.h b/include/haproxy/pool-t.h
index 1b1dfc4143..3cbab8bf16 100644
--- a/include/haproxy/pool-t.h
+++ b/include/haproxy/pool-t.h
@@ -122,7 +122,6 @@ struct pool_head {
 	THREAD_ALIGN(64);
 	struct pool_item *free_list; /* list of free shared objects */
-	unsigned int needed_avg;/* floating indicator between used and allocated */
 	unsigned int failed;	/* failed allocations */
 
 	/* these entries depend on the pointer value, they're used to reduce
@@ -133,6 +132,7 @@ struct pool_head {
 		THREAD_ALIGN(64);
 		unsigned int allocated;	/* how many chunks have been allocated */
 		unsigned int used;	/* how many chunks are currently in use */
+		unsigned int needed_avg;/* floating indicator between used and allocated */
 	} buckets[CONFIG_HAP_POOL_BUCKETS];
 
 	struct pool_cache_head cache[MAX_THREADS] THREAD_ALIGNED(64); /* pool caches */
diff --git a/include/haproxy/pool.h b/include/haproxy/pool.h
index 8522b691ca..b999c392d6 100644
--- a/include/haproxy/pool.h
+++ b/include/haproxy/pool.h
@@ -158,6 +158,19 @@ static inline uint pool_used(const struct pool_head *pool)
 	return ret;
 }
 
+/* returns the raw total number needed entries across all buckets. It must
+ * be passed to swrate_avg() to get something usable.
+ */
+static inline uint pool_needed_avg(const struct pool_head *pool)
+{
+	int bucket;
+	uint ret;
+
+	for (bucket = ret = 0; bucket < CONFIG_HAP_POOL_BUCKETS; bucket++)
+		ret += HA_ATOMIC_LOAD(&pool->buckets[bucket].needed_avg);
+	return ret;
+}
+
 /* Returns the max number of entries that may be brought back to the pool
  * before it's considered as full. Note that it is only usable for releasing
  * objects, hence the function assumes that no more than ->used entries will
@@ -171,6 +184,7 @@ static inline uint pool_used(const struct pool_head *pool)
 static inline uint pool_releasable(const struct pool_head *pool)
 {
 	uint alloc, used;
+	uint needed_raw;
 
 	if (unlikely(pool_debugging & (POOL_DBG_NO_CACHE|POOL_DBG_NO_GLOBAL)))
 		return 0;
@@ -180,7 +194,8 @@ static inline uint pool_releasable(const struct pool_head *pool)
 	if (used < alloc)
 		used = alloc;
 
-	if (alloc < swrate_avg(pool->needed_avg + pool->needed_avg / 4, POOL_AVG_SAMPLES))
+	needed_raw = pool_needed_avg(pool);
+	if (alloc < swrate_avg(needed_raw + needed_raw / 4, POOL_AVG_SAMPLES))
 		return used; // less than needed is allocated, can release everything
 
 	if ((uint)(alloc - used) < pool->minavail)
diff --git a/src/pool.c b/src/pool.c
index 0416233cdf..e2f2e7e4c0 100644
--- a/src/pool.c
+++ b/src/pool.c
@@ -428,7 +428,7 @@ void *pool_alloc_nocache(struct pool_head *pool)
 		return NULL;
 
 	bucket = pool_pbucket(ptr);
-	swrate_add_scaled_opportunistic(&pool->needed_avg, POOL_AVG_SAMPLES, pool_used(pool), POOL_AVG_SAMPLES/4);
+	swrate_add_scaled_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, pool->buckets[bucket].used, POOL_AVG_SAMPLES/4);
 	_HA_ATOMIC_INC(&pool->buckets[bucket].allocated);
 	_HA_ATOMIC_INC(&pool->buckets[bucket].used);
 
@@ -448,7 +448,7 @@ void pool_free_nocache(struct pool_head *pool, void *ptr)
 
 	_HA_ATOMIC_DEC(&pool->buckets[bucket].used);
 	_HA_ATOMIC_DEC(&pool->buckets[bucket].allocated);
-	swrate_add_opportunistic(&pool->needed_avg, POOL_AVG_SAMPLES, pool_used(pool));
+	swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, pool->buckets[bucket].used);
 
 	pool_put_to_os_nodec(pool, ptr);
 }
@@ -530,7 +530,7 @@ static void pool_evict_last_items(struct pool_head *pool, struct pool_cache_head
 			/* will never match when global pools are disabled */
 			uint bucket = pool_pbucket(item);
 			_HA_ATOMIC_DEC(&pool->buckets[bucket].used);
-			swrate_add_opportunistic(&pool->needed_avg, POOL_AVG_SAMPLES, pool_used(pool));
+			swrate_add_opportunistic(&pool->buckets[bucket].needed_avg, POOL_AVG_SAMPLES, pool->buckets[bucket].used);
 
 			pi = (struct pool_item *)item;
 			pi->next = NULL;
@@ -984,7 +984,7 @@ void dump_pools_to_trash(int by_what, int max, const char *pfx)
 		pool_info[nbpools].alloc_bytes = (ulong)entry->size * alloc_items;
 		pool_info[nbpools].used_items = pool_used(entry);
 		pool_info[nbpools].cached_items = cached;
-		pool_info[nbpools].need_avg = swrate_avg(entry->needed_avg, POOL_AVG_SAMPLES);
+		pool_info[nbpools].need_avg = swrate_avg(pool_needed_avg(entry), POOL_AVG_SAMPLES);
 		pool_info[nbpools].failed_items = entry->failed;
 		nbpools++;
 	}
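
For readers unfamiliar with the sliding-sum averaging referenced above, the following is a minimal standalone C sketch of the aggregation principle the commit message describes: each bucket keeps a raw sliding sum that stays close to average * samples, the raw per-bucket sums are added together, and the total only becomes a usable average once divided back by the sample count. All names below (struct bucket, sliding_add, sliding_avg, BUCKETS, SAMPLES) are illustrative stand-ins, not HAProxy's actual swrate_* helpers or its pool structures.

#include <stdio.h>

#define BUCKETS  4       /* illustrative stand-in for CONFIG_HAP_POOL_BUCKETS */
#define SAMPLES  32      /* illustrative stand-in for POOL_AVG_SAMPLES */

struct bucket {
	unsigned int used;        /* objects currently in use in this bucket */
	unsigned int needed_sum;  /* raw sliding sum, roughly needed_avg * SAMPLES */
};

/* one update step of a sliding sum: drop roughly one average-sized sample
 * and add <v>. This keeps the sum close to v * SAMPLES when v is stable.
 */
static void sliding_add(unsigned int *sum, unsigned int v)
{
	*sum = *sum - (*sum + SAMPLES - 1) / SAMPLES + v;
}

/* convert a raw sliding sum back to an average (rounded up) */
static unsigned int sliding_avg(unsigned int sum)
{
	return (sum + SAMPLES - 1) / SAMPLES;
}

/* add up the raw per-bucket sums, mirroring the role of pool_needed_avg():
 * the result is still "raw" and must be passed through sliding_avg().
 */
static unsigned int total_needed_raw(const struct bucket *b)
{
	unsigned int sum = 0;

	for (int i = 0; i < BUCKETS; i++)
		sum += b[i].needed_sum;
	return sum;
}

int main(void)
{
	struct bucket b[BUCKETS] = { 0 };

	/* simulate allocations landing in different buckets: each one only
	 * touches its own bucket's counters, so no single counter is shared
	 * by every thread.
	 */
	for (int i = 0; i < 1000; i++) {
		int idx = i % BUCKETS;

		b[idx].used++;
		sliding_add(&b[idx].needed_sum, b[idx].used);
		if (i % 3 == 0)
			b[idx].used--;   /* some objects get released */
	}

	printf("aggregated needed average: %u\n",
	       sliding_avg(total_needed_raw(b)));
	return 0;
}

Dividing the aggregated raw total once by the sample count yields the sum of the per-bucket averages, which is why pool_needed_avg() can return a raw total and leave the swrate_avg() call to its callers, as the commit message notes.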