From: Willy Tarreau
Date: Thu, 11 Sep 2025 12:52:00 +0000 (+0200)
Subject: MINOR: activity: collect CPU time spent on memory allocations for each task
X-Git-Tag: v3.3-dev9~126
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=9d8c2a888bab14102c2b78d8e0b6f54c9877ae9b;p=thirdparty%2Fhaproxy.git

MINOR: activity: collect CPU time spent on memory allocations for each task

When task profiling is enabled, the pool alloc/free code will measure the
time it takes to perform a memory allocation after a cache miss, or to
release memory to the shared cache or to the OS. The time spent in the
thread-local cache is never measured, as measuring it would be very
expensive compared to the pool access itself. Measuring the slow paths
costs around 2% of performance at 2M req/s, and only when task profiling
is enabled, so this remains reasonable.

The scheduler takes care of collecting that time and of updating the
sched_activity entry corresponding to the current task when task
profiling is enabled. The goal is clearly to track places that waste CPU
time by allocating and releasing too often, or by causing large
evictions. This appears as follows in "show profiling tasks aggr":

  Tasks activity over 11.428 sec till 0.000 sec ago:
    function                      calls   cpu_tot   cpu_avg   lkw_avg   lkd_avg   mem_avg   lat_avg
    process_stream             44183891    16.47m   22.36us   491.0ns   1.154us   1.000ns   101.1us
    h1_io_cb                   57386064    4.011m   4.193us   20.00ns   16.00ns         -   29.47us
    sc_conn_io_cb              42088024    49.04s   1.165us         -         -         -   54.67us
    h1_timeout_task              438171   196.5ms   448.0ns         -         -         -   100.1us
    srv_cleanup_toremove_conns       65   1.468ms   22.58us   184.0ns   87.00ns         -   101.3us
    task_process_applet               3   508.0us   169.3us         -   107.0us   1.847us   29.67us
    srv_cleanup_idle_conns            6   225.3us   37.55us   15.74us   36.84us         -   49.47us
    accept_queue_process              2   45.62us   22.81us         -         -   4.949us   54.33us
---

diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h
index 7d550e7e6..8968cfa09 100644
--- a/include/haproxy/tinfo-t.h
+++ b/include/haproxy/tinfo-t.h
@@ -161,6 +161,7 @@ struct thread_ctx {
 	uint32_t sched_wake_date;   /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */
 	uint64_t sched_call_date;   /* current task/tasklet's call date in ns */
 	uint64_t lock_wait_total;   /* total time in ns spent waiting for a lock (task prof) */
+	uint64_t mem_wait_total;    /* total time in ns spent allocating/freeing memory (task prof) */
 	uint64_t lock_start_date;   /* date when first locked was obtained (task prof) */
 	uint64_t locked_total;      /* total time in ns spent with at least one lock held (task prof) */
diff --git a/src/pool.c b/src/pool.c
index d7928c37c..d824b2b93 100644
--- a/src/pool.c
+++ b/src/pool.c
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -787,10 +788,18 @@ void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
 	pool_cache_bytes += pool->size;
 
 	if (unlikely(pool_cache_bytes > global.tune.pool_cache_size * 3 / 4)) {
+		uint64_t mem_wait_start = 0;
+
+		if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+			mem_wait_start = now_mono_time();
+
 		if (ph->count >= 16 + pool_cache_count / 8 + CONFIG_HAP_POOL_CLUSTER_SIZE)
 			pool_evict_from_local_cache(pool, 0);
 		if (pool_cache_bytes > global.tune.pool_cache_size)
 			pool_evict_from_local_caches();
+
+		if (unlikely(mem_wait_start))
+			th_ctx->mem_wait_total += now_mono_time() - mem_wait_start;
 	}
 }
 
@@ -941,8 +950,12 @@ void pool_flush(struct pool_head *pool)
 void pool_gc(struct pool_head *pool_ctx)
 {
 	struct pool_head *entry;
+	uint64_t mem_wait_start = 0;
 	int isolated = thread_isolated();
 
+	if (th_ctx->flags & TH_FL_TASK_PROFILING)
+		mem_wait_start = now_mono_time();
+
 	if (!isolated)
 		thread_isolate();
 
@@ -975,6 +988,9 @@ void pool_gc(struct pool_head *pool_ctx)
 
 	if (!isolated)
 		thread_release();
+
+	if (mem_wait_start)
+		th_ctx->mem_wait_total += now_mono_time() - mem_wait_start;
 }
 
 /*
@@ -995,9 +1011,19 @@ void *__pool_alloc(struct pool_head *pool, unsigned int flags)
 	if (likely(!(pool_debugging & POOL_DBG_NO_CACHE)) && !p)
 		p = pool_get_from_cache(pool, caller);
 
-	if (unlikely(!p))
+	if (unlikely(!p)) {
+		/* count allocation time only for cache misses */
+		uint64_t mem_wait_start = 0;
+
+		if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+			mem_wait_start = now_mono_time();
+
 		p = pool_alloc_nocache(pool, caller);
 
+		if (unlikely(mem_wait_start))
+			th_ctx->mem_wait_total += now_mono_time() - mem_wait_start;
+	}
+
 	if (likely(p)) {
 #ifdef USE_MEMORY_PROFILING
 		if (unlikely(profiling & HA_PROF_MEMORY)) {
@@ -1061,11 +1087,20 @@ void __pool_free(struct pool_head *pool, void *ptr)
 	if (unlikely((pool_debugging & POOL_DBG_NO_CACHE) ||
 		     (pool->flags & MEM_F_UAF) ||
 		     global.tune.pool_cache_size < pool->size)) {
+		uint64_t mem_wait_start = 0;
+
+		if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+			mem_wait_start = now_mono_time();
+
 		pool_free_nocache(pool, ptr);
-		return;
-	}
 
-	pool_put_to_cache(pool, ptr, caller);
+		if (unlikely(mem_wait_start))
+			th_ctx->mem_wait_total += now_mono_time() - mem_wait_start;
+	}
+	else {
+		/* this one will count its own time itself */
+		pool_put_to_cache(pool, ptr, caller);
+	}
 }
 
 /*
diff --git a/src/task.c b/src/task.c
index 0de5a588d..4f7ef2bb2 100644
--- a/src/task.c
+++ b/src/task.c
@@ -570,6 +570,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 
 		t->calls++;
 		th_ctx->lock_wait_total = 0;
+		th_ctx->mem_wait_total = 0;
 		th_ctx->locked_total = 0;
 		th_ctx->sched_wake_date = t->wake_date;
 		if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) {
@@ -684,6 +685,8 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
 			HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
 			if (th_ctx->lock_wait_total)
 				HA_ATOMIC_ADD(&profile_entry->lkw_time, th_ctx->lock_wait_total);
+			if (th_ctx->mem_wait_total)
+				HA_ATOMIC_ADD(&profile_entry->mem_time, th_ctx->mem_wait_total);
 			if (th_ctx->locked_total)
 				HA_ATOMIC_ADD(&profile_entry->lkd_time, th_ctx->locked_total);
 		}
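
For readers who want the pattern in isolation, the following stand-alone
sketch mirrors what each slow-path site in the patch does: take a monotonic
timestamp only when profiling is enabled, do the expensive work, then add
the elapsed nanoseconds to a per-thread counter that the scheduler later
folds into the current task's profiling entry. The names mono_time_ns(),
slow_path_alloc() and timed_alloc(), and the plain boolean flag, are
illustrative stand-ins for HAProxy's now_mono_time(), pool_alloc_nocache()
and the TH_FL_TASK_PROFILING check on th_ctx->flags; this is not part of
the patch itself.

    /* stand-alone sketch of the conditional timing pattern (not HAProxy code) */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    static _Thread_local uint64_t mem_wait_total; /* mirrors th_ctx->mem_wait_total */
    static int task_profiling_enabled = 1;        /* stand-in for TH_FL_TASK_PROFILING */

    /* stand-in for now_mono_time(): monotonic clock in nanoseconds */
    static uint64_t mono_time_ns(void)
    {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    }

    /* stand-in for the slow path (pool_alloc_nocache() in the patch) */
    static void *slow_path_alloc(size_t size)
    {
        return malloc(size);
    }

    static void *timed_alloc(size_t size)
    {
        uint64_t mem_wait_start = 0;
        void *p;

        /* only pay for the timestamp when profiling is enabled */
        if (task_profiling_enabled)
            mem_wait_start = mono_time_ns();

        p = slow_path_alloc(size);

        /* accumulate the time spent in the slow path */
        if (mem_wait_start)
            mem_wait_total += mono_time_ns() - mem_wait_start;
        return p;
    }

    int main(void)
    {
        free(timed_alloc(4096));
        printf("ns spent in slow-path allocation: %llu\n",
               (unsigned long long)mem_wait_total);
        return 0;
    }

Taking the timestamp only when the profiling flag is set keeps the fast
path free of clock reads, which is why the roughly 2% overhead mentioned
in the commit message only appears when task profiling is enabled.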