git.ipfire.org Git - thirdparty/haproxy.git/commitdiff
MEDIUM: activity: apply and use new finegrained task profiling settings
authorWilly Tarreau <w@1wt.eu>
Tue, 10 Feb 2026 13:17:36 +0000 (14:17 +0100)
committerWilly Tarreau <w@1wt.eu>
Tue, 10 Feb 2026 16:52:59 +0000 (17:52 +0100)
In continuity with the previous patch, this one makes use of the new profiling
flags. For this, based on the global "profiling" setting, when switching
profiling on, we set or clear two flags on the thread context,
TH_FL_TASK_PROFILING_L and TH_FL_TASK_PROFILING_M to indicate whether
lock profiling and/or malloc profiling are desired when profiling is
enabled. These flags are checked along with TH_FL_TASK_PROFILING to
decide when to collect time around a lock or a malloc. And by default
we're back to the behavior of 3.2 in that neither lock nor malloc times
are collected anymore.

This is sufficient to make the CPU usage spent in the VDSO drop
significantly, from 22% to 2.2%, on a highly loaded system.

This should be backported to 3.3 along with the previous patch.

include/haproxy/thread.h
include/haproxy/tinfo-t.h
src/activity.c
src/pool.c

index cde5c6aa57dbedeef1bbd3b06f0c3ddde30e99d5..c97566b92a6b9f743daea84f2433b71047ea5dc3 100644 (file)
@@ -362,15 +362,19 @@ static inline unsigned long thread_isolated()
                extern uint64_t now_mono_time(void);                    \
                if (_LK_ != _LK_UN) {                                   \
                        th_ctx->lock_level += bal;                      \
-                       if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) \
+                       if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L)) == \
+                                    (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L))) \
                                lock_start = now_mono_time();           \
                }                                                       \
                (void)(expr);                                           \
                if (_LK_ == _LK_UN) {                                   \
                        th_ctx->lock_level += bal;                      \
-                       if (th_ctx->lock_level == 0 && unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) \
+                       if (th_ctx->lock_level == 0 &&\
+                           unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L)) == \
+                                    (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L))) \
                                th_ctx->locked_total += now_mono_time() - th_ctx->lock_start_date; \
-               } else if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) { \
+               } else if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L)) == \
+                                    (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L))) { \
                        uint64_t now = now_mono_time();                 \
                        if (lock_start)                                 \
                                th_ctx->lock_wait_total += now - lock_start; \
@@ -384,7 +388,8 @@ static inline unsigned long thread_isolated()
                typeof(expr) _expr = (expr);                            \
                if (_expr == 0) {                                       \
                        th_ctx->lock_level += bal;                      \
-                       if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) { \
+                       if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L)) == \
+                                    (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_L))) { \
                                if (_LK_ == _LK_UN && th_ctx->lock_level == 0) \
                                        th_ctx->locked_total += now_mono_time() - th_ctx->lock_start_date; \
                                else if (_LK_ != _LK_UN && th_ctx->lock_level == 1) \
index 62e87f87e47a51d3e238f3633b7df3a9c00973ac..dc713cc1f48f0ccb5da33b7d28ee894ad1d944df 100644 (file)
@@ -69,6 +69,8 @@ enum {
 #define TH_FL_IN_DBG_HANDLER    0x00000100  /* thread currently in the debug signal handler */
 #define TH_FL_IN_WDT_HANDLER    0x00000200  /* thread currently in the wdt signal handler */
 #define TH_FL_IN_ANY_HANDLER    0x00000380  /* mask to test if the thread is in any signal handler */
+#define TH_FL_TASK_PROFILING_L  0x00000400  /* task profiling in locks (also requires TASK_PROFILING) */
+#define TH_FL_TASK_PROFILING_M  0x00000800  /* task profiling in mem alloc (also requires TASK_PROFILING) */
 
 /* we have 4 buffer-wait queues, in highest to lowest emergency order */
 #define DYNBUF_NBQ              4
index 4cc2386de58952712d6198897304a83a013648e4..8f794abb146f9a79fcbcfc8b849e2086ffb46d80 100644 (file)
@@ -659,8 +659,20 @@ void activity_count_runtime(uint32_t run_time)
        if (!(_HA_ATOMIC_LOAD(&th_ctx->flags) & TH_FL_TASK_PROFILING)) {
                if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_ON ||
                             ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AON &&
-                            swrate_avg(run_time, TIME_STATS_SAMPLES) >= up)))
+                             swrate_avg(run_time, TIME_STATS_SAMPLES) >= up))) {
+
+                       if (profiling & HA_PROF_TASKS_LOCK)
+                               _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING_L);
+                       else
+                               _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING_L);
+
+                       if (profiling & HA_PROF_TASKS_MEM)
+                               _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING_M);
+                       else
+                               _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING_M);
+
                        _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING);
+               }
        } else {
                if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_OFF ||
                             ((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AOFF &&
index b76bd83683ce0410bec680a788194a0801058c64..c52da365eccc43dff87392d13a4a3a1b1982a2fe 100644 (file)
@@ -806,7 +806,8 @@ void pool_put_to_cache(struct pool_head *pool, void *ptr, const void *caller)
        if (unlikely(pool_cache_bytes > global.tune.pool_cache_size * 3 / 4)) {
                uint64_t mem_wait_start = 0;
 
-               if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+               if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)) ==
+                            (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)))
                        mem_wait_start = now_mono_time();
 
                if (ph->count >= 16 + pool_cache_count / 8 + CONFIG_HAP_POOL_CLUSTER_SIZE)
@@ -969,7 +970,8 @@ void pool_gc(struct pool_head *pool_ctx)
        uint64_t mem_wait_start = 0;
        int isolated = thread_isolated();
 
-       if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+       if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)) ==
+                    (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)))
                mem_wait_start = now_mono_time();
 
        if (!isolated)
@@ -1031,7 +1033,8 @@ void *__pool_alloc(struct pool_head *pool, unsigned int flags)
                /* count allocation time only for cache misses */
                uint64_t mem_wait_start = 0;
 
-               if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+               if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)) ==
+                            (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)))
                        mem_wait_start = now_mono_time();
 
                p = pool_alloc_nocache(pool, caller);
@@ -1109,7 +1112,8 @@ void __pool_free(struct pool_head *pool, void *ptr)
                     global.tune.pool_cache_size < pool->size)) {
                uint64_t mem_wait_start = 0;
 
-               if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))
+               if (unlikely((th_ctx->flags & (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)) ==
+                            (TH_FL_TASK_PROFILING|TH_FL_TASK_PROFILING_M)))
                        mem_wait_start = now_mono_time();
 
                pool_free_nocache(pool, ptr);