From: Willy Tarreau Date: Thu, 11 Sep 2025 08:47:35 +0000 (+0200) Subject: MINOR: activity: collect time spent waiting on a lock for each task X-Git-Tag: v3.3-dev9~131 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=503084643f72357e585cc08266815d0bbe254a64;p=thirdparty%2Fhaproxy.git MINOR: activity: collect time spent waiting on a lock for each task When DEBUG_THREAD > 0, and if task profiling is enabled, then each locking attempt will measure the time it takes to obtain the lock, then add that time to a thread_ctx accumulator that the scheduler will then retrieve to update the current task's sched_activity entry. The value will then appear averaged over the number of calls in the lkw_avg column of "show profiling tasks", such as below: Tasks activity over 48.298 sec till 0.000 sec ago: function calls cpu_tot cpu_avg lkw_avg lat_avg h1_io_cb 3200170 26.81s 8.377us - 32.73us <- sock_conn_iocb@src/sock.c:1099 tasklet_wakeup sc_conn_io_cb 1657841 1.645s 992.0ns - 853.0us <- sc_app_chk_rcv_conn@src/stconn.c:844 tasklet_wakeup process_stream 1600450 49.16s 30.71us 1.936us 1.392ms <- sc_notify@src/stconn.c:1206 task_wakeup process_stream 1600321 7.770m 291.3us 209.1us 901.6us <- stream_new@src/stream.c:585 task_wakeup sc_conn_io_cb 1599928 7.975s 4.984us - 65.77us <- h1_wake_stream_for_recv@src/mux_h1.c:3633 tasklet_wakeup task_process_applet 997609 46.37s 46.48us 16.80us 113.0us <- sc_app_chk_snd_applet@src/stconn.c:1043 appctx_wakeup process_table_expire 922074 48.79s 52.92us 7.275us 181.1us <- run_tasks_from_lists@src/task.c:670 task_queue stktable_add_pend_updates 705423 1.511s 2.142us - 56.81us <- stktable_add_pend_updates@src/stick_table.c:869 tasklet_wakeup task_process_applet 683511 34.75s 50.84us 18.37us 153.3us <- __process_running_peer_sync@src/peers.c:3579 appctx_wakeup h1_io_cb 535395 198.1ms 370.0ns 72.00ns 930.4us <- h1_takeover@src/mux_h1.c:5659 tasklet_wakeup It now makes it pretty obvious which tasks (hence call chains) spend their 
time waiting on a lock and for what share of their execution time. --- diff --git a/include/haproxy/thread.h b/include/haproxy/thread.h index ad0ae1aa7..c030c5e0f 100644 --- a/include/haproxy/thread.h +++ b/include/haproxy/thread.h @@ -360,7 +360,15 @@ static inline unsigned long thread_isolated() } while (0) #define _lock_wait(_LK_, lbl, expr) do { \ + uint64_t lock_start = 0; \ + extern uint64_t now_mono_time(void); \ + if (_LK_ != _LK_UN) { \ + if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) \ + lock_start = now_mono_time(); \ + } \ (void)(expr); \ + if (_LK_ != _LK_UN && unlikely(lock_start)) \ + th_ctx->lock_wait_total += now_mono_time() - lock_start; \ if (lbl != OTHER_LOCK) \ _lock_wait_common(_LK_, lbl); \ } while (0) diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h index 9e379ae5b..78084c497 100644 --- a/include/haproxy/tinfo-t.h +++ b/include/haproxy/tinfo-t.h @@ -161,13 +161,16 @@ struct thread_ctx { uint32_t sched_wake_date; /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */ uint64_t sched_call_date; /* current task/tasklet's call date in ns */ + uint64_t lock_wait_total; /* total time in ns spent waiting for a lock (task prof) */ uint64_t prev_mono_time; /* previous system wide monotonic time (leaving poll) */ uint64_t curr_mono_time; /* latest system wide monotonic time (leaving poll) */ ulong lock_history; /* history of used locks, see thread.h for more details */ - // third cache line here on 64 bits: accessed mostly using atomic ops + /* around 56 unused bytes here */ + + // fourth cache line here on 64 bits: accessed mostly using atomic ops ALWAYS_ALIGN(64); struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */ unsigned int rqueue_ticks; /* Insertion counter for the run queue */ diff --git a/src/task.c b/src/task.c index 72be355f0..fdd2775ba 100644 --- a/src/task.c +++ b/src/task.c @@ -569,6 +569,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[]) process = 
t->process; t->calls++; + th_ctx->lock_wait_total = 0; th_ctx->sched_wake_date = t->wake_date; if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) { /* take the most accurate clock we have, either @@ -678,8 +679,11 @@ unsigned int run_tasks_from_lists(unsigned int budgets[]) __ha_barrier_store(); /* stats are only registered for non-zero wake dates */ - if (unlikely(th_ctx->sched_wake_date)) + if (unlikely(th_ctx->sched_wake_date)) { HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date)); + if (th_ctx->lock_wait_total) + HA_ATOMIC_ADD(&profile_entry->lkw_time, th_ctx->lock_wait_total); + } } th_ctx->current_queue = -1; th_ctx->sched_wake_date = TICK_ETERNITY;