MINOR: activity: collect time spent waiting on a lock for each task
author     Willy Tarreau <w@1wt.eu>
           Thu, 11 Sep 2025 08:47:35 +0000 (10:47 +0200)
committer  Willy Tarreau <w@1wt.eu>
           Thu, 11 Sep 2025 14:32:34 +0000 (16:32 +0200)
When DEBUG_THREAD > 0 and task profiling is enabled, each locking
attempt measures the time it takes to obtain the lock and adds that
time to a thread_ctx accumulator, which the scheduler then retrieves
to update the current task's sched_activity entry. The value appears
averaged over the number of calls in the lkw_avg column of
"show profiling tasks", such as below:

  Tasks activity over 48.298 sec till 0.000 sec ago:
    function                      calls   cpu_tot   cpu_avg   lkw_avg   lat_avg
    h1_io_cb                    3200170   26.81s    8.377us      -      32.73us <- sock_conn_iocb@src/sock.c:1099 tasklet_wakeup
    sc_conn_io_cb               1657841   1.645s    992.0ns      -      853.0us <- sc_app_chk_rcv_conn@src/stconn.c:844 tasklet_wakeup
    process_stream              1600450   49.16s    30.71us   1.936us   1.392ms <- sc_notify@src/stconn.c:1206 task_wakeup
    process_stream              1600321   7.770m    291.3us   209.1us   901.6us <- stream_new@src/stream.c:585 task_wakeup
    sc_conn_io_cb               1599928   7.975s    4.984us      -      65.77us <- h1_wake_stream_for_recv@src/mux_h1.c:3633 tasklet_wakeup
    task_process_applet          997609   46.37s    46.48us   16.80us   113.0us <- sc_app_chk_snd_applet@src/stconn.c:1043 appctx_wakeup
    process_table_expire         922074   48.79s    52.92us   7.275us   181.1us <- run_tasks_from_lists@src/task.c:670 task_queue
    stktable_add_pend_updates    705423   1.511s    2.142us      -      56.81us <- stktable_add_pend_updates@src/stick_table.c:869 tasklet_wakeup
    task_process_applet          683511   34.75s    50.84us   18.37us   153.3us <- __process_running_peer_sync@src/peers.c:3579 appctx_wakeup
    h1_io_cb                     535395   198.1ms   370.0ns   72.00ns   930.4us <- h1_takeover@src/mux_h1.c:5659 tasklet_wakeup

This makes it pretty obvious which tasks (hence call chains) spend
their time waiting on a lock, and what share of their execution time
that represents.
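
For illustration, here is a minimal standalone sketch (not HAProxy
code) of the accounting pattern described above: pthread_mutex and
clock_gettime(CLOCK_MONOTONIC) stand in for HAProxy's lock macros and
now_mono_time(); only the lock_wait_total accumulator and the
profiling-flag check mirror the patch, everything else is made up:

  #include <pthread.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <time.h>

  static __thread uint64_t lock_wait_total; /* ns waited; reset per task run */
  static __thread int task_profiling = 1;   /* stands in for TH_FL_TASK_PROFILING */

  static uint64_t mono_now_ns(void)
  {
          struct timespec ts;
          clock_gettime(CLOCK_MONOTONIC, &ts);
          return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
  }

  /* Same shape as the patched _lock_wait(): read the clock only when
   * profiling is enabled, and account only operations that can wait
   * (unlocks are skipped); lock_start == 0 doubles as "nothing to add".
   */
  static void profiled_lock(pthread_mutex_t *lk)
  {
          uint64_t lock_start = 0;

          if (task_profiling)
                  lock_start = mono_now_ns();
          pthread_mutex_lock(lk);
          if (lock_start)
                  lock_wait_total += mono_now_ns() - lock_start;
  }

  int main(void)
  {
          pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;

          lock_wait_total = 0;       /* the scheduler resets this per task run */
          profiled_lock(&lk);
          pthread_mutex_unlock(&lk); /* unlock paths are deliberately not timed */
          printf("waited %llu ns on locks\n",
                 (unsigned long long)lock_wait_total);
          return 0;
  }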

include/haproxy/thread.h
include/haproxy/tinfo-t.h
src/task.c

diff --git a/include/haproxy/thread.h b/include/haproxy/thread.h
index ad0ae1aa749991e4bbc6dc3e537822c7f025bfa9..c030c5e0f0d1a34987a35899dbef945895b08e20 100644
--- a/include/haproxy/thread.h
+++ b/include/haproxy/thread.h
@@ -360,7 +360,15 @@ static inline unsigned long thread_isolated()
        } while (0)
 
 #define _lock_wait(_LK_, lbl, expr) do {                               \
+               uint64_t lock_start = 0;                                \
+               extern uint64_t now_mono_time(void);                    \
+               if (_LK_ != _LK_UN) {                                   \
+                       if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) \
+                               lock_start = now_mono_time();           \
+               }                                                       \
                (void)(expr);                                           \
+               if (_LK_ != _LK_UN && unlikely(lock_start))             \
+                       th_ctx->lock_wait_total += now_mono_time() - lock_start; \
                if (lbl != OTHER_LOCK)                                  \
                        _lock_wait_common(_LK_, lbl);                   \
        } while (0)
diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h
index 9e379ae5b458c3b9d785bb2abebf6a569897947a..78084c497ac1e179fb1f35de883722be5eb8806d 100644
--- a/include/haproxy/tinfo-t.h
+++ b/include/haproxy/tinfo-t.h
@@ -161,13 +161,16 @@ struct thread_ctx {
 
        uint32_t sched_wake_date;           /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */
        uint64_t sched_call_date;           /* current task/tasklet's call date in ns */
+       uint64_t lock_wait_total;           /* total time in ns spent waiting for a lock (task prof) */
 
        uint64_t prev_mono_time;            /* previous system wide monotonic time (leaving poll) */
        uint64_t curr_mono_time;            /* latest system wide monotonic time (leaving poll) */
 
        ulong lock_history;                 /* history of used locks, see thread.h for more details */
 
-       // third cache line here on 64 bits: accessed mostly using atomic ops
+       /* around 56 unused bytes here */
+
+       // fourth cache line here on 64 bits: accessed mostly using atomic ops
        ALWAYS_ALIGN(64);
        struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */
        unsigned int rqueue_ticks;          /* Insertion counter for the run queue */
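
The hunk above also explains why the comment moved from "third" to
"fourth" cache line: the new 8-byte accumulator, plus the padding noted
as "around 56 unused bytes", presumably pushes the atomically accessed
fields onto the next 64-byte boundary. A rough sketch of the layout
idea (not the real struct; every field name except lock_wait_total is
made up, with ALWAYS_ALIGN(64) emulated via C11 _Alignas):

  #include <stdalign.h>
  #include <stddef.h>
  #include <stdint.h>
  #include <stdio.h>

  struct ctx_sketch {
          uint64_t sched_call_date;  /* hot, written by the owner thread only */
          uint64_t lock_wait_total;  /* the new per-task accumulator */
          /* compiler-inserted padding ends up here */
          _Alignas(64) uint64_t shared_word; /* touched by other threads via atomics */
  };

  int main(void)
  {
          /* the alignment keeps cross-thread atomic writes off the cache
           * line holding the owner thread's hot fields (no false sharing) */
          printf("shared_word offset: %zu\n",
                 offsetof(struct ctx_sketch, shared_word));
          return 0;
  }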
diff --git a/src/task.c b/src/task.c
index 72be355f051fae67b3930a37d5d2df2790dc01c8..fdd2775baf20b4577e01ea02267631f5ac58c92d 100644
--- a/src/task.c
+++ b/src/task.c
@@ -569,6 +569,7 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
                process = t->process;
                t->calls++;
 
+               th_ctx->lock_wait_total = 0;
                th_ctx->sched_wake_date = t->wake_date;
                if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) {
                        /* take the most accurate clock we have, either
@@ -678,8 +679,11 @@ unsigned int run_tasks_from_lists(unsigned int budgets[])
                __ha_barrier_store();
 
                /* stats are only registered for non-zero wake dates */
-               if (unlikely(th_ctx->sched_wake_date))
+               if (unlikely(th_ctx->sched_wake_date)) {
                        HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
+                       if (th_ctx->lock_wait_total)
+                               HA_ATOMIC_ADD(&profile_entry->lkw_time, th_ctx->lock_wait_total);
+               }
        }
        th_ctx->current_queue = -1;
        th_ctx->sched_wake_date = TICK_ETERNITY;
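
The display side of "show profiling tasks" is not part of these hunks;
presumably lkw_avg is simply the accumulated lkw_time divided by the
call count, as in this sketch (cpu_time and lkw_time appear in the diff
above, while the struct name and the averaging are assumptions based on
the column names):

  #include <stdint.h>
  #include <stdio.h>

  /* sketch only: per-call averages as the cpu_avg/lkw_avg columns imply */
  struct entry_sketch {
          uint64_t calls;     /* incremented once per task run */
          uint64_t cpu_time;  /* total ns of execution */
          uint64_t lkw_time;  /* total ns spent waiting on locks */
  };

  static void print_avgs(const struct entry_sketch *e)
  {
          if (!e->calls)
                  return;
          printf("calls=%llu cpu_avg=%lluns lkw_avg=%lluns\n",
                 (unsigned long long)e->calls,
                 (unsigned long long)(e->cpu_time / e->calls),
                 (unsigned long long)(e->lkw_time / e->calls));
  }

  int main(void)
  {
          /* numbers taken from the process_stream line in the sample output */
          struct entry_sketch e = { .calls = 1600450,
                                    .cpu_time = 49160000000ULL,  /* 49.16s  */
                                    .lkw_time = 3098471200ULL }; /* ~1.936us * calls */
          print_avgs(&e);
          return 0;
  }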