When DEBUG_THREAD > 0 and task profiling is enabled, we'll now measure
the time spent with at least one lock held for each task. The time is
collected by the locking operations themselves: measurement starts when
taking a lock raises the nesting level to one, and stops when releasing
the last lock brings it back to zero. An accumulator in the thread_ctx
struct is updated along the way; the scheduler collects it when the
task returns and adds it to the sched_activity entry of the related
task. This makes it possible to observe figures like these:
Tasks activity over 259.516 sec till 0.000 sec ago:
  function                       calls   cpu_tot   cpu_avg   lkw_avg   lkd_avg   lat_avg
  h1_io_cb                    15466589    2.574m   9.984us         -         -   33.45us <- sock_conn_iocb@src/sock.c:1099 tasklet_wakeup
  sc_conn_io_cb                8047994    8.325s   1.034us         -         -   870.1us <- sc_app_chk_rcv_conn@src/stconn.c:844 tasklet_wakeup
  process_stream               7734689    4.356m   33.79us   1.990us   1.641us   1.554ms <- sc_notify@src/stconn.c:1206 task_wakeup
  process_stream               7734292    46.74m   362.6us   278.3us   132.2us   972.0us <- stream_new@src/stream.c:585 task_wakeup
  sc_conn_io_cb                7733158    46.88s   6.061us         -         -   68.78us <- h1_wake_stream_for_recv@src/mux_h1.c:3633 tasklet_wakeup
  task_process_applet          6603593    4.484m   40.74us   16.69us   34.00us   96.47us <- sc_app_chk_snd_applet@src/stconn.c:1043 appctx_wakeup
  task_process_applet          4761796    3.420m   43.09us   18.79us   39.28us   138.2us <- __process_running_peer_sync@src/peers.c:3579 appctx_wakeup
  process_table_expire         4710662    4.880m   62.16us   9.648us   53.95us   158.6us <- run_tasks_from_lists@src/task.c:671 task_queue
  stktable_add_pend_updates    4171868    6.786s   1.626us         -   1.487us   47.94us <- stktable_add_pend_updates@src/stick_table.c:869 tasklet_wakeup
  h1_io_cb                     2871683    1.198s   417.0ns   70.00ns   69.00ns   1.005ms <- h1_takeover@src/mux_h1.c:5659 tasklet_wakeup
  process_peer_sync            2304957    5.368s   2.328us         -   1.156us   68.54us <- stktable_add_pend_updates@src/stick_table.c:873 task_wakeup
  process_peer_sync            1388141    3.174s   2.286us         -   1.130us   52.31us <- run_tasks_from_lists@src/task.c:671 task_queue
  stktable_add_pend_updates     463488    3.530s   7.615us   2.000ns   7.134us   771.2us <- stktable_touch_with_exp@src/stick_table.c:654 tasklet_wakeup
Here we can see that almost the entire execution time of
stktable_add_pend_updates() is spent under a lock, that about 1/3 of the
execution time of process_stream() was spent under a lock and about 2/3
of it was spent waiting for a lock (this is related to the 10 track-sc
present in this config), and that the locking time in
process_peer_sync() is now significantly reduced. This is even more
visible with "show profiling tasks aggr":
Tasks activity over 475.354 sec till 0.000 sec ago:
  function                       calls   cpu_tot   cpu_avg   lkw_avg   lkd_avg   lat_avg
  h1_io_cb                    25742539    3.699m   8.622us   11.00ns   10.00ns   188.0us
  sc_conn_io_cb               22565666    1.475m   3.920us         -         -   473.9us
  process_stream              21665212    1.195h   198.6us   140.6us   67.08us   1.266ms
  task_process_applet         16352495    11.31m   41.51us   17.98us   36.55us   112.3us
  process_peer_sync            7831923    17.15s   2.189us         -   1.107us   41.27us
  process_table_expire         6878569    6.866m   59.89us   9.359us   51.91us   151.8us
  stktable_add_pend_updates    6602502    14.77s   2.236us         -   2.060us   119.8us
  h1_timeout_task                  801   703.4us   878.0ns         -         -   185.7us
  srv_cleanup_toremove_conns       347   12.43ms   35.82us   240.0ns   70.00ns   1.924ms
  accept_queue_process             142   1.384ms   9.743us         -         -   340.6us
  srv_cleanup_idle_conns            74   475.0us   6.418us   896.0ns   5.667us   114.6us
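
For illustration only, here is a minimal standalone sketch of the
accounting idea described above, using a per-thread nesting counter and
two accumulators. Everything in it (my_lock(), my_unlock(), mono_ns())
is made up for the example and does not exist in the tree; the real
code is in the DEBUG_THREAD lock macros modified below and relies on
th_ctx, now_mono_time() and the TH_FL_TASK_PROFILING flag:

  #include <pthread.h>
  #include <stdint.h>
  #include <time.h>

  static __thread unsigned int lock_level;      /* locks currently held */
  static __thread uint64_t     lock_start_date; /* when level rose 0 -> 1 */
  static __thread uint64_t     lock_wait_total; /* ns spent waiting for locks */
  static __thread uint64_t     locked_total;    /* ns spent with >= 1 lock held */

  static uint64_t mono_ns(void)
  {
      struct timespec ts;
      clock_gettime(CLOCK_MONOTONIC, &ts);
      return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
  }

  static void my_lock(pthread_mutex_t *mx)
  {
      uint64_t start = mono_ns();

      pthread_mutex_lock(mx);
      lock_wait_total += mono_ns() - start;
      if (++lock_level == 1)           /* 0 -> 1: open the "locked" window */
          lock_start_date = mono_ns();
  }

  static void my_unlock(pthread_mutex_t *mx)
  {
      pthread_mutex_unlock(mx);
      if (--lock_level == 0)           /* 1 -> 0: close the "locked" window */
          locked_total += mono_ns() - lock_start_date;
  }

A scheduler following the same approach resets lock_wait_total and
locked_total before calling a task, then adds whatever accumulated to
that task's statistics once it returns; this is what the changes below
do with th_ctx->locked_total and profile_entry->lkd_time.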
lock_start = now_mono_time(); \
} \
(void)(expr); \
- if (_LK_ == _LK_UN) \
+ if (_LK_ == _LK_UN) { \
th_ctx->lock_level += bal; \
- else if (unlikely(lock_start)) \
- th_ctx->lock_wait_total += now_mono_time() - lock_start; \
+ if (th_ctx->lock_level == 0 && unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) \
+ th_ctx->locked_total += now_mono_time() - th_ctx->lock_start_date; \
+ } else if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) { \
+ uint64_t now = now_mono_time(); \
+ if (lock_start) \
+ th_ctx->lock_wait_total += now - lock_start; \
+ if (th_ctx->lock_level == 1) \
+ th_ctx->lock_start_date = now; \
+ } \
if (lbl != OTHER_LOCK) \
_lock_wait_common(_LK_, lbl); \
} while (0)
#define _lock_cond(_LK_, bal, lbl, expr) ({ \
typeof(expr) _expr = (expr); \
- if (_expr == 0) \
+ if (_expr == 0) { \
th_ctx->lock_level += bal; \
+ if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) { \
+ if (_LK_ == _LK_UN && th_ctx->lock_level == 0) \
+ th_ctx->locked_total += now_mono_time() - th_ctx->lock_start_date; \
+ else if (_LK_ != _LK_UN && th_ctx->lock_level == 1) \
+ th_ctx->lock_start_date = now_mono_time(); \
+ } \
+ } \
if (lbl != OTHER_LOCK && !_expr) \
_lock_wait_common(_LK_, lbl); \
_expr; \
uint32_t sched_wake_date; /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */
uint64_t sched_call_date; /* current task/tasklet's call date in ns */
uint64_t lock_wait_total; /* total time in ns spent waiting for a lock (task prof) */
+ uint64_t lock_start_date; /* date when the first lock was obtained (task prof) */
+ uint64_t locked_total; /* total time in ns spent with at least one lock held (task prof) */
uint64_t prev_mono_time; /* previous system wide monotonic time (leaving poll) */
uint64_t curr_mono_time; /* latest system wide monotonic time (leaving poll) */
t->calls++;
th_ctx->lock_wait_total = 0;
+ th_ctx->locked_total = 0;
th_ctx->sched_wake_date = t->wake_date;
if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) {
/* take the most accurate clock we have, either
HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
if (th_ctx->lock_wait_total)
HA_ATOMIC_ADD(&profile_entry->lkw_time, th_ctx->lock_wait_total);
+ if (th_ctx->locked_total)
+ HA_ATOMIC_ADD(&profile_entry->lkd_time, th_ctx->locked_total);
}
}
th_ctx->current_queue = -1;
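
Side note (assumption, not part of this patch): the lkd_avg column in
the output above is expected to be derived like the other *_avg columns,
i.e. the accumulated total divided by the number of calls, roughly:

  /* illustrative only; the actual formatting is done by the
   * "show profiling tasks" handler
   */
  lkd_avg = profile_entry->calls ? profile_entry->lkd_time / profile_entry->calls : 0;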