When DEBUG_THREAD > 0 and task profiling is enabled, each locking
attempt measures the time it takes to obtain the lock and adds that
time to a thread_ctx accumulator, which the scheduler later retrieves
to update the current task's sched_activity entry. The value then
appears, averaged over the number of calls, in the lkw_avg column of
"show profiling tasks", as in the example below:
Tasks activity over 48.298 sec till 0.000 sec ago:
  function                      calls   cpu_tot   cpu_avg   lkw_avg   lat_avg
  h1_io_cb                    3200170    26.81s   8.377us         -   32.73us <- sock_conn_iocb@src/sock.c:1099 tasklet_wakeup
  sc_conn_io_cb               1657841    1.645s   992.0ns         -   853.0us <- sc_app_chk_rcv_conn@src/stconn.c:844 tasklet_wakeup
  process_stream              1600450    49.16s   30.71us   1.936us   1.392ms <- sc_notify@src/stconn.c:1206 task_wakeup
  process_stream              1600321    7.770m   291.3us   209.1us   901.6us <- stream_new@src/stream.c:585 task_wakeup
  sc_conn_io_cb               1599928    7.975s   4.984us         -   65.77us <- h1_wake_stream_for_recv@src/mux_h1.c:3633 tasklet_wakeup
  task_process_applet          997609    46.37s   46.48us   16.80us   113.0us <- sc_app_chk_snd_applet@src/stconn.c:1043 appctx_wakeup
  process_table_expire         922074    48.79s   52.92us   7.275us   181.1us <- run_tasks_from_lists@src/task.c:670 task_queue
  stktable_add_pend_updates    705423    1.511s   2.142us         -   56.81us <- stktable_add_pend_updates@src/stick_table.c:869 tasklet_wakeup
  task_process_applet          683511    34.75s   50.84us   18.37us   153.3us <- __process_running_peer_sync@src/peers.c:3579 appctx_wakeup
  h1_io_cb                     535395   198.1ms   370.0ns   72.00ns   930.4us <- h1_takeover@src/mux_h1.c:5659 tasklet_wakeup
This makes it pretty obvious which tasks (and hence which call chains)
spend their time waiting on a lock, and for what share of their
execution time.
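
For reference, the value shown in lkw_avg is simply the accumulated lock
wait time divided by the number of calls recorded for the entry. A minimal
sketch of that relation, using an illustrative structure (the names below
are not the real sched_activity fields):

    #include <stdint.h>

    /* illustrative per-entry accumulator: "calls" counts invocations of
     * the task, "lkw_time" the total nanoseconds spent waiting for locks
     * across all of those calls.
     */
    struct prof_entry_sketch {
            uint64_t calls;
            uint64_t lkw_time;
    };

    /* lkw_avg as displayed: total lock wait time averaged over the calls */
    static uint64_t lkw_avg_ns(const struct prof_entry_sketch *e)
    {
            return e->calls ? e->lkw_time / e->calls : 0;
    }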
} while (0)
#define _lock_wait(_LK_, lbl, expr) do {                                \
+       uint64_t lock_start = 0;                                        \
+       extern uint64_t now_mono_time(void);                            \
+       if (_LK_ != _LK_UN) {                                           \
+               if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING))     \
+                       lock_start = now_mono_time();                   \
+       }                                                               \
        (void)(expr);                                                   \
+       if (_LK_ != _LK_UN && unlikely(lock_start))                     \
+               th_ctx->lock_wait_total += now_mono_time() - lock_start; \
        if (lbl != OTHER_LOCK)                                          \
                _lock_wait_common(_LK_, lbl);                           \
} while (0)
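
Outside of the macro machinery, the measurement pattern is simply: read a
monotonic clock before the blocking acquisition, but only when profiling is
enabled, and add the elapsed time to a per-thread accumulator afterwards;
unlock operations are never measured. A standalone sketch of that pattern
using plain pthreads (all names here are illustrative, not HAProxy APIs):

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    static __thread uint64_t lock_wait_total_ns; /* per-thread accumulator */
    static int prof_enabled = 1;                 /* stands in for TH_FL_TASK_PROFILING */

    static uint64_t mono_ns(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    }

    static void locked_section(pthread_mutex_t *mx)
    {
            /* only pay for the clock reads when profiling is enabled */
            uint64_t start = prof_enabled ? mono_ns() : 0;

            pthread_mutex_lock(mx);          /* the potentially contended step */
            if (start)
                    lock_wait_total_ns += mono_ns() - start;

            /* ... critical section ... */
            pthread_mutex_unlock(mx);        /* unlocking is never measured */
    }

    int main(void)
    {
            pthread_mutex_t mx = PTHREAD_MUTEX_INITIALIZER;

            locked_section(&mx);
            printf("waited %llu ns for the lock\n",
                   (unsigned long long)lock_wait_total_ns);
            return 0;
    }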
        uint32_t sched_wake_date;           /* current task/tasklet's wake date in 32-bit ns or 0 if not supported */
        uint64_t sched_call_date;           /* current task/tasklet's call date in ns */
+       uint64_t lock_wait_total;           /* total time in ns spent waiting for a lock (task prof) */
        uint64_t prev_mono_time;            /* previous system wide monotonic time (leaving poll) */
        uint64_t curr_mono_time;            /* latest system wide monotonic time (leaving poll) */
        ulong lock_history;                 /* history of used locks, see thread.h for more details */
-       // third cache line here on 64 bits: accessed mostly using atomic ops
+       /* around 56 unused bytes here */
+
+       // fourth cache line here on 64 bits: accessed mostly using atomic ops
        ALWAYS_ALIGN(64);
        struct mt_list shared_tasklet_list; /* Tasklet to be run, woken up by other threads */
        unsigned int rqueue_ticks;          /* Insertion counter for the run queue */
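
The comment changes above are only about cache line placement: the new
field sits with the other thread-local scheduler fields, while the
atomically accessed ones keep their own 64-byte aligned line. A
hypothetical layout check in the same spirit (the struct below is an
illustration, not the real thread context):

    #include <stdalign.h>
    #include <stddef.h>
    #include <stdint.h>

    /* thread-local bookkeeping first, then a 64-byte aligned block for
     * fields written atomically by other threads, so that remote writes
     * do not bounce the line holding the local fields.
     */
    struct th_ctx_sketch {
            uint32_t sched_wake_date;
            uint64_t sched_call_date;
            uint64_t lock_wait_total;
            /* ... */
            alignas(64) uint64_t shared_counter; /* updated by other threads */
    };

    _Static_assert(offsetof(struct th_ctx_sketch, shared_counter) % 64 == 0,
                   "shared fields must start on a cache line boundary");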
        process = t->process;
        t->calls++;
+       th_ctx->lock_wait_total = 0;
        th_ctx->sched_wake_date = t->wake_date;
        if (th_ctx->sched_wake_date || (t->state & TASK_F_WANTS_TIME)) {
                /* take the most accurate clock we have, either
                __ha_barrier_store();
                /* stats are only registered for non-zero wake dates */
-               if (unlikely(th_ctx->sched_wake_date))
+               if (unlikely(th_ctx->sched_wake_date)) {
                        HA_ATOMIC_ADD(&profile_entry->cpu_time, (uint32_t)(now_mono_time() - th_ctx->sched_call_date));
+                       if (th_ctx->lock_wait_total)
+                               HA_ATOMIC_ADD(&profile_entry->lkw_time, th_ctx->lock_wait_total);
+               }
        }
        th_ctx->current_queue = -1;
        th_ctx->sched_wake_date = TICK_ETERNITY;
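
Putting the scheduler-side pieces together: the accumulator is zeroed right
before the task is called, the locking macros fill it while the task runs,
and whatever was accumulated is folded atomically into the call site's
profile entry afterwards. A minimal sketch of that cycle, assuming
illustrative names (entry_sketch, run_task_profiled) rather than the actual
scheduler code:

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static __thread uint64_t lock_wait_total_ns;  /* filled by the lock macros */

    struct entry_sketch {
            _Atomic uint64_t calls;
            _Atomic uint64_t lkw_time;  /* summed lock wait, reported as lkw_avg */
    };

    static void run_task_profiled(void (*process)(void), struct entry_sketch *e)
    {
            lock_wait_total_ns = 0;       /* fresh accumulator for this call */
            process();                    /* any lock taken here adds to it */

            atomic_fetch_add(&e->calls, 1);
            if (lock_wait_total_ns)       /* skip the atomic add when nothing was measured */
                    atomic_fetch_add(&e->lkw_time, lock_wait_total_ns);
    }

    static void dummy_task(void)
    {
            lock_wait_total_ns += 1234;   /* pretend a lock made us wait 1234ns */
    }

    int main(void)
    {
            static struct entry_sketch e;

            run_task_profiled(dummy_task, &e);
            printf("calls=%llu lkw_time=%llu\n",
                   (unsigned long long)atomic_load(&e.calls),
                   (unsigned long long)atomic_load(&e.lkw_time));
            return 0;
    }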