]>
Commit | Line | Data |
---|---|---|
27e348b2 DBO |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org> | |
4 | */ | |
5 | ||
6 | #include <stdlib.h> | |
7 | #include <errno.h> | |
8 | #include "utils.h" | |
9 | #include "osnoise.h" | |
10 | #include "timerlat.h" | |
c66552be | 11 | #include <unistd.h> |
27e348b2 DBO |
12 | |
/*
 * Per-CPU analysis state machine: after the timerlat IRQ event fires we
 * wait for the thread event (and account thread interference); after the
 * thread event we wait for the next IRQ (and account pre-IRQ noise).
 */
enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};

/* Size of the buffers used to store task comm names. */
#define MAX_COMM	24
20 | ||
21 | /* | |
22 | * Per-cpu data statistics and data. | |
23 | */ | |
/*
 * Per-cpu data statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int curr_state;

	/* timerlat IRQ latency: seqnum/latency from the event, timestamp from the record */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency: seqnum/latency from the event, timestamp from the record */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetch using tep_get_field_val();
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sum, per source, accumulated between the timerlat
	 * IRQ and the timerlat thread events.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information: parsed, human-readable lines
	 * stored in seq buffers instead of raw records.
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread, tracked via sched:sched_switch for the final
	 * per-CPU task dump.
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker? (work struct address and
	 * function address, as fetched by tep_get_field_val()).
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};
99 | ||
100 | /* | |
101 | * The analysis context and system wide view | |
102 | */ | |
/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	int nr_cpus;
	/* when set, also print the tasks running on each CPU at the end */
	int dump_tasks;

	/* per CPU data: array of nr_cpus entries, indexed by CPU id */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
116 | ||
/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/* timerlat_aa_get_ctx - return the single, file-scope analysis context */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
132 | ||
/*
 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
 *
 * Note: no bounds check on @cpu; callers pass record->cpu or a loop
 * index below nr_cpus.
 */
static struct timerlat_aa_data
*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
{
	return &taa_ctx->taa_data[cpu];
}
141 | ||
/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 *
 * Records the IRQ occurrence, resets all per-activation accumulators and
 * seq buffers, and, for the exit-from-idle case, tracks the maximum IRQ
 * latency observed while leaving idle.
 *
 * Always returns 0.
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks.
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get exit from idle case.
	 *
	 * If it is not idle thread (pid 0), there is nothing else to do here:
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * if the latency is shorter than the known exit from idle:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}
209 | ||
210 | /* | |
211 | * timerlat_aa_thread_latency - Handles timerlat thread event | |
212 | */ | |
213 | static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data, | |
214 | struct trace_seq *s, struct tep_record *record, | |
215 | struct tep_event *event) | |
216 | { | |
217 | /* | |
218 | * For interference, we start now looking for things that can delay | |
219 | * the IRQ of the next cycle. | |
220 | */ | |
221 | taa_data->curr_state = TIMERLAT_WAITING_IRQ; | |
222 | taa_data->tlat_thread_timstamp = record->ts; | |
223 | ||
224 | /* Thread latency values */ | |
225 | tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1); | |
226 | tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1); | |
227 | ||
228 | return 0; | |
229 | } | |
230 | ||
231 | /* | |
232 | * timerlat_aa_handler - Handle timerlat events | |
233 | * | |
234 | * This function is called to handle timerlat events recording statistics. | |
235 | * | |
236 | * Returns 0 on success, -1 otherwise. | |
237 | */ | |
c66552be | 238 | static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record, |
27e348b2 DBO |
239 | struct tep_event *event, void *context) |
240 | { | |
241 | struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); | |
242 | struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); | |
243 | unsigned long long thread; | |
244 | ||
245 | if (!taa_data) | |
246 | return -1; | |
247 | ||
248 | tep_get_field_val(s, event, "context", record, &thread, 1); | |
249 | if (!thread) | |
250 | return timerlat_aa_irq_latency(taa_data, s, record, event); | |
251 | else | |
252 | return timerlat_aa_thread_latency(taa_data, s, record, event); | |
253 | } | |
254 | ||
/*
 * timerlat_aa_nmi_handler - Handles NMI noise
 *
 * It is used to collect information about interferences from NMI. It is
 * hooked to the osnoise:nmi_noise event.
 *
 * Always returns 0.
 */
static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long duration;
	unsigned long long start;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);

	/*
	 * Before the timerlat IRQ: record it as the "previous IRQ/NMI"
	 * that may have delayed the timer IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n",
				 "nmi", ns_to_usf(duration));
		return 0;
	}

	/* Between the timerlat IRQ and the thread: NMI interference. */
	taa_data->thread_nmi_sum += duration;
	trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n",
			 "nmi", ns_to_usf(duration));

	return 0;
}
288 | ||
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The Timerlat IRQ handler
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 *
 * Always returns 0.
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ: save it as the previous IRQ that may
	 * have delayed the timer IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
				 desc, vector, ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler, so the first irq_noise event seen
	 * afterwards is the timerlat timer IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time reading the clock and the timer in
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set IRQ start delay as 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * not exit from idle (a thread other than pid 0 was running).
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* Only count exit from idle if no previous IRQ overlapped it. */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference: any further IRQ before the thread event delays
	 * the timerlat thread.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu \t %9.2f us\n",
			 desc, vector, ns_to_usf(duration));

	return 0;
}
380 | ||
/*
 * Softirq vector -> name table; order follows the kernel's softirq vector
 * numbering (HI_SOFTIRQ = 0 ... RCU_SOFTIRQ = 9).
 */
static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
383 | ||
384 | ||
385 | /* | |
386 | * timerlat_aa_softirq_handler - Handles Softirq noise | |
387 | * | |
388 | * It is used to collect information about interferences from Softirq. It is | |
389 | * hooked to the osnoise:softirq_noise event. | |
390 | * | |
391 | * It is only printed in the non-rt kernel, as softirqs become thread on RT. | |
392 | */ | |
393 | static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record, | |
394 | struct tep_event *event, void *context) | |
395 | { | |
396 | struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); | |
397 | struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); | |
398 | unsigned long long duration; | |
399 | unsigned long long vector; | |
400 | unsigned long long start; | |
401 | ||
402 | if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) | |
403 | return 0; | |
404 | ||
405 | tep_get_field_val(s, event, "duration", record, &duration, 1); | |
406 | tep_get_field_val(s, event, "start", record, &start, 1); | |
407 | tep_get_field_val(s, event, "vector", record, &vector, 1); | |
408 | ||
409 | taa_data->thread_softirq_sum += duration; | |
410 | ||
411 | trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n", | |
412 | softirq_name[vector], vector, ns_to_usf(duration)); | |
413 | return 0; | |
414 | } | |
415 | ||
416 | /* | |
417 | * timerlat_aa_softirq_handler - Handles thread noise | |
418 | * | |
419 | * It is used to collect information about interferences from threads. It is | |
420 | * hooked to the osnoise:thread_noise event. | |
421 | * | |
422 | * Note: if you see thread noise, your timerlat thread was not the highest prio one. | |
423 | */ | |
424 | static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record, | |
425 | struct tep_event *event, void *context) | |
426 | { | |
427 | struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); | |
428 | struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); | |
429 | unsigned long long duration; | |
430 | unsigned long long start; | |
431 | unsigned long long pid; | |
432 | const char *comm; | |
433 | int val; | |
434 | ||
435 | if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) | |
436 | return 0; | |
437 | ||
438 | tep_get_field_val(s, event, "duration", record, &duration, 1); | |
439 | tep_get_field_val(s, event, "start", record, &start, 1); | |
440 | ||
441 | tep_get_common_field_val(s, event, "common_pid", record, &pid, 1); | |
442 | comm = tep_get_field_raw(s, event, "comm", record, &val, 1); | |
443 | ||
444 | if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) { | |
445 | taa_data->thread_blocking_duration = duration; | |
446 | ||
447 | if (comm) | |
448 | strncpy(taa_data->run_thread_comm, comm, MAX_COMM); | |
449 | else | |
450 | sprintf(taa_data->run_thread_comm, "<...>"); | |
451 | ||
452 | } else { | |
453 | taa_data->thread_thread_sum += duration; | |
454 | ||
455 | trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n", | |
456 | comm, pid, ns_to_usf(duration)); | |
457 | } | |
458 | ||
459 | return 0; | |
460 | } | |
461 | ||
462 | /* | |
463 | * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace | |
464 | * | |
465 | * Saves and parse the stack trace generated by the timerlat IRQ. | |
466 | */ | |
467 | static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record, | |
468 | struct tep_event *event, void *context) | |
469 | { | |
470 | struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); | |
471 | struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); | |
472 | unsigned long *caller; | |
473 | const char *function; | |
474 | int val, i; | |
475 | ||
476 | trace_seq_reset(taa_data->stack_seq); | |
477 | ||
478 | trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n"); | |
479 | caller = tep_get_field_raw(s, event, "caller", record, &val, 1); | |
480 | if (caller) { | |
481 | for (i = 0; ; i++) { | |
482 | function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]); | |
483 | if (!function) | |
484 | break; | |
485 | trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function); | |
486 | } | |
487 | } | |
488 | return 0; | |
489 | } | |
490 | ||
491 | /* | |
492 | * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU | |
493 | * | |
494 | * Handles the sched:sched_switch event to trace the current thread running on the | |
495 | * CPU. It is used to display the threads running on the other CPUs when the trace | |
496 | * stops. | |
497 | */ | |
498 | static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record, | |
499 | struct tep_event *event, void *context) | |
500 | { | |
501 | struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); | |
502 | struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu); | |
503 | const char *comm; | |
504 | int val; | |
505 | ||
506 | tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1); | |
507 | comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1); | |
508 | ||
509 | strncpy(taa_data->current_comm, comm, MAX_COMM); | |
510 | ||
511 | /* | |
512 | * If this was a kworker, clean the last kworkers that ran. | |
513 | */ | |
514 | taa_data->kworker = 0; | |
515 | taa_data->kworker_func = 0; | |
516 | ||
517 | return 0; | |
518 | } | |
519 | ||
/*
 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
 *
 * Handles workqueue:workqueue_execute_start event, keeping track of
 * the job that a kworker could be doing in the CPU.
 *
 * We already catch problems of hardware related latencies caused by work queues
 * running driver code that causes hardware stall. For example, with DRM drivers.
 *
 * Always returns 0.
 */
static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
					     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);

	/* Saved as raw addresses; resolved via tep_find_function() at print time. */
	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
	return 0;
}
539 | ||
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis: it breaks the measured latency down into
 * IRQ delay, IRQ duration, blocking thread, and NMI/IRQ/softirq/thread
 * interference, printing each contribution and its percentage of the total.
 *
 * NOTE(review): the cpu, irq_thresh and thread_thresh parameters are
 * currently unused in this function body.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency? The side with the higher seqnum is
	 * the one that triggered the stop.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQ, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf(" Previous IRQ interference: \t\t up to %9.2f us\n",
			       ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf(" IRQ handler delay: %16s %9.2f us (%.2f %%)\n",
	       (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
	       ns_to_usf(taa_data->timer_irq_start_delay),
	       ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf(" IRQ latency: \t\t\t\t %9.2f us\n",
	       ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf(" Blocking thread:\n");
		printf(" %24s:%-9llu\n",
		       taa_data->run_thread_comm, taa_data->run_thread_pid);
	} else {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf(" Timerlat IRQ duration: \t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->timer_irq_duration),
		       ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf(" Blocking thread: \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_blocking_duration),
		       ns_to_per(total, taa_data->thread_blocking_duration));

		printf(" %24s:%-9llu %9.2f us\n",
		       taa_data->run_thread_comm, taa_data->run_thread_pid,
		       ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf(" NMI interference \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_nmi_sum),
		       ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf(" IRQ interference \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_irq_sum),
		       ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf(" Softirq interference \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_softirq_sum),
		       ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf(" Thread interference \t\t\t %9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_thread_sum),
		       ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf(" %s latency: \t\t\t %9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
	       ns_to_usf(total));
}
688 | ||
c66552be DBO |
689 | static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) |
690 | { | |
691 | struct trace_instance *trace = &taa_ctx->tool->trace; | |
692 | int retval; | |
693 | ||
694 | retval = tracefs_iterate_raw_events(trace->tep, | |
695 | trace->inst, | |
696 | NULL, | |
697 | 0, | |
698 | collect_registered_events, | |
699 | trace); | |
700 | if (retval < 0) { | |
701 | err_msg("Error iterating on events\n"); | |
702 | return 0; | |
703 | } | |
704 | ||
705 | return 1; | |
706 | } | |
707 | ||
27e348b2 DBO |
708 | /** |
709 | * timerlat_auto_analysis - Analyze the collected data | |
710 | */ | |
711 | void timerlat_auto_analysis(int irq_thresh, int thread_thresh) | |
712 | { | |
713 | struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); | |
714 | unsigned long long max_exit_from_idle = 0; | |
715 | struct timerlat_aa_data *taa_data; | |
716 | int max_exit_from_idle_cpu; | |
717 | struct tep_handle *tep; | |
718 | int cpu; | |
719 | ||
c66552be DBO |
720 | timerlat_auto_analysis_collect_trace(taa_ctx); |
721 | ||
27e348b2 DBO |
722 | /* bring stop tracing to the ns scale */ |
723 | irq_thresh = irq_thresh * 1000; | |
724 | thread_thresh = thread_thresh * 1000; | |
725 | ||
726 | for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { | |
727 | taa_data = timerlat_aa_get_data(taa_ctx, cpu); | |
728 | ||
729 | if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) { | |
730 | printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); | |
731 | timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); | |
732 | } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) { | |
733 | printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu); | |
734 | timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh); | |
735 | } | |
736 | ||
737 | if (taa_data->max_exit_idle_latency > max_exit_from_idle) { | |
738 | max_exit_from_idle = taa_data->max_exit_idle_latency; | |
739 | max_exit_from_idle_cpu = cpu; | |
740 | } | |
741 | ||
742 | } | |
743 | ||
744 | if (max_exit_from_idle) { | |
745 | printf("\n"); | |
746 | printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n", | |
747 | ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu); | |
748 | } | |
749 | if (!taa_ctx->dump_tasks) | |
750 | return; | |
751 | ||
752 | printf("\n"); | |
753 | printf("Printing CPU tasks:\n"); | |
754 | for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) { | |
755 | taa_data = timerlat_aa_get_data(taa_ctx, cpu); | |
756 | tep = taa_ctx->tool->trace.tep; | |
757 | ||
758 | printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid); | |
759 | ||
760 | if (taa_data->kworker_func) | |
761 | printf(" kworker:%s:%s", | |
762 | tep_find_function(tep, taa_data->kworker) ? : "<...>", | |
763 | tep_find_function(tep, taa_data->kworker_func)); | |
764 | printf("\n"); | |
765 | } | |
766 | ||
767 | } | |
768 | ||
769 | /* | |
770 | * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data | |
771 | */ | |
772 | static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx) | |
773 | { | |
774 | struct timerlat_aa_data *taa_data; | |
775 | int i; | |
776 | ||
777 | if (!taa_ctx->taa_data) | |
778 | return; | |
779 | ||
780 | for (i = 0; i < taa_ctx->nr_cpus; i++) { | |
781 | taa_data = timerlat_aa_get_data(taa_ctx, i); | |
782 | ||
783 | if (taa_data->prev_irqs_seq) { | |
784 | trace_seq_destroy(taa_data->prev_irqs_seq); | |
785 | free(taa_data->prev_irqs_seq); | |
786 | } | |
787 | ||
788 | if (taa_data->nmi_seq) { | |
789 | trace_seq_destroy(taa_data->nmi_seq); | |
790 | free(taa_data->nmi_seq); | |
791 | } | |
792 | ||
793 | if (taa_data->irqs_seq) { | |
794 | trace_seq_destroy(taa_data->irqs_seq); | |
795 | free(taa_data->irqs_seq); | |
796 | } | |
797 | ||
798 | if (taa_data->softirqs_seq) { | |
799 | trace_seq_destroy(taa_data->softirqs_seq); | |
800 | free(taa_data->softirqs_seq); | |
801 | } | |
802 | ||
803 | if (taa_data->threads_seq) { | |
804 | trace_seq_destroy(taa_data->threads_seq); | |
805 | free(taa_data->threads_seq); | |
806 | } | |
807 | ||
808 | if (taa_data->stack_seq) { | |
809 | trace_seq_destroy(taa_data->stack_seq); | |
810 | free(taa_data->stack_seq); | |
811 | } | |
812 | } | |
813 | } | |
814 | ||
815 | /* | |
816 | * timerlat_aa_init_seqs - Init seq files used to store parsed information | |
817 | * | |
818 | * Instead of keeping data structures to store raw data, use seq files to | |
819 | * store parsed data. | |
820 | * | |
821 | * Allocates and initialize seq files. | |
822 | * | |
823 | * Returns 0 on success, -1 otherwise. | |
824 | */ | |
825 | static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx) | |
826 | { | |
827 | struct timerlat_aa_data *taa_data; | |
828 | int i; | |
829 | ||
830 | for (i = 0; i < taa_ctx->nr_cpus; i++) { | |
831 | ||
832 | taa_data = timerlat_aa_get_data(taa_ctx, i); | |
833 | ||
834 | taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq)); | |
835 | if (!taa_data->prev_irqs_seq) | |
836 | goto out_err; | |
837 | ||
838 | trace_seq_init(taa_data->prev_irqs_seq); | |
839 | ||
840 | taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq)); | |
841 | if (!taa_data->nmi_seq) | |
842 | goto out_err; | |
843 | ||
844 | trace_seq_init(taa_data->nmi_seq); | |
845 | ||
846 | taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq)); | |
847 | if (!taa_data->irqs_seq) | |
848 | goto out_err; | |
849 | ||
850 | trace_seq_init(taa_data->irqs_seq); | |
851 | ||
852 | taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq)); | |
853 | if (!taa_data->softirqs_seq) | |
854 | goto out_err; | |
855 | ||
856 | trace_seq_init(taa_data->softirqs_seq); | |
857 | ||
858 | taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq)); | |
859 | if (!taa_data->threads_seq) | |
860 | goto out_err; | |
861 | ||
862 | trace_seq_init(taa_data->threads_seq); | |
863 | ||
864 | taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq)); | |
865 | if (!taa_data->stack_seq) | |
866 | goto out_err; | |
867 | ||
868 | trace_seq_init(taa_data->stack_seq); | |
869 | } | |
870 | ||
871 | return 0; | |
872 | ||
873 | out_err: | |
874 | timerlat_aa_destroy_seqs(taa_ctx); | |
875 | return -1; | |
876 | } | |
877 | ||
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): detaches every tep event handler
 * and disables the trace events that were enabled for the analysis.
 *
 * @dump_tasks must match the value passed at registration time, so the
 * sched/workqueue events are only disabled when they were actually enabled.
 * Also used as the error-path cleanup of timerlat_aa_register_events(), so
 * it must be safe to call after a partial registration.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	/* NULL event name: disable all "osnoise" system events at once */
	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);

	/* The task-dump events below are only registered with dump_tasks set */
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
914 | ||
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the trace events required by the analysis and attaches the tep
 * handlers that parse them. With @dump_tasks set, also enables the
 * sched_switch and workqueue events used to dump the tasks running when
 * a latency is detected.
 *
 * On failure, everything registered so far is torn down via
 * timerlat_aa_unregister_events().
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				   timerlat_aa_handler, tool);


	/*
	 * register auto-analysis handlers.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	/*
	 * Error convention used throughout: a negative return with errno
	 * still zero is treated as "event does not exist on this kernel"
	 * (hence the "Could not find" message); a negative return *with*
	 * errno set is tolerated here.
	 * NOTE(review): confirm against libtracefs' tracefs_event_enable()
	 * error semantics.
	 */
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	/* The remaining events are only needed when dumping tasks */
	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	/* unregister tolerates events that were never registered */
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
982 | ||
983 | /** | |
984 | * timerlat_aa_destroy - Destroy timerlat auto-analysis | |
985 | */ | |
986 | void timerlat_aa_destroy(void) | |
987 | { | |
988 | struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx(); | |
989 | ||
990 | if (!taa_ctx) | |
991 | return; | |
992 | ||
993 | if (!taa_ctx->taa_data) | |
994 | goto out_ctx; | |
995 | ||
996 | timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks); | |
997 | timerlat_aa_destroy_seqs(taa_ctx); | |
998 | free(taa_ctx->taa_data); | |
999 | out_ctx: | |
1000 | free(taa_ctx); | |
1001 | } | |
1002 | ||
1003 | /** | |
1004 | * timerlat_aa_init - Initialize timerlat auto-analysis | |
1005 | * | |
1006 | * Returns 0 on success, -1 otherwise. | |
1007 | */ | |
c66552be | 1008 | int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks) |
27e348b2 | 1009 | { |
c66552be | 1010 | int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); |
27e348b2 DBO |
1011 | struct timerlat_aa_context *taa_ctx; |
1012 | int retval; | |
1013 | ||
1014 | taa_ctx = calloc(1, sizeof(*taa_ctx)); | |
1015 | if (!taa_ctx) | |
1016 | return -1; | |
1017 | ||
1018 | __timerlat_aa_ctx = taa_ctx; | |
1019 | ||
1020 | taa_ctx->nr_cpus = nr_cpus; | |
1021 | taa_ctx->tool = tool; | |
1022 | taa_ctx->dump_tasks = dump_tasks; | |
1023 | ||
1024 | taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data)); | |
1025 | if (!taa_ctx->taa_data) | |
1026 | goto out_err; | |
1027 | ||
1028 | retval = timerlat_aa_init_seqs(taa_ctx); | |
1029 | if (retval) | |
1030 | goto out_err; | |
1031 | ||
1032 | retval = timerlat_aa_register_events(tool, dump_tasks); | |
1033 | if (retval) | |
1034 | goto out_err; | |
1035 | ||
1036 | return 0; | |
1037 | ||
1038 | out_err: | |
1039 | timerlat_aa_destroy(); | |
1040 | return -1; | |
1041 | } |