]> git.ipfire.org Git - thirdparty/kernel/stable.git/blob - tools/tracing/rtla/src/timerlat_aa.c
KVM: x86: Ignore MSR_AMD64_TW_CFG access
[thirdparty/kernel/stable.git] / tools / tracing / rtla / src / timerlat_aa.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "utils.h"
9 #include "osnoise.h"
10 #include "timerlat.h"
11 #include <unistd.h>
12
/*
 * Per-CPU parsing state: which timerlat event we expect next.
 * Starts at TIMERLAT_INIT until the first timerlat event is seen.
 */
enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};

/* Max size kept for task comm strings (includes the NUL terminator). */
#define MAX_COMM 24
20
/*
 * Per-cpu data statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int curr_state;

	/* timerlat IRQ latency: sequence number, latency and arrival timestamp */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency: sequence number, latency and arrival timestamp */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using tep_get_field_val();
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq: start time/delay, duration,
	 * and the exit-from-idle cost attributed to it. */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sums accumulated between the timerlat IRQ and the
	 * timerlat thread activation, per source.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information: pre-formatted one-line-per-event
	 * descriptions, kept in trace_seq buffers for on-demand printing.
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread on the CPU, tracked via sched:sched_switch.
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker? If so, which work struct and
	 * which function (both kernel addresses).
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};
99
/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	/* Number of CPUs (sized at init from _SC_NPROCESSORS_CONF) */
	int nr_cpus;
	/* Whether to also print per-CPU tasks at the end of the analysis */
	int dump_tasks;

	/* per CPU data, array of nr_cpus entries */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
116
/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/* Returns the global analysis context (NULL before timerlat_aa_init()). */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
132
133 /*
134 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
135 */
136 static struct timerlat_aa_data
137 *timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
138 {
139 return &taa_ctx->taa_data[cpu];
140 }
141
142 /*
143 * timerlat_aa_irq_latency - Handles timerlat IRQ event
144 */
145 static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
146 struct trace_seq *s, struct tep_record *record,
147 struct tep_event *event)
148 {
149 /*
150 * For interference, we start now looking for things that can delay
151 * the thread.
152 */
153 taa_data->curr_state = TIMERLAT_WAITING_THREAD;
154 taa_data->tlat_irq_timstamp = record->ts;
155
156 /*
157 * Zero values.
158 */
159 taa_data->thread_nmi_sum = 0;
160 taa_data->thread_irq_sum = 0;
161 taa_data->thread_softirq_sum = 0;
162 taa_data->thread_blocking_duration = 0;
163 taa_data->timer_irq_start_time = 0;
164 taa_data->timer_irq_duration = 0;
165 taa_data->timer_exit_from_idle = 0;
166
167 /*
168 * Zero interference tasks.
169 */
170 trace_seq_reset(taa_data->nmi_seq);
171 trace_seq_reset(taa_data->irqs_seq);
172 trace_seq_reset(taa_data->softirqs_seq);
173 trace_seq_reset(taa_data->threads_seq);
174
175 /* IRQ latency values */
176 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
177 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
178
179 /* The thread that can cause blocking */
180 tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
181
182 /*
183 * Get exit from idle case.
184 *
185 * If it is not idle thread:
186 */
187 if (taa_data->run_thread_pid)
188 return 0;
189
190 /*
191 * if the latency is shorter than the known exit from idle:
192 */
193 if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
194 return 0;
195
196 /*
197 * To be safe, ignore the cases in which an IRQ/NMI could have
198 * interfered with the timerlat IRQ.
199 */
200 if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
201 < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
202 return 0;
203
204 taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
205
206 return 0;
207 }
208
209 /*
210 * timerlat_aa_thread_latency - Handles timerlat thread event
211 */
212 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
213 struct trace_seq *s, struct tep_record *record,
214 struct tep_event *event)
215 {
216 /*
217 * For interference, we start now looking for things that can delay
218 * the IRQ of the next cycle.
219 */
220 taa_data->curr_state = TIMERLAT_WAITING_IRQ;
221 taa_data->tlat_thread_timstamp = record->ts;
222
223 /* Thread latency values */
224 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
225 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
226
227 return 0;
228 }
229
230 /*
231 * timerlat_aa_handler - Handle timerlat events
232 *
233 * This function is called to handle timerlat events recording statistics.
234 *
235 * Returns 0 on success, -1 otherwise.
236 */
237 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
238 struct tep_event *event, void *context)
239 {
240 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
241 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
242 unsigned long long thread;
243
244 if (!taa_data)
245 return -1;
246
247 tep_get_field_val(s, event, "context", record, &thread, 1);
248 if (!thread)
249 return timerlat_aa_irq_latency(taa_data, s, record, event);
250 else
251 return timerlat_aa_thread_latency(taa_data, s, record, event);
252 }
253
254 /*
255 * timerlat_aa_nmi_handler - Handles NMI noise
256 *
257 * It is used to collect information about interferences from NMI. It is
258 * hooked to the osnoise:nmi_noise event.
259 */
260 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
261 struct tep_event *event, void *context)
262 {
263 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
264 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
265 unsigned long long duration;
266 unsigned long long start;
267
268 tep_get_field_val(s, event, "duration", record, &duration, 1);
269 tep_get_field_val(s, event, "start", record, &start, 1);
270
271 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
272 taa_data->prev_irq_duration = duration;
273 taa_data->prev_irq_timstamp = start;
274
275 trace_seq_reset(taa_data->prev_irqs_seq);
276 trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n",
277 "nmi", ns_to_usf(duration));
278 return 0;
279 }
280
281 taa_data->thread_nmi_sum += duration;
282 trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n",
283 "nmi", ns_to_usf(duration));
284
285 return 0;
286 }
287
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The Timerlat IRQ handler
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ: remember this IRQ as the potential
	 * delayer of the (not yet seen) timerlat IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
				 desc, vector, ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler, so the first irq_noise event after it
	 * is the timerlat IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
		/* When the timer was expected to fire, on the trace clock. */
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;

		/*
		 * not exit from idle: run_thread_pid != 0 means a regular
		 * task was running, so the delay is not an idle-exit cost.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* Only attribute the delay to idle exit if no previous IRQ overlaps it. */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference: any other IRQ between the timerlat IRQ and the
	 * timerlat thread activation.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu \t %9.2f us\n",
			 desc, vector, ns_to_usf(duration));

	return 0;
}
363
/* Names for the softirq vectors, indexed by the "vector" event field. */
static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
366
367
368 /*
369 * timerlat_aa_softirq_handler - Handles Softirq noise
370 *
371 * It is used to collect information about interferences from Softirq. It is
372 * hooked to the osnoise:softirq_noise event.
373 *
374 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
375 */
376 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
377 struct tep_event *event, void *context)
378 {
379 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
380 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
381 unsigned long long duration;
382 unsigned long long vector;
383 unsigned long long start;
384
385 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
386 return 0;
387
388 tep_get_field_val(s, event, "duration", record, &duration, 1);
389 tep_get_field_val(s, event, "start", record, &start, 1);
390 tep_get_field_val(s, event, "vector", record, &vector, 1);
391
392 taa_data->thread_softirq_sum += duration;
393
394 trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n",
395 softirq_name[vector], vector, ns_to_usf(duration));
396 return 0;
397 }
398
399 /*
400 * timerlat_aa_softirq_handler - Handles thread noise
401 *
402 * It is used to collect information about interferences from threads. It is
403 * hooked to the osnoise:thread_noise event.
404 *
405 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
406 */
407 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
408 struct tep_event *event, void *context)
409 {
410 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
411 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
412 unsigned long long duration;
413 unsigned long long start;
414 unsigned long long pid;
415 const char *comm;
416 int val;
417
418 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
419 return 0;
420
421 tep_get_field_val(s, event, "duration", record, &duration, 1);
422 tep_get_field_val(s, event, "start", record, &start, 1);
423
424 tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
425 comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
426
427 if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
428 taa_data->thread_blocking_duration = duration;
429
430 if (comm)
431 strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
432 else
433 sprintf(taa_data->run_thread_comm, "<...>");
434
435 } else {
436 taa_data->thread_thread_sum += duration;
437
438 trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
439 comm, pid, ns_to_usf(duration));
440 }
441
442 return 0;
443 }
444
445 /*
446 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
447 *
448 * Saves and parse the stack trace generated by the timerlat IRQ.
449 */
450 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
451 struct tep_event *event, void *context)
452 {
453 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
454 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
455 unsigned long *caller;
456 const char *function;
457 int val, i;
458
459 trace_seq_reset(taa_data->stack_seq);
460
461 trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n");
462 caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
463 if (caller) {
464 for (i = 0; ; i++) {
465 function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
466 if (!function)
467 break;
468 trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
469 }
470 }
471 return 0;
472 }
473
474 /*
475 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
476 *
477 * Handles the sched:sched_switch event to trace the current thread running on the
478 * CPU. It is used to display the threads running on the other CPUs when the trace
479 * stops.
480 */
481 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
482 struct tep_event *event, void *context)
483 {
484 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
485 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
486 const char *comm;
487 int val;
488
489 tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
490 comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
491
492 strncpy(taa_data->current_comm, comm, MAX_COMM);
493
494 /*
495 * If this was a kworker, clean the last kworkers that ran.
496 */
497 taa_data->kworker = 0;
498 taa_data->kworker_func = 0;
499
500 return 0;
501 }
502
503 /*
504 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
505 *
506 * Handles workqueue:workqueue_execute_start event, keeping track of
507 * the job that a kworker could be doing in the CPU.
508 *
509 * We already catch problems of hardware related latencies caused by work queues
510 * running driver code that causes hardware stall. For example, with DRM drivers.
511 */
512 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
513 struct tep_event *event, void *context)
514 {
515 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
516 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
517
518 tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
519 tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
520 return 0;
521 }
522
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis: it prints, in order, the factors that
 * contributed to the latency of the sample that crossed the threshold.
 *
 * NOTE(review): irq_thresh and thread_thresh are not used in this body; the
 * caller pre-filters which CPUs to analyze — confirm before removing.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	unsigned long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency? The sample with the highest seqnum
	 * is the one that triggered the stop.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;

	/* If the previous IRQ overlapped the expected arrival, report it. */
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		printf("  Previous IRQ interference:	\t	up to  %9.2f us\n",
			ns_to_usf(taa_data->prev_irq_duration));

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
		ns_to_usf(taa_data->timer_irq_start_delay),
		ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
		ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf("  Blocking thread:\n");
		printf("	%24s:%-9llu\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid);
	} else  {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->timer_irq_duration),
			ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_blocking_duration),
			ns_to_per(total, taa_data->thread_blocking_duration));

		printf("	%24s:%-9llu		%9.2f us\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid,
			ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_nmi_sum),
			ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_irq_sum),
			ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_softirq_sum),
			ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_thread_sum),
			ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
		ns_to_usf(total));
}
668
669 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
670 {
671 struct trace_instance *trace = &taa_ctx->tool->trace;
672 int retval;
673
674 retval = tracefs_iterate_raw_events(trace->tep,
675 trace->inst,
676 NULL,
677 0,
678 collect_registered_events,
679 trace);
680 if (retval < 0) {
681 err_msg("Error iterating on events\n");
682 return 0;
683 }
684
685 return 1;
686 }
687
688 /**
689 * timerlat_auto_analysis - Analyze the collected data
690 */
691 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
692 {
693 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
694 unsigned long long max_exit_from_idle = 0;
695 struct timerlat_aa_data *taa_data;
696 int max_exit_from_idle_cpu;
697 struct tep_handle *tep;
698 int cpu;
699
700 timerlat_auto_analysis_collect_trace(taa_ctx);
701
702 /* bring stop tracing to the ns scale */
703 irq_thresh = irq_thresh * 1000;
704 thread_thresh = thread_thresh * 1000;
705
706 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
707 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
708
709 if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
710 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
711 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
712 } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
713 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
714 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
715 }
716
717 if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
718 max_exit_from_idle = taa_data->max_exit_idle_latency;
719 max_exit_from_idle_cpu = cpu;
720 }
721
722 }
723
724 if (max_exit_from_idle) {
725 printf("\n");
726 printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
727 ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
728 }
729 if (!taa_ctx->dump_tasks)
730 return;
731
732 printf("\n");
733 printf("Printing CPU tasks:\n");
734 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
735 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
736 tep = taa_ctx->tool->trace.tep;
737
738 printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
739
740 if (taa_data->kworker_func)
741 printf(" kworker:%s:%s",
742 tep_find_function(tep, taa_data->kworker) ? : "<...>",
743 tep_find_function(tep, taa_data->kworker_func));
744 printf("\n");
745 }
746
747 }
748
749 /*
750 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
751 */
752 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
753 {
754 struct timerlat_aa_data *taa_data;
755 int i;
756
757 if (!taa_ctx->taa_data)
758 return;
759
760 for (i = 0; i < taa_ctx->nr_cpus; i++) {
761 taa_data = timerlat_aa_get_data(taa_ctx, i);
762
763 if (taa_data->prev_irqs_seq) {
764 trace_seq_destroy(taa_data->prev_irqs_seq);
765 free(taa_data->prev_irqs_seq);
766 }
767
768 if (taa_data->nmi_seq) {
769 trace_seq_destroy(taa_data->nmi_seq);
770 free(taa_data->nmi_seq);
771 }
772
773 if (taa_data->irqs_seq) {
774 trace_seq_destroy(taa_data->irqs_seq);
775 free(taa_data->irqs_seq);
776 }
777
778 if (taa_data->softirqs_seq) {
779 trace_seq_destroy(taa_data->softirqs_seq);
780 free(taa_data->softirqs_seq);
781 }
782
783 if (taa_data->threads_seq) {
784 trace_seq_destroy(taa_data->threads_seq);
785 free(taa_data->threads_seq);
786 }
787
788 if (taa_data->stack_seq) {
789 trace_seq_destroy(taa_data->stack_seq);
790 free(taa_data->stack_seq);
791 }
792 }
793 }
794
795 /*
796 * timerlat_aa_init_seqs - Init seq files used to store parsed information
797 *
798 * Instead of keeping data structures to store raw data, use seq files to
799 * store parsed data.
800 *
801 * Allocates and initialize seq files.
802 *
803 * Returns 0 on success, -1 otherwise.
804 */
805 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
806 {
807 struct timerlat_aa_data *taa_data;
808 int i;
809
810 for (i = 0; i < taa_ctx->nr_cpus; i++) {
811
812 taa_data = timerlat_aa_get_data(taa_ctx, i);
813
814 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
815 if (!taa_data->prev_irqs_seq)
816 goto out_err;
817
818 trace_seq_init(taa_data->prev_irqs_seq);
819
820 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
821 if (!taa_data->nmi_seq)
822 goto out_err;
823
824 trace_seq_init(taa_data->nmi_seq);
825
826 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
827 if (!taa_data->irqs_seq)
828 goto out_err;
829
830 trace_seq_init(taa_data->irqs_seq);
831
832 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
833 if (!taa_data->softirqs_seq)
834 goto out_err;
835
836 trace_seq_init(taa_data->softirqs_seq);
837
838 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
839 if (!taa_data->threads_seq)
840 goto out_err;
841
842 trace_seq_init(taa_data->threads_seq);
843
844 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
845 if (!taa_data->stack_seq)
846 goto out_err;
847
848 trace_seq_init(taa_data->stack_seq);
849 }
850
851 return 0;
852
853 out_err:
854 timerlat_aa_destroy_seqs(taa_ctx);
855 return -1;
856 }
857
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): unhooks every handler and disables
 * the events that were enabled in the trace instance. The task-tracking
 * events are only touched when dump_tasks is set, matching registration.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);

	/* The events below were only registered for the task dump. */
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
894
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the osnoise (and, if dump_tasks, sched/workqueue) events in the
 * trace instance and hooks the parsing handlers to them.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				timerlat_aa_handler, tool);


	/*
	 * register auto-analysis handlers.
	 *
	 * NOTE(review): the "retval < 0 && !errno" pattern appears to treat
	 * "event not found" (no errno set) as fatal while tolerating other
	 * enable failures — confirm against tracefs_event_enable() semantics.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	/* Undo any registration done before the failure. */
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
962
963 /**
964 * timerlat_aa_destroy - Destroy timerlat auto-analysis
965 */
966 void timerlat_aa_destroy(void)
967 {
968 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
969
970 if (!taa_ctx)
971 return;
972
973 if (!taa_ctx->taa_data)
974 goto out_ctx;
975
976 timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
977 timerlat_aa_destroy_seqs(taa_ctx);
978 free(taa_ctx->taa_data);
979 out_ctx:
980 free(taa_ctx);
981 }
982
983 /**
984 * timerlat_aa_init - Initialize timerlat auto-analysis
985 *
986 * Returns 0 on success, -1 otherwise.
987 */
988 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
989 {
990 int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
991 struct timerlat_aa_context *taa_ctx;
992 int retval;
993
994 taa_ctx = calloc(1, sizeof(*taa_ctx));
995 if (!taa_ctx)
996 return -1;
997
998 __timerlat_aa_ctx = taa_ctx;
999
1000 taa_ctx->nr_cpus = nr_cpus;
1001 taa_ctx->tool = tool;
1002 taa_ctx->dump_tasks = dump_tasks;
1003
1004 taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1005 if (!taa_ctx->taa_data)
1006 goto out_err;
1007
1008 retval = timerlat_aa_init_seqs(taa_ctx);
1009 if (retval)
1010 goto out_err;
1011
1012 retval = timerlat_aa_register_events(tool, dump_tasks);
1013 if (retval)
1014 goto out_err;
1015
1016 return 0;
1017
1018 out_err:
1019 timerlat_aa_destroy();
1020 return -1;
1021 }