]> git.ipfire.org Git - thirdparty/kernel/stable.git/blob - tools/tracing/rtla/src/timerlat_aa.c
KVM: x86: Ignore MSR_AMD64_TW_CFG access
[thirdparty/kernel/stable.git] / tools / tracing / rtla / src / timerlat_aa.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "utils.h"
9 #include "osnoise.h"
10 #include "timerlat.h"
11 #include <unistd.h>
12
/*
 * Per-CPU parsing state: which timerlat event we expect next.
 * Starts at TIMERLAT_INIT until the first timerlat event is seen.
 */
enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};

/* Max size kept for task comm strings (includes the NUL terminator). */
#define MAX_COMM 24
20
/*
 * Per-cpu data statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int curr_state;

	/* timerlat IRQ latency: sequence number, latency and arrival timestamp */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency: sequence number, latency and arrival timestamp */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using tep_get_field_val();
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq: start time/delay, duration,
	 * and the exit-from-idle cost attributed to it. */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sums accumulated between the timerlat IRQ and the
	 * timerlat thread activation, per source.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information: pre-formatted one-line-per-event
	 * descriptions, kept in trace_seq buffers for on-demand printing.
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread on the CPU, tracked via sched:sched_switch.
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker? If so, which work struct and
	 * which function (both kernel addresses).
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};
99
/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	/* Number of CPUs (sized at init from _SC_NPROCESSORS_CONF) */
	int nr_cpus;
	/* Whether to also print per-CPU tasks at the end of the analysis */
	int dump_tasks;

	/* per CPU data, array of nr_cpus entries */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
116
/*
 * The data is stored as a local variable, but accessed via a helper function.
 *
 * It could be stored inside the trace context. But every access would
 * require container_of() + a series of pointers. Do we need it? Not sure.
 *
 * For now keep it simple. If needed, store it in the tool, add the *context
 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
 */
static struct timerlat_aa_context *__timerlat_aa_ctx;

/* Returns the global analysis context (NULL before timerlat_aa_init()). */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
132
133 /*
134 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
135 */
136 static struct timerlat_aa_data
137 *timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
138 {
139 return &taa_ctx->taa_data[cpu];
140 }
141
142 /*
143 * timerlat_aa_irq_latency - Handles timerlat IRQ event
144 */
145 static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
146 struct trace_seq *s, struct tep_record *record,
147 struct tep_event *event)
148 {
149 /*
150 * For interference, we start now looking for things that can delay
151 * the thread.
152 */
153 taa_data->curr_state = TIMERLAT_WAITING_THREAD;
154 taa_data->tlat_irq_timstamp = record->ts;
155
156 /*
157 * Zero values.
158 */
159 taa_data->thread_nmi_sum = 0;
160 taa_data->thread_irq_sum = 0;
161 taa_data->thread_softirq_sum = 0;
162 taa_data->thread_blocking_duration = 0;
163 taa_data->timer_irq_start_time = 0;
164 taa_data->timer_irq_duration = 0;
165 taa_data->timer_exit_from_idle = 0;
166
167 /*
168 * Zero interference tasks.
169 */
170 trace_seq_reset(taa_data->nmi_seq);
171 trace_seq_reset(taa_data->irqs_seq);
172 trace_seq_reset(taa_data->softirqs_seq);
173 trace_seq_reset(taa_data->threads_seq);
174
175 /* IRQ latency values */
176 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
177 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
178
179 /* The thread that can cause blocking */
180 tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
181
182 /*
183 * Get exit from idle case.
184 *
185 * If it is not idle thread:
186 */
187 if (taa_data->run_thread_pid)
188 return 0;
189
190 /*
191 * if the latency is shorter than the known exit from idle:
192 */
193 if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
194 return 0;
195
196 /*
197 * To be safe, ignore the cases in which an IRQ/NMI could have
198 * interfered with the timerlat IRQ.
199 */
200 if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
201 < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
202 return 0;
203
204 taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
205
206 return 0;
207 }
208
209 /*
210 * timerlat_aa_thread_latency - Handles timerlat thread event
211 */
212 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
213 struct trace_seq *s, struct tep_record *record,
214 struct tep_event *event)
215 {
216 /*
217 * For interference, we start now looking for things that can delay
218 * the IRQ of the next cycle.
219 */
220 taa_data->curr_state = TIMERLAT_WAITING_IRQ;
221 taa_data->tlat_thread_timstamp = record->ts;
222
223 /* Thread latency values */
224 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
225 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
226
227 return 0;
228 }
229
230 /*
231 * timerlat_aa_handler - Handle timerlat events
232 *
233 * This function is called to handle timerlat events recording statistics.
234 *
235 * Returns 0 on success, -1 otherwise.
236 */
237 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
238 struct tep_event *event, void *context)
239 {
240 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
241 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
242 unsigned long long thread;
243
244 if (!taa_data)
245 return -1;
246
247 tep_get_field_val(s, event, "context", record, &thread, 1);
248 if (!thread)
249 return timerlat_aa_irq_latency(taa_data, s, record, event);
250 else
251 return timerlat_aa_thread_latency(taa_data, s, record, event);
252 }
253
254 /*
255 * timerlat_aa_nmi_handler - Handles NMI noise
256 *
257 * It is used to collect information about interferences from NMI. It is
258 * hooked to the osnoise:nmi_noise event.
259 */
260 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
261 struct tep_event *event, void *context)
262 {
263 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
264 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
265 unsigned long long duration;
266 unsigned long long start;
267
268 tep_get_field_val(s, event, "duration", record, &duration, 1);
269 tep_get_field_val(s, event, "start", record, &start, 1);
270
271 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
272 taa_data->prev_irq_duration = duration;
273 taa_data->prev_irq_timstamp = start;
274
275 trace_seq_reset(taa_data->prev_irqs_seq);
276 trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n",
277 "nmi", ns_to_usf(duration));
278 return 0;
279 }
280
281 taa_data->thread_nmi_sum += duration;
282 trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n",
283 "nmi", ns_to_usf(duration));
284
285 return 0;
286 }
287
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The Timerlat IRQ handler
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ: remember this IRQ as the potential
	 * delayer of the (not yet seen) timerlat IRQ.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
				 desc, vector, ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler, so the first irq_noise event after it
	 * is the timerlat IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
		/* When the timer was expected to fire, on the trace clock. */
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;

		/*
		 * not exit from idle: run_thread_pid != 0 means a regular
		 * task was running, so the delay is not an idle-exit cost.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* Only attribute the delay to idle exit if no previous IRQ overlaps it. */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference: any other IRQ between the timerlat IRQ and the
	 * timerlat thread activation.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu \t %9.2f us\n",
			 desc, vector, ns_to_usf(duration));

	return 0;
}
363
/* Names for the softirq vectors, indexed by the "vector" event field. */
static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
366
367
368 /*
369 * timerlat_aa_softirq_handler - Handles Softirq noise
370 *
371 * It is used to collect information about interferences from Softirq. It is
372 * hooked to the osnoise:softirq_noise event.
373 *
374 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
375 */
376 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
377 struct tep_event *event, void *context)
378 {
379 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
380 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
381 unsigned long long duration;
382 unsigned long long vector;
383 unsigned long long start;
384
385 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
386 return 0;
387
388 tep_get_field_val(s, event, "duration", record, &duration, 1);
389 tep_get_field_val(s, event, "start", record, &start, 1);
390 tep_get_field_val(s, event, "vector", record, &vector, 1);
391
392 taa_data->thread_softirq_sum += duration;
393
394 trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n",
395 softirq_name[vector], vector, ns_to_usf(duration));
396 return 0;
397 }
398
399 /*
400 * timerlat_aa_softirq_handler - Handles thread noise
401 *
402 * It is used to collect information about interferences from threads. It is
403 * hooked to the osnoise:thread_noise event.
404 *
405 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
406 */
407 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
408 struct tep_event *event, void *context)
409 {
410 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
411 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
412 unsigned long long duration;
413 unsigned long long start;
414 unsigned long long pid;
415 const char *comm;
416 int val;
417
418 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
419 return 0;
420
421 tep_get_field_val(s, event, "duration", record, &duration, 1);
422 tep_get_field_val(s, event, "start", record, &start, 1);
423
424 tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
425 comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
426
427 if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
428 taa_data->thread_blocking_duration = duration;
429
430 if (comm)
431 strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
432 else
433 sprintf(taa_data->run_thread_comm, "<...>");
434
435 } else {
436 taa_data->thread_thread_sum += duration;
437
438 trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
439 comm, pid, ns_to_usf(duration));
440 }
441
442 return 0;
443 }
444
445 /*
446 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
447 *
448 * Saves and parse the stack trace generated by the timerlat IRQ.
449 */
450 static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
451 struct tep_event *event, void *context)
452 {
453 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
454 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
455 unsigned long *caller;
456 const char *function;
457 int val, i;
458
459 trace_seq_reset(taa_data->stack_seq);
460
461 trace_seq_printf(taa_data->stack_seq, " Blocking thread stack trace\n");
462 caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
463 if (caller) {
464 for (i = 0; ; i++) {
465 function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
466 if (!function)
467 break;
468 trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
469 }
470 }
471 return 0;
472 }
473
474 /*
475 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
476 *
477 * Handles the sched:sched_switch event to trace the current thread running on the
478 * CPU. It is used to display the threads running on the other CPUs when the trace
479 * stops.
480 */
481 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
482 struct tep_event *event, void *context)
483 {
484 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
485 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
486 const char *comm;
487 int val;
488
489 tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
490 comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
491
492 strncpy(taa_data->current_comm, comm, MAX_COMM);
493
494 /*
495 * If this was a kworker, clean the last kworkers that ran.
496 */
497 taa_data->kworker = 0;
498 taa_data->kworker_func = 0;
499
500 return 0;
501 }
502
503 /*
504 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
505 *
506 * Handles workqueue:workqueue_execute_start event, keeping track of
507 * the job that a kworker could be doing in the CPU.
508 *
509 * We already catch problems of hardware related latencies caused by work queues
510 * running driver code that causes hardware stall. For example, with DRM drivers.
511 */
512 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
513 struct tep_event *event, void *context)
514 {
515 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
516 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
517
518 tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
519 tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
520 return 0;
521 }
522
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis: it prints, in order, the factors that
 * contributed to the latency of the sample that crossed the threshold.
 *
 * NOTE(review): irq_thresh and thread_thresh are not used in this body; the
 * caller pre-filters which CPUs to analyze — confirm before removing.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	unsigned long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency? The sample with the highest seqnum
	 * is the one that triggered the stop.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;

	/* If the previous IRQ overlapped the expected arrival, report it. */
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		printf("  Previous IRQ interference:	\t	up to  %9.2f us\n",
			ns_to_usf(taa_data->prev_irq_duration));

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
		ns_to_usf(taa_data->timer_irq_start_delay),
		ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
		ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf("  Blocking thread:\n");
		printf("	%24s:%-9llu\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid);
	} else  {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->timer_irq_duration),
			ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_blocking_duration),
			ns_to_per(total, taa_data->thread_blocking_duration));

		printf("	%24s:%-9llu		%9.2f us\n",
			taa_data->run_thread_comm, taa_data->run_thread_pid,
			ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_nmi_sum),
			ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_irq_sum),
			ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_softirq_sum),
			ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
			ns_to_usf(taa_data->thread_thread_sum),
			ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
		ns_to_usf(total));
}
668
669 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
670 {
671 struct trace_instance *trace = &taa_ctx->tool->trace;
672 int retval;
673
674 retval = tracefs_iterate_raw_events(trace->tep,
675 trace->inst,
676 NULL,
677 0,
678 collect_registered_events,
679 trace);
680 if (retval < 0) {
681 err_msg("Error iterating on events\n");
682 return 0;
683 }
684
685 return 1;
686 }
687
688 /**
689 * timerlat_auto_analysis - Analyze the collected data
690 */
691 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
692 {
693 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
694 unsigned long long max_exit_from_idle = 0;
695 struct timerlat_aa_data *taa_data;
696 int max_exit_from_idle_cpu;
697 struct tep_handle *tep;
698 int cpu;
699
700 timerlat_auto_analysis_collect_trace(taa_ctx);
701
702 /* bring stop tracing to the ns scale */
703 irq_thresh = irq_thresh * 1000;
704 thread_thresh = thread_thresh * 1000;
705
706 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
707 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
708
709 if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
710 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
711 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
712 } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
713 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
714 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
715 }
716
717 if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
718 max_exit_from_idle = taa_data->max_exit_idle_latency;
719 max_exit_from_idle_cpu = cpu;
720 }
721
722 }
723
724 if (max_exit_from_idle) {
725 printf("\n");
726 printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
727 ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
728 }
729 if (!taa_ctx->dump_tasks)
730 return;
731
732 printf("\n");
733 printf("Printing CPU tasks:\n");
734 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
735 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
736 tep = taa_ctx->tool->trace.tep;
737
738 printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
739
740 if (taa_data->kworker_func)
741 printf(" kworker:%s:%s",
742 tep_find_function(tep, taa_data->kworker) ? : "<...>",
743 tep_find_function(tep, taa_data->kworker_func));
744 printf("\n");
745 }
746
747 }
748
749 /*
750 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
751 */
752 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
753 {
754 struct timerlat_aa_data *taa_data;
755 int i;
756
757 if (!taa_ctx->taa_data)
758 return;
759
760 for (i = 0; i < taa_ctx->nr_cpus; i++) {
761 taa_data = timerlat_aa_get_data(taa_ctx, i);
762
763 if (taa_data->prev_irqs_seq) {
764 trace_seq_destroy(taa_data->prev_irqs_seq);
765 free(taa_data->prev_irqs_seq);
766 }
767
768 if (taa_data->nmi_seq) {
769 trace_seq_destroy(taa_data->nmi_seq);
770 free(taa_data->nmi_seq);
771 }
772
773 if (taa_data->irqs_seq) {
774 trace_seq_destroy(taa_data->irqs_seq);
775 free(taa_data->irqs_seq);
776 }
777
778 if (taa_data->softirqs_seq) {
779 trace_seq_destroy(taa_data->softirqs_seq);
780 free(taa_data->softirqs_seq);
781 }
782
783 if (taa_data->threads_seq) {
784 trace_seq_destroy(taa_data->threads_seq);
785 free(taa_data->threads_seq);
786 }
787
788 if (taa_data->stack_seq) {
789 trace_seq_destroy(taa_data->stack_seq);
790 free(taa_data->stack_seq);
791 }
792 }
793 }
794
795 /*
796 * timerlat_aa_init_seqs - Init seq files used to store parsed information
797 *
798 * Instead of keeping data structures to store raw data, use seq files to
799 * store parsed data.
800 *
801 * Allocates and initialize seq files.
802 *
803 * Returns 0 on success, -1 otherwise.
804 */
805 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
806 {
807 struct timerlat_aa_data *taa_data;
808 int i;
809
810 for (i = 0; i < taa_ctx->nr_cpus; i++) {
811
812 taa_data = timerlat_aa_get_data(taa_ctx, i);
813
814 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
815 if (!taa_data->prev_irqs_seq)
816 goto out_err;
817
818 trace_seq_init(taa_data->prev_irqs_seq);
819
820 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
821 if (!taa_data->nmi_seq)
822 goto out_err;
823
824 trace_seq_init(taa_data->nmi_seq);
825
826 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
827 if (!taa_data->irqs_seq)
828 goto out_err;
829
830 trace_seq_init(taa_data->irqs_seq);
831
832 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
833 if (!taa_data->softirqs_seq)
834 goto out_err;
835
836 trace_seq_init(taa_data->softirqs_seq);
837
838 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
839 if (!taa_data->threads_seq)
840 goto out_err;
841
842 trace_seq_init(taa_data->threads_seq);
843
844 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
845 if (!taa_data->stack_seq)
846 goto out_err;
847
848 trace_seq_init(taa_data->stack_seq);
849 }
850
851 return 0;
852
853 out_err:
854 timerlat_aa_destroy_seqs(taa_ctx);
855 return -1;
856 }
857
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): unhooks every handler and disables
 * the events that were enabled in the trace instance. The task-tracking
 * events are only touched when dump_tasks is set, matching registration.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);

	/* The events below were only registered for the task dump. */
	if (!dump_tasks)
		return;

	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
894
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the osnoise (and, if dump_tasks, sched/workqueue) events in the
 * trace instance and hooks the parsing handlers to them.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				timerlat_aa_handler, tool);


	/*
	 * register auto-analysis handlers.
	 *
	 * NOTE(review): the "retval < 0 && !errno" pattern appears to treat
	 * "event not found" (no errno set) as fatal while tolerating other
	 * enable failures — confirm against tracefs_event_enable() semantics.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	/* Undo any registration done before the failure. */
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
962
963 /**
964 * timerlat_aa_destroy - Destroy timerlat auto-analysis
965 */
966 void timerlat_aa_destroy(void)
967 {
968 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
969
970 if (!taa_ctx)
971 return;
972
973 if (!taa_ctx->taa_data)
974 goto out_ctx;
975
976 timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
977 timerlat_aa_destroy_seqs(taa_ctx);
978 free(taa_ctx->taa_data);
979 out_ctx:
980 free(taa_ctx);
981 }
982
983 /**
984 * timerlat_aa_init - Initialize timerlat auto-analysis
985 *
986 * Returns 0 on success, -1 otherwise.
987 */
988 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
989 {
990 int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
991 struct timerlat_aa_context *taa_ctx;
992 int retval;
993
994 taa_ctx = calloc(1, sizeof(*taa_ctx));
995 if (!taa_ctx)
996 return -1;
997
998 __timerlat_aa_ctx = taa_ctx;
999
1000 taa_ctx->nr_cpus = nr_cpus;
1001 taa_ctx->tool = tool;
1002 taa_ctx->dump_tasks = dump_tasks;
1003
1004 taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1005 if (!taa_ctx->taa_data)
1006 goto out_err;
1007
1008 retval = timerlat_aa_init_seqs(taa_ctx);
1009 if (retval)
1010 goto out_err;
1011
1012 retval = timerlat_aa_register_events(tool, dump_tasks);
1013 if (retval)
1014 goto out_err;
1015
1016 return 0;
1017
1018 out_err:
1019 timerlat_aa_destroy();
1020 return -1;
1021 }