]> git.ipfire.org Git - thirdparty/kernel/stable.git/blob - tools/tracing/rtla/src/timerlat_aa.c
Merge tag 'kvm-x86-mmu-6.7' of https://github.com/kvm-x86/linux into HEAD
[thirdparty/kernel/stable.git] / tools / tracing / rtla / src / timerlat_aa.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
4 */
5
6 #include <stdlib.h>
7 #include <errno.h>
8 #include "utils.h"
9 #include "osnoise.h"
10 #include "timerlat.h"
11 #include <unistd.h>
12
/*
 * Per-cpu parser state: the analysis alternates between waiting for the
 * timerlat IRQ sample and waiting for the timerlat thread sample.
 */
enum timelat_state {
	TIMERLAT_INIT = 0,
	TIMERLAT_WAITING_IRQ,
	TIMERLAT_WAITING_THREAD,
};
18
19 #define MAX_COMM 24
20
/*
 * Per-cpu data statistics and data.
 */
struct timerlat_aa_data {
	/* Current CPU state (enum timelat_state) */
	int curr_state;

	/* timerlat IRQ latency: sample seqnum, latency and trace timestamp */
	unsigned long long tlat_irq_seqnum;
	unsigned long long tlat_irq_latency;
	unsigned long long tlat_irq_timstamp;

	/* timerlat Thread latency: sample seqnum, latency and trace timestamp */
	unsigned long long tlat_thread_seqnum;
	unsigned long long tlat_thread_latency;
	unsigned long long tlat_thread_timstamp;

	/*
	 * Information about the thread running when the IRQ
	 * arrived.
	 *
	 * This can be blocking or interference, depending on the
	 * priority of the thread. Assuming timerlat is the highest
	 * prio, it is blocking. If timerlat has a lower prio, it is
	 * interference.
	 * note: "unsigned long long" because they are fetched using
	 * tep_get_field_val(), which always returns a 64-bit value.
	 */
	unsigned long long run_thread_pid;
	char run_thread_comm[MAX_COMM];
	unsigned long long thread_blocking_duration;
	unsigned long long max_exit_idle_latency;

	/* Information about the timerlat timer irq */
	unsigned long long timer_irq_start_time;
	unsigned long long timer_irq_start_delay;
	unsigned long long timer_irq_duration;
	unsigned long long timer_exit_from_idle;

	/*
	 * Information about the last IRQ before the timerlat irq
	 * arrived.
	 *
	 * If now - timestamp is <= latency, it might have influenced
	 * in the timerlat irq latency. Otherwise, ignore it.
	 */
	unsigned long long prev_irq_duration;
	unsigned long long prev_irq_timstamp;

	/*
	 * Interference sum, accumulated between the timerlat IRQ and the
	 * timerlat thread samples, reset at every new IRQ sample.
	 */
	unsigned long long thread_nmi_sum;
	unsigned long long thread_irq_sum;
	unsigned long long thread_softirq_sum;
	unsigned long long thread_thread_sum;

	/*
	 * Interference task information, stored as parsed seq buffers
	 * instead of raw data (see timerlat_aa_init_seqs()).
	 */
	struct trace_seq *prev_irqs_seq;
	struct trace_seq *nmi_seq;
	struct trace_seq *irqs_seq;
	struct trace_seq *softirqs_seq;
	struct trace_seq *threads_seq;
	struct trace_seq *stack_seq;

	/*
	 * Current thread, tracked via sched:sched_switch for the final
	 * per-CPU task dump.
	 */
	char current_comm[MAX_COMM];
	unsigned long long current_pid;

	/*
	 * Is the system running a kworker? Work struct address and the
	 * function it executes, zeroed at every sched_switch.
	 */
	unsigned long long kworker;
	unsigned long long kworker_func;
};
99
/*
 * The analysis context and system wide view
 */
struct timerlat_aa_context {
	/* Number of CPUs (from sysconf at init time) */
	int nr_cpus;
	/* Whether to print the per-CPU task dump at the end */
	int dump_tasks;

	/* per CPU data, array of nr_cpus entries */
	struct timerlat_aa_data *taa_data;

	/*
	 * required to translate function names and register
	 * events.
	 */
	struct osnoise_tool *tool;
};
116
117 /*
118 * The data is stored as a local variable, but accessed via a helper function.
119 *
120 * It could be stored inside the trace context. But every access would
121 * require container_of() + a series of pointers. Do we need it? Not sure.
122 *
123 * For now keep it simple. If needed, store it in the tool, add the *context
124 * as a parameter in timerlat_aa_get_ctx() and do the magic there.
125 */
126 static struct timerlat_aa_context *__timerlat_aa_ctx;
127
/*
 * timerlat_aa_get_ctx - Accessor for the single global analysis context.
 */
static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
{
	return __timerlat_aa_ctx;
}
132
133 /*
134 * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
135 */
136 static struct timerlat_aa_data
137 *timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
138 {
139 return &taa_ctx->taa_data[cpu];
140 }
141
/*
 * timerlat_aa_irq_latency - Handles timerlat IRQ event
 *
 * A new timerlat IRQ sample starts a new activation: all interference
 * accumulators and seq buffers are reset, the IRQ latency values are
 * stored, and the max exit-from-idle latency is updated when this sample
 * was taken while the idle task was running.
 *
 * Always returns 0.
 */
static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
				   struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event)
{
	/*
	 * For interference, we start now looking for things that can delay
	 * the thread.
	 */
	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
	taa_data->tlat_irq_timstamp = record->ts;

	/*
	 * Zero values accumulated during the previous activation.
	 */
	taa_data->thread_nmi_sum = 0;
	taa_data->thread_irq_sum = 0;
	taa_data->thread_softirq_sum = 0;
	taa_data->thread_thread_sum = 0;
	taa_data->thread_blocking_duration = 0;
	taa_data->timer_irq_start_time = 0;
	taa_data->timer_irq_duration = 0;
	taa_data->timer_exit_from_idle = 0;

	/*
	 * Zero interference tasks (prev_irqs_seq is reset by the noise
	 * handlers themselves, as it tracks events *before* this IRQ).
	 */
	trace_seq_reset(taa_data->nmi_seq);
	trace_seq_reset(taa_data->irqs_seq);
	trace_seq_reset(taa_data->softirqs_seq);
	trace_seq_reset(taa_data->threads_seq);

	/* IRQ latency values */
	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);

	/* The thread that can cause blocking */
	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);

	/*
	 * Get exit from idle case.
	 *
	 * If it is not idle thread (pid 0):
	 */
	if (taa_data->run_thread_pid)
		return 0;

	/*
	 * if the latency is shorter than the known exit from idle:
	 */
	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
		return 0;

	/*
	 * To be safe, ignore the cases in which an IRQ/NMI could have
	 * interfered with the timerlat IRQ.
	 */
	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
		return 0;

	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;

	return 0;
}
209
210 /*
211 * timerlat_aa_thread_latency - Handles timerlat thread event
212 */
213 static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
214 struct trace_seq *s, struct tep_record *record,
215 struct tep_event *event)
216 {
217 /*
218 * For interference, we start now looking for things that can delay
219 * the IRQ of the next cycle.
220 */
221 taa_data->curr_state = TIMERLAT_WAITING_IRQ;
222 taa_data->tlat_thread_timstamp = record->ts;
223
224 /* Thread latency values */
225 tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
226 tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
227
228 return 0;
229 }
230
231 /*
232 * timerlat_aa_handler - Handle timerlat events
233 *
234 * This function is called to handle timerlat events recording statistics.
235 *
236 * Returns 0 on success, -1 otherwise.
237 */
238 static int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
239 struct tep_event *event, void *context)
240 {
241 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
242 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
243 unsigned long long thread;
244
245 if (!taa_data)
246 return -1;
247
248 tep_get_field_val(s, event, "context", record, &thread, 1);
249 if (!thread)
250 return timerlat_aa_irq_latency(taa_data, s, record, event);
251 else
252 return timerlat_aa_thread_latency(taa_data, s, record, event);
253 }
254
255 /*
256 * timerlat_aa_nmi_handler - Handles NMI noise
257 *
258 * It is used to collect information about interferences from NMI. It is
259 * hooked to the osnoise:nmi_noise event.
260 */
261 static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
262 struct tep_event *event, void *context)
263 {
264 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
265 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
266 unsigned long long duration;
267 unsigned long long start;
268
269 tep_get_field_val(s, event, "duration", record, &duration, 1);
270 tep_get_field_val(s, event, "start", record, &start, 1);
271
272 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
273 taa_data->prev_irq_duration = duration;
274 taa_data->prev_irq_timstamp = start;
275
276 trace_seq_reset(taa_data->prev_irqs_seq);
277 trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n",
278 "nmi", ns_to_usf(duration));
279 return 0;
280 }
281
282 taa_data->thread_nmi_sum += duration;
283 trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n",
284 "nmi", ns_to_usf(duration));
285
286 return 0;
287 }
288
/*
 * timerlat_aa_irq_handler - Handles IRQ noise
 *
 * It is used to collect information about interferences from IRQ. It is
 * hooked to the osnoise:irq_noise event.
 *
 * It is a little bit more complex than the other because it measures:
 *	- The IRQs that can delay the timer IRQ before it happened.
 *	- The Timerlat IRQ handler
 *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
 *	  (IRQ interference).
 */
static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
				   struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long long expected_start;
	unsigned long long duration;
	unsigned long long vector;
	unsigned long long start;
	char *desc;
	int val;

	tep_get_field_val(s, event, "duration", record, &duration, 1);
	tep_get_field_val(s, event, "start", record, &start, 1);
	tep_get_field_val(s, event, "vector", record, &vector, 1);
	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);

	/*
	 * Before the timerlat IRQ: keep only the most recent IRQ, which is
	 * the one that can have delayed the timerlat IRQ arrival.
	 */
	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
		taa_data->prev_irq_duration = duration;
		taa_data->prev_irq_timstamp = start;

		trace_seq_reset(taa_data->prev_irqs_seq);
		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
				 desc, vector, ns_to_usf(duration));
		return 0;
	}

	/*
	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
	 * the timerlat irq handler, so the first irq_noise event after it
	 * is the timerlat IRQ itself.
	 */
	if (!taa_data->timer_irq_start_time) {
		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;

		taa_data->timer_irq_start_time = start;
		taa_data->timer_irq_duration = duration;

		/*
		 * We are dealing with two different clock sources: the
		 * external clock source that timerlat uses as a reference
		 * and the clock used by the tracer. There are also two
		 * moments: the time reading the clock and the timer in
		 * which the event is placed in the buffer (the trace
		 * event timestamp). If the processor is slow or there
		 * is some hardware noise, the difference between the
		 * timestamp and the external clock read can be longer
		 * than the IRQ handler delay, resulting in a negative
		 * time. If so, set IRQ start delay as 0. In the end,
		 * it is less relevant than the noise.
		 */
		if (expected_start < taa_data->timer_irq_start_time)
			taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
		else
			taa_data->timer_irq_start_delay = 0;

		/*
		 * not exit from idle: only the idle task (pid 0) counts for
		 * the exit-from-idle statistics.
		 */
		if (taa_data->run_thread_pid)
			return 0;

		/* no previous IRQ overlapping the expected start: attribute
		 * the whole start delay to the exit from idle cost. */
		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;

		return 0;
	}

	/*
	 * IRQ interference: any further IRQ before the thread sample.
	 */
	taa_data->thread_irq_sum += duration;
	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu \t %9.2f us\n",
			 desc, vector, ns_to_usf(duration));

	return 0;
}
380
/* Softirq vector names, indexed by the softirq vector number. */
static char *softirq_name[] = { "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
383
384
385 /*
386 * timerlat_aa_softirq_handler - Handles Softirq noise
387 *
388 * It is used to collect information about interferences from Softirq. It is
389 * hooked to the osnoise:softirq_noise event.
390 *
391 * It is only printed in the non-rt kernel, as softirqs become thread on RT.
392 */
393 static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
394 struct tep_event *event, void *context)
395 {
396 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
397 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
398 unsigned long long duration;
399 unsigned long long vector;
400 unsigned long long start;
401
402 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
403 return 0;
404
405 tep_get_field_val(s, event, "duration", record, &duration, 1);
406 tep_get_field_val(s, event, "start", record, &start, 1);
407 tep_get_field_val(s, event, "vector", record, &vector, 1);
408
409 taa_data->thread_softirq_sum += duration;
410
411 trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n",
412 softirq_name[vector], vector, ns_to_usf(duration));
413 return 0;
414 }
415
416 /*
417 * timerlat_aa_softirq_handler - Handles thread noise
418 *
419 * It is used to collect information about interferences from threads. It is
420 * hooked to the osnoise:thread_noise event.
421 *
422 * Note: if you see thread noise, your timerlat thread was not the highest prio one.
423 */
424 static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
425 struct tep_event *event, void *context)
426 {
427 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
428 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
429 unsigned long long duration;
430 unsigned long long start;
431 unsigned long long pid;
432 const char *comm;
433 int val;
434
435 if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
436 return 0;
437
438 tep_get_field_val(s, event, "duration", record, &duration, 1);
439 tep_get_field_val(s, event, "start", record, &start, 1);
440
441 tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
442 comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
443
444 if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
445 taa_data->thread_blocking_duration = duration;
446
447 if (comm)
448 strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
449 else
450 sprintf(taa_data->run_thread_comm, "<...>");
451
452 } else {
453 taa_data->thread_thread_sum += duration;
454
455 trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n",
456 comm, pid, ns_to_usf(duration));
457 }
458
459 return 0;
460 }
461
/*
 * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
 *
 * Saves and parse the stack trace generated by the timerlat IRQ.
 */
static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
				     struct tep_event *event, void *context)
{
	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
	unsigned long *caller;
	const char *function;
	int val, i;

	trace_seq_reset(taa_data->stack_seq);

	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
	if (caller) {
		/*
		 * NOTE(review): the loop stops only when tep_find_function()
		 * returns NULL; it presumably relies on the caller array being
		 * terminated by an unresolvable address rather than on the raw
		 * field size in 'val' — TODO confirm against libtraceevent.
		 */
		for (i = 0; ; i++) {
			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
			if (!function)
				break;
			trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
		}
	}
	return 0;
}
490
491 /*
492 * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
493 *
494 * Handles the sched:sched_switch event to trace the current thread running on the
495 * CPU. It is used to display the threads running on the other CPUs when the trace
496 * stops.
497 */
498 static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
499 struct tep_event *event, void *context)
500 {
501 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
502 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
503 const char *comm;
504 int val;
505
506 tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
507 comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
508
509 strncpy(taa_data->current_comm, comm, MAX_COMM);
510
511 /*
512 * If this was a kworker, clean the last kworkers that ran.
513 */
514 taa_data->kworker = 0;
515 taa_data->kworker_func = 0;
516
517 return 0;
518 }
519
520 /*
521 * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
522 *
523 * Handles workqueue:workqueue_execute_start event, keeping track of
524 * the job that a kworker could be doing in the CPU.
525 *
526 * We already catch problems of hardware related latencies caused by work queues
527 * running driver code that causes hardware stall. For example, with DRM drivers.
528 */
529 static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
530 struct tep_event *event, void *context)
531 {
532 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
533 struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
534
535 tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
536 tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
537 return 0;
538 }
539
/*
 * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
 *
 * This is the core of the analysis. It reports, in order: the possible
 * previous-IRQ interference, the timerlat IRQ start delay and latency,
 * the blocking thread, the stack trace, and the NMI/IRQ/softirq/thread
 * interference sums, closing with the total latency.
 *
 * NOTE(review): irq_thresh and thread_thresh are currently unused here;
 * the caller selects which CPUs to analyze — presumably kept for future
 * per-factor reporting. TODO confirm.
 */
static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
				     int irq_thresh, int thread_thresh)
{
	long long exp_irq_ts;
	int total;
	int irq;

	/*
	 * IRQ latency or Thread latency? The sample with the highest seqnum
	 * is the one that hit the stop-tracing condition.
	 */
	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
		irq = 1;
		total = taa_data->tlat_irq_latency;
	} else {
		irq = 0;
		total = taa_data->tlat_thread_latency;
	}

	/*
	 * Expected IRQ arrival time using the trace clock as the base.
	 *
	 * TODO: Add a list of previous IRQ, and then run the list backwards.
	 */
	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) {
		/* only report it if the previous IRQ indeed started earlier */
		if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time)
			printf("  Previous IRQ interference:	\t\t up to  %9.2f us\n",
			       ns_to_usf(taa_data->prev_irq_duration));
	}

	/*
	 * The delay that the IRQ suffered before starting.
	 */
	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
	       (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
	       ns_to_usf(taa_data->timer_irq_start_delay),
	       ns_to_per(total, taa_data->timer_irq_start_delay));

	/*
	 * Timerlat IRQ.
	 */
	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
	       ns_to_usf(taa_data->tlat_irq_latency));

	if (irq) {
		/*
		 * If the trace stopped due to IRQ, the other events will not happen
		 * because... the trace stopped :-).
		 *
		 * That is all folks, the stack trace was printed before the stop,
		 * so it will be displayed, it is the key.
		 */
		printf("  Blocking thread:\n");
		printf("	%24s:%-9llu\n",
		       taa_data->run_thread_comm, taa_data->run_thread_pid);
	} else {
		/*
		 * The duration of the IRQ handler that handled the timerlat IRQ.
		 */
		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->timer_irq_duration),
		       ns_to_per(total, taa_data->timer_irq_duration));

		/*
		 * The amount of time that the current thread postponed the scheduler.
		 *
		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
		 * is no need to compute values here.
		 */
		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_blocking_duration),
		       ns_to_per(total, taa_data->thread_blocking_duration));

		printf("	%24s:%-9llu		%9.2f us\n",
		       taa_data->run_thread_comm, taa_data->run_thread_pid,
		       ns_to_usf(taa_data->thread_blocking_duration));
	}

	/*
	 * Print the stack trace!
	 */
	trace_seq_do_printf(taa_data->stack_seq);

	/*
	 * NMIs can happen during the IRQ, so they are always possible.
	 */
	if (taa_data->thread_nmi_sum)
		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_nmi_sum),
		       ns_to_per(total, taa_data->thread_nmi_sum));

	/*
	 * If it is an IRQ latency, the other factors can be skipped.
	 */
	if (irq)
		goto print_total;

	/*
	 * Prints the interference caused by IRQs to the thread latency.
	 */
	if (taa_data->thread_irq_sum) {
		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_irq_sum),
		       ns_to_per(total, taa_data->thread_irq_sum));

		trace_seq_do_printf(taa_data->irqs_seq);
	}

	/*
	 * Prints the interference caused by Softirqs to the thread latency.
	 */
	if (taa_data->thread_softirq_sum) {
		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_softirq_sum),
		       ns_to_per(total, taa_data->thread_softirq_sum));

		trace_seq_do_printf(taa_data->softirqs_seq);
	}

	/*
	 * Prints the interference caused by other threads to the thread latency.
	 *
	 * If this happens, your timerlat is not the highest prio. OK, migration
	 * thread can happen. But otherwise, you are not measuring the "scheduling
	 * latency" only, and here is the difference from scheduling latency and
	 * timer handling latency.
	 */
	if (taa_data->thread_thread_sum) {
		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
		       ns_to_usf(taa_data->thread_thread_sum),
		       ns_to_per(total, taa_data->thread_thread_sum));

		trace_seq_do_printf(taa_data->threads_seq);
	}

	/*
	 * Done.
	 */
print_total:
	printf("------------------------------------------------------------------------\n");
	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
	       ns_to_usf(total));
}
688
689 static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx)
690 {
691 struct trace_instance *trace = &taa_ctx->tool->trace;
692 int retval;
693
694 retval = tracefs_iterate_raw_events(trace->tep,
695 trace->inst,
696 NULL,
697 0,
698 collect_registered_events,
699 trace);
700 if (retval < 0) {
701 err_msg("Error iterating on events\n");
702 return 0;
703 }
704
705 return 1;
706 }
707
708 /**
709 * timerlat_auto_analysis - Analyze the collected data
710 */
711 void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
712 {
713 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
714 unsigned long long max_exit_from_idle = 0;
715 struct timerlat_aa_data *taa_data;
716 int max_exit_from_idle_cpu;
717 struct tep_handle *tep;
718 int cpu;
719
720 timerlat_auto_analysis_collect_trace(taa_ctx);
721
722 /* bring stop tracing to the ns scale */
723 irq_thresh = irq_thresh * 1000;
724 thread_thresh = thread_thresh * 1000;
725
726 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
727 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
728
729 if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
730 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
731 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
732 } else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
733 printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
734 timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
735 }
736
737 if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
738 max_exit_from_idle = taa_data->max_exit_idle_latency;
739 max_exit_from_idle_cpu = cpu;
740 }
741
742 }
743
744 if (max_exit_from_idle) {
745 printf("\n");
746 printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
747 ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
748 }
749 if (!taa_ctx->dump_tasks)
750 return;
751
752 printf("\n");
753 printf("Printing CPU tasks:\n");
754 for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
755 taa_data = timerlat_aa_get_data(taa_ctx, cpu);
756 tep = taa_ctx->tool->trace.tep;
757
758 printf(" [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
759
760 if (taa_data->kworker_func)
761 printf(" kworker:%s:%s",
762 tep_find_function(tep, taa_data->kworker) ? : "<...>",
763 tep_find_function(tep, taa_data->kworker_func));
764 printf("\n");
765 }
766
767 }
768
769 /*
770 * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
771 */
772 static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
773 {
774 struct timerlat_aa_data *taa_data;
775 int i;
776
777 if (!taa_ctx->taa_data)
778 return;
779
780 for (i = 0; i < taa_ctx->nr_cpus; i++) {
781 taa_data = timerlat_aa_get_data(taa_ctx, i);
782
783 if (taa_data->prev_irqs_seq) {
784 trace_seq_destroy(taa_data->prev_irqs_seq);
785 free(taa_data->prev_irqs_seq);
786 }
787
788 if (taa_data->nmi_seq) {
789 trace_seq_destroy(taa_data->nmi_seq);
790 free(taa_data->nmi_seq);
791 }
792
793 if (taa_data->irqs_seq) {
794 trace_seq_destroy(taa_data->irqs_seq);
795 free(taa_data->irqs_seq);
796 }
797
798 if (taa_data->softirqs_seq) {
799 trace_seq_destroy(taa_data->softirqs_seq);
800 free(taa_data->softirqs_seq);
801 }
802
803 if (taa_data->threads_seq) {
804 trace_seq_destroy(taa_data->threads_seq);
805 free(taa_data->threads_seq);
806 }
807
808 if (taa_data->stack_seq) {
809 trace_seq_destroy(taa_data->stack_seq);
810 free(taa_data->stack_seq);
811 }
812 }
813 }
814
815 /*
816 * timerlat_aa_init_seqs - Init seq files used to store parsed information
817 *
818 * Instead of keeping data structures to store raw data, use seq files to
819 * store parsed data.
820 *
821 * Allocates and initialize seq files.
822 *
823 * Returns 0 on success, -1 otherwise.
824 */
825 static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
826 {
827 struct timerlat_aa_data *taa_data;
828 int i;
829
830 for (i = 0; i < taa_ctx->nr_cpus; i++) {
831
832 taa_data = timerlat_aa_get_data(taa_ctx, i);
833
834 taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
835 if (!taa_data->prev_irqs_seq)
836 goto out_err;
837
838 trace_seq_init(taa_data->prev_irqs_seq);
839
840 taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
841 if (!taa_data->nmi_seq)
842 goto out_err;
843
844 trace_seq_init(taa_data->nmi_seq);
845
846 taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
847 if (!taa_data->irqs_seq)
848 goto out_err;
849
850 trace_seq_init(taa_data->irqs_seq);
851
852 taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
853 if (!taa_data->softirqs_seq)
854 goto out_err;
855
856 trace_seq_init(taa_data->softirqs_seq);
857
858 taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
859 if (!taa_data->threads_seq)
860 goto out_err;
861
862 trace_seq_init(taa_data->threads_seq);
863
864 taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
865 if (!taa_data->stack_seq)
866 goto out_err;
867
868 trace_seq_init(taa_data->stack_seq);
869 }
870
871 return 0;
872
873 out_err:
874 timerlat_aa_destroy_seqs(taa_ctx);
875 return -1;
876 }
877
/*
 * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
 *
 * Mirror of timerlat_aa_register_events(): disables the tracefs events and
 * unregisters every handler, in the same order they were registered.
 */
static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
{

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				     timerlat_aa_handler, tool);

	/* NULL event: disables all osnoise events at once */
	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				     timerlat_aa_nmi_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				     timerlat_aa_irq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				     timerlat_aa_softirq_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				     timerlat_aa_thread_handler, tool);

	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				     timerlat_aa_stack_handler, tool);
	if (!dump_tasks)
		return;

	/* Task-dump events are only enabled when dump_tasks was set */
	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				     timerlat_aa_sched_switch_handler, tool);

	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				     timerlat_aa_kworker_start_handler, tool);
}
914
/*
 * timerlat_aa_register_events - Register events used in the auto-analysis
 *
 * Enables the tracefs events needed by the analysis and registers a
 * parser handler for each. The sched/workqueue events are only enabled
 * when dump_tasks is set.
 *
 * Returns 0 on success, -1 otherwise.
 */
static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
{
	int retval;

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "timerlat",
				   timerlat_aa_handler, tool);


	/*
	 * register auto-analysis handlers.
	 *
	 * NOTE(review): "retval < 0 && !errno" presumably distinguishes
	 * "event does not exist" from other tracefs errors — confirm
	 * against tracefs_event_enable() semantics.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
	if (retval < 0 && !errno) {
		err_msg("Could not find osnoise events\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
				   timerlat_aa_nmi_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
				   timerlat_aa_irq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
				   timerlat_aa_softirq_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
				   timerlat_aa_thread_handler, tool);

	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
				   timerlat_aa_stack_handler, tool);

	if (!dump_tasks)
		return 0;

	/*
	 * Dump task events.
	 */
	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
	if (retval < 0 && !errno) {
		err_msg("Could not find sched_switch\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
				   timerlat_aa_sched_switch_handler, tool);

	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
	if (retval < 0 && !errno) {
		err_msg("Could not find workqueue_execute_start\n");
		goto out_err;
	}

	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
				   timerlat_aa_kworker_start_handler, tool);

	return 0;

out_err:
	timerlat_aa_unregister_events(tool, dump_tasks);
	return -1;
}
982
983 /**
984 * timerlat_aa_destroy - Destroy timerlat auto-analysis
985 */
986 void timerlat_aa_destroy(void)
987 {
988 struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
989
990 if (!taa_ctx)
991 return;
992
993 if (!taa_ctx->taa_data)
994 goto out_ctx;
995
996 timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
997 timerlat_aa_destroy_seqs(taa_ctx);
998 free(taa_ctx->taa_data);
999 out_ctx:
1000 free(taa_ctx);
1001 }
1002
1003 /**
1004 * timerlat_aa_init - Initialize timerlat auto-analysis
1005 *
1006 * Returns 0 on success, -1 otherwise.
1007 */
1008 int timerlat_aa_init(struct osnoise_tool *tool, int dump_tasks)
1009 {
1010 int nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
1011 struct timerlat_aa_context *taa_ctx;
1012 int retval;
1013
1014 taa_ctx = calloc(1, sizeof(*taa_ctx));
1015 if (!taa_ctx)
1016 return -1;
1017
1018 __timerlat_aa_ctx = taa_ctx;
1019
1020 taa_ctx->nr_cpus = nr_cpus;
1021 taa_ctx->tool = tool;
1022 taa_ctx->dump_tasks = dump_tasks;
1023
1024 taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
1025 if (!taa_ctx->taa_data)
1026 goto out_err;
1027
1028 retval = timerlat_aa_init_seqs(taa_ctx);
1029 if (retval)
1030 goto out_err;
1031
1032 retval = timerlat_aa_register_events(tool, dump_tasks);
1033 if (retval)
1034 goto out_err;
1035
1036 return 0;
1037
1038 out_err:
1039 timerlat_aa_destroy();
1040 return -1;
1041 }