]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
perf stat: Add command line option for enabling TPEBS recording
authorWeilin Wang <weilin.wang@intel.com>
Sat, 20 Jul 2024 06:20:59 +0000 (02:20 -0400)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Tue, 13 Aug 2024 18:25:32 +0000 (15:25 -0300)
With this command line option, TPEBS recording is turned off in 'perf
stat' on default. It will only be turned on when this option is given in
'perf stat' command.

Example with --record-tpebs:

  perf stat -M tma_split_loads -C1-4 --record-tpebs sleep 1

  [ perf record: Woken up 2 times to write data ]
  [ perf record: Captured and wrote 0.044 MB - ]

   Performance counter stats for 'CPU(s) 1-4':

      53,259,156,071      cpu_core/TOPDOWN.SLOTS/          #      1.6 %  tma_split_loads   (50.00%)
      15,867,565,250      cpu_core/topdown-retiring/                                       (50.00%)
      15,655,580,731      cpu_core/topdown-mem-bound/                                      (50.00%)
      11,738,022,218      cpu_core/topdown-bad-spec/                                       (50.00%)
       6,151,265,424      cpu_core/topdown-fe-bound/                                       (50.00%)
      20,445,917,581      cpu_core/topdown-be-bound/                                       (50.00%)
       6,925,098,013      cpu_core/L1D_PEND_MISS.PENDING/                                  (50.00%)
       3,838,653,421      cpu_core/MEMORY_ACTIVITY.STALLS_L1D_MISS/                        (50.00%)
       4,797,059,783      cpu_core/EXE_ACTIVITY.BOUND_ON_LOADS/                            (50.00%)
      11,931,916,714      cpu_core/CPU_CLK_UNHALTED.THREAD/                                (50.00%)
         102,576,164      cpu_core/MEM_LOAD_COMPLETED.L1_MISS_ANY/                         (50.00%)
          64,071,854      cpu_core/MEM_INST_RETIRED.SPLIT_LOADS/                           (50.00%)
                   3      cpu_core/MEM_INST_RETIRED.SPLIT_LOADS/R

         1.003049679 seconds time elapsed

Example without --record-tpebs:

  perf stat -M tma_contested_accesses -C1 sleep 1

   Performance counter stats for 'CPU(s) 1':

          50,203,891      cpu_core/TOPDOWN.SLOTS/          #      0.0 %  tma_contested_accesses   (63.60%)
          10,040,777      cpu_core/topdown-retiring/                                              (63.60%)
           6,890,729      cpu_core/topdown-mem-bound/                                             (63.60%)
           2,756,463      cpu_core/topdown-bad-spec/                                              (63.60%)
          10,828,288      cpu_core/topdown-fe-bound/                                              (63.60%)
          28,350,432      cpu_core/topdown-be-bound/                                              (63.60%)
                  98      cpu_core/OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM/                          (63.70%)
             577,520      cpu_core/MEMORY_ACTIVITY.STALLS_L2_MISS/                                (54.62%)
             313,339      cpu_core/MEMORY_ACTIVITY.STALLS_L3_MISS/                                (54.62%)
              14,155      cpu_core/MEM_LOAD_RETIRED.L1_MISS/                                      (45.54%)
                   0      cpu_core/OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD/                  (36.30%)
           8,468,077      cpu_core/CPU_CLK_UNHALTED.THREAD/                                       (45.38%)
                 198      cpu_core/MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS/                             (45.38%)
               8,324      cpu_core/MEM_LOAD_RETIRED.FB_HIT/                                       (45.38%)
       3,388,031,520      TSC
          23,226,785      cpu_core/CPU_CLK_UNHALTED.REF_TSC/                                      (54.46%)
                  80      cpu_core/MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD/                              (54.46%)
                   0      cpu_core/MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD/R
                   0      cpu_core/MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS/R
       1,006,816,667 ns   duration_time

         1.002537737 seconds time elapsed

Reviewed-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Weilin Wang <weilin.wang@intel.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Samantha Alt <samantha.alt@intel.com>
Link: https://lore.kernel.org/r/20240720062102.444578-7-weilin.wang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-stat.txt
tools/perf/builtin-stat.c

index 29756a87ab6fa696d0966a965f1d4bf36bccde50..2bc06367248691dd1320d5bbd1c433f1a8a7ddfe 100644 (file)
@@ -498,6 +498,14 @@ To interpret the results it is usually needed to know on which
 CPUs the workload runs on. If needed the CPUs can be forced using
 taskset.
 
+--record-tpebs::
+Enable automatic sampling on Intel TPEBS retire_latency events (event with :R
+modifier). Without this option, perf would not capture dynamic retire_latency
+at runtime. Currently, a zero value is assigned to the retire_latency event when
+this option is not set. The TPEBS hardware feature starts from Intel Granite
+Rapids microarchitecture. This option only exists in X86_64 and is meaningful on
+Intel platforms with TPEBS feature.
+
 --td-level::
 Print the top-down statistics that equal the input level. It allows
 users to print the interested top-down metrics level instead of the
index 6162b98fc9414f1d55b99623d16cfcf96923d74e..cf985cdb9a6ee58802c3d7c31fbbbdbf0376acc3 100644 (file)
@@ -2475,6 +2475,10 @@ int cmd_stat(int argc, const char **argv)
                        "disable adding events for the metric threshold calculation"),
                OPT_BOOLEAN(0, "topdown", &topdown_run,
                        "measure top-down statistics"),
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+               OPT_BOOLEAN(0, "record-tpebs", &tpebs_recording,
+                       "enable recording for tpebs when retire_latency required"),
+#endif
                OPT_UINTEGER(0, "td-level", &stat_config.topdown_level,
                        "Set the metrics level for the top-down statistics (0: max level)"),
                OPT_BOOLEAN(0, "smi-cost", &smi_cost,