git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf mem: Add 'dtlb' output field
author Namhyung Kim <namhyung@kernel.org>
Wed, 30 Apr 2025 20:55:48 +0000 (13:55 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 2 May 2025 18:36:14 +0000 (15:36 -0300)
This is a breakdown of perf_mem_data_src.mem_dtlb values.  It assumes
PMU drivers set the PERF_MEM_TLB_HIT bit together with the appropriate
level bit.

Having PERF_MEM_TLB_MISS set means that the translation was not found in
any level of the TLB.  For now, the PERF_MEM_TLB_{WK,OS} bits are not used.

Also, it seems Intel machines don't precisely distinguish between L1 and
L2 hits, so I added ANY_HIT (printed as "L?-Hit") to handle that case.

  $ perf mem report -F overhead,dtlb,dso --stdio
  ...
  #           --- D-TLB ----
  # Overhead   L?-Hit   Miss  Shared Object
  # ........  ..............  .................
  #
      67.03%    99.5%   0.5%  [unknown]
      31.23%    99.2%   0.8%  [kernel.kallsyms]
       1.08%    97.8%   2.2%  [i915]
       0.36%   100.0%   0.0%  [JIT] tid 6853
       0.12%   100.0%   0.0%  [drm]
       0.05%   100.0%   0.0%  [drm_kms_helper]
       0.05%   100.0%   0.0%  [ext4]
       0.02%   100.0%   0.0%  [aesni_intel]
       0.02%   100.0%   0.0%  [crc32c_intel]
       0.02%   100.0%   0.0%  [dm_crypt]
       ...

Committer testing:

  # perf report --header | grep cpudesc
  # cpudesc : AMD Ryzen 9 9950X3D 16-Core Processor
  # perf mem report -F overhead,dtlb,dso --stdio | head -20
  # To display the perf.data header info, please use --header/--header-only options.
  #
  #
  # Total Lost Samples: 0
  #
  # Samples: 2K of event 'cycles:P'
  # Total weight : 2637
  # Sort order   : local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,local_p_stage_cyc
  #
  #           ---------- D-TLB -----------
  # Overhead   L1-Hit L2-Hit   Miss  Other  Shared Object
  # ........  ............................  .................................
  #
      77.47%    18.4%   0.1%   0.6%  80.9%  [kernel.kallsyms]
       5.61%    36.5%   0.7%   1.4%  61.5%  libxul.so
       2.77%    39.7%   0.0%  12.3%  47.9%  libc.so.6
       2.01%    34.0%   1.9%   1.9%  62.3%  libglib-2.0.so.0.8400.1
       1.93%    31.4%   2.0%   2.0%  64.7%  [amdgpu]
       1.63%    48.8%   0.0%   0.0%  51.2%  [JIT] tid 60168
       1.14%     3.3%   0.0%   0.0%  96.7%  [vdso]
  #

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20250430205548.789750-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/ui/browsers/hists.c
tools/perf/ui/hist.c
tools/perf/util/hist.h
tools/perf/util/mem-events.c
tools/perf/util/mem-events.h
tools/perf/util/sort.c

index 5b080f506244024608cd248bbba3dcdef73ddf5e..d26b925e3d7f46afad89b08ebb38a6cae34e9479 100644 (file)
@@ -1288,6 +1288,7 @@ __HPP_COLOR_MEM_STAT_FN(op, OP)
 __HPP_COLOR_MEM_STAT_FN(cache, CACHE)
 __HPP_COLOR_MEM_STAT_FN(memory, MEMORY)
 __HPP_COLOR_MEM_STAT_FN(snoop, SNOOP)
+__HPP_COLOR_MEM_STAT_FN(dtlb, DTLB)
 
 #undef __HPP_COLOR_PERCENT_FN
 #undef __HPP_COLOR_ACC_PERCENT_FN
@@ -1319,6 +1320,8 @@ void hist_browser__init_hpp(void)
                                hist_browser__hpp_color_mem_stat_memory;
        perf_hpp__format[PERF_HPP__MEM_STAT_SNOOP].color =
                                hist_browser__hpp_color_mem_stat_snoop;
+       perf_hpp__format[PERF_HPP__MEM_STAT_DTLB].color =
+                               hist_browser__hpp_color_mem_stat_dtlb;
 
        res_sample_init();
 }
index 94024dfa8dccf9ba5e51c0dcfb6ea126348e4d6b..ed5c40ebd906f0768a18bc3422ecf437e36bb69e 100644 (file)
@@ -354,6 +354,8 @@ static enum mem_stat_type hpp__mem_stat_type(struct perf_hpp_fmt *fmt)
                return PERF_MEM_STAT_MEMORY;
        case PERF_HPP__MEM_STAT_SNOOP:
                return PERF_MEM_STAT_SNOOP;
+       case PERF_HPP__MEM_STAT_DTLB:
+               return PERF_MEM_STAT_DTLB;
        default:
                break;
        }
@@ -653,6 +655,7 @@ HPP_MEM_STAT_FNS(op, OP)
 HPP_MEM_STAT_FNS(cache, CACHE)
 HPP_MEM_STAT_FNS(memory, MEMORY)
 HPP_MEM_STAT_FNS(snoop, SNOOP)
+HPP_MEM_STAT_FNS(dtlb, DTLB)
 
 static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
                            struct hist_entry *a __maybe_unused,
@@ -760,6 +763,7 @@ struct perf_hpp_fmt perf_hpp__format[] = {
        HPP__MEM_STAT_PRINT_FNS("Cache", cache, CACHE),
        HPP__MEM_STAT_PRINT_FNS("Memory", memory, MEMORY),
        HPP__MEM_STAT_PRINT_FNS("Snoop", snoop, SNOOP),
+       HPP__MEM_STAT_PRINT_FNS("D-TLB", dtlb, DTLB),
 };
 
 struct perf_hpp_list perf_hpp_list = {
@@ -1118,6 +1122,7 @@ void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
        case PERF_HPP__MEM_STAT_CACHE:
        case PERF_HPP__MEM_STAT_MEMORY:
        case PERF_HPP__MEM_STAT_SNOOP:
+       case PERF_HPP__MEM_STAT_DTLB:
                fmt->len = MEM_STAT_LEN * MEM_STAT_PRINT_LEN;
                break;
 
index c2d286c4ba395674927458bb738fcec20ac057c2..355198fd70281f4364c69d56386476c0a0130c12 100644 (file)
@@ -592,6 +592,7 @@ enum {
        PERF_HPP__MEM_STAT_CACHE,
        PERF_HPP__MEM_STAT_MEMORY,
        PERF_HPP__MEM_STAT_SNOOP,
+       PERF_HPP__MEM_STAT_DTLB,
 
        PERF_HPP__MAX_INDEX
 };
index ddcfc6500d77a9e60a62c2bed0b2293520644390..3e9131e05348a9968fb7f51574edb609114528c1 100644 (file)
@@ -868,6 +868,19 @@ int mem_stat_index(const enum mem_stat_type mst, const u64 val)
                default:
                        return MEM_STAT_SNOOP_OTHER;
                }
+       case PERF_MEM_STAT_DTLB:
+               switch (src.mem_dtlb) {
+               case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT:
+                       return MEM_STAT_DTLB_L1_HIT;
+               case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT:
+                       return MEM_STAT_DTLB_L2_HIT;
+               case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT:
+                       return MEM_STAT_DTLB_ANY_HIT;
+               default:
+                       if (src.mem_dtlb & PERF_MEM_TLB_MISS)
+                               return MEM_STAT_DTLB_MISS;
+                       return MEM_STAT_DTLB_OTHER;
+               }
        default:
                break;
        }
@@ -942,6 +955,20 @@ const char *mem_stat_name(const enum mem_stat_type mst, const int idx)
                default:
                        return "Other";
                }
+       case PERF_MEM_STAT_DTLB:
+               switch (idx) {
+               case MEM_STAT_DTLB_L1_HIT:
+                       return "L1-Hit";
+               case MEM_STAT_DTLB_L2_HIT:
+                       return "L2-Hit";
+               case MEM_STAT_DTLB_ANY_HIT:
+                       return "L?-Hit";
+               case MEM_STAT_DTLB_MISS:
+                       return "Miss";
+               case MEM_STAT_DTLB_OTHER:
+               default:
+                       return "Other";
+               }
        default:
                break;
        }
index 4d8f18583af425501e202f2a9b0d8f3718553522..5b98076904b0b6892557f1aac2c1658601edea46 100644 (file)
@@ -94,6 +94,7 @@ enum mem_stat_type {
        PERF_MEM_STAT_CACHE,
        PERF_MEM_STAT_MEMORY,
        PERF_MEM_STAT_SNOOP,
+       PERF_MEM_STAT_DTLB,
 };
 
 #define MEM_STAT_PRINT_LEN  7  /* 1 space + 5 digits + 1 percent sign */
@@ -134,6 +135,14 @@ enum mem_stat_snoop {
        MEM_STAT_SNOOP_OTHER,
 };
 
+enum mem_stat_dtlb {	/* D-TLB buckets returned by mem_stat_index() for PERF_MEM_STAT_DTLB */
+       MEM_STAT_DTLB_L1_HIT,	/* mem_dtlb == PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT; shown as "L1-Hit" */
+       MEM_STAT_DTLB_L2_HIT,	/* mem_dtlb == PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT; shown as "L2-Hit" */
+       MEM_STAT_DTLB_ANY_HIT,	/* mem_dtlb == L1 | L2 | HIT (level not distinguished); shown as "L?-Hit" */
+       MEM_STAT_DTLB_MISS,	/* none of the hit patterns above matched and PERF_MEM_TLB_MISS is set; shown as "Miss" */
+       MEM_STAT_DTLB_OTHER,	/* any remaining mem_dtlb value; shown as "Other" */
+};
+
 int mem_stat_index(const enum mem_stat_type mst, const u64 data_src);
 const char *mem_stat_name(const enum mem_stat_type mst, const int idx);
 
index 51a210d874327d3a07e6a4c2c54c9d4b04dcd3ad..8efafa7c10822ee9c2d541ddd1e2ff8e891ff460 100644 (file)
@@ -2627,6 +2627,7 @@ static struct hpp_dimension hpp_sort_dimensions[] = {
        DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"),
        DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"),
        DIM_MEM(PERF_HPP__MEM_STAT_SNOOP, "snoop"),
+       DIM_MEM(PERF_HPP__MEM_STAT_DTLB, "dtlb"),
 };
 
 #undef DIM_MEM