]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
perf vendor events amd: Fix Zen 4 cache latency events
authorSandipan Das <sandipan.das@amd.com>
Fri, 1 Mar 2024 08:44:31 +0000 (14:14 +0530)
committerSasha Levin <sashal@kernel.org>
Tue, 26 Mar 2024 22:17:16 +0000 (18:17 -0400)
[ Upstream commit 498d3486376befe4e82b5334d44bbc86b1982ee4 ]

L3PMCx0AC and L3PMCx0AD, used in l3_xi_sampled_latency* events, have a
quirk that requires them to be programmed with SliceId set to 0x3.
Without this, the events do not count at all and affects dependent
metrics such as l3_read_miss_latency.

If ThreadMask is not specified, the amd-uncore driver internally sets
ThreadMask to 0x3, EnAllCores to 0x1 and EnAllSlices to 0x1 but does
not set SliceId. Since SliceId must also be set to 0x3 in this case,
specify all the other fields explicitly.

E.g.

  $ sudo perf stat -e l3_xi_sampled_latency.all,l3_xi_sampled_latency_requests.all -a sleep 1

Before:

   Performance counter stats for 'system wide':

                   0      l3_xi_sampled_latency.all
                   0      l3_xi_sampled_latency_requests.all

         1.005155399 seconds time elapsed

After:

   Performance counter stats for 'system wide':

             921,446      l3_xi_sampled_latency.all
              54,210      l3_xi_sampled_latency_requests.all

         1.005664472 seconds time elapsed

Fixes: 5b2ca349c313 ("perf vendor events amd: Add Zen 4 uncore events")
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: ananth.narayan@amd.com
Cc: ravi.bangoria@amd.com
Cc: eranian@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240301084431.646221-1-sandipan.das@amd.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
tools/perf/pmu-events/arch/x86/amdzen4/cache.json
tools/perf/pmu-events/jevents.py

index ecbe9660b2b31b72760609bcb771fd6e8475c856..e6d710cf3ce29849957c26bba5b322c52478c102 100644 (file)
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from DRAM in the same NUMA node.",
     "UMask": "0x01",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from DRAM in a different NUMA node.",
     "UMask": "0x02",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in the same NUMA node.",
     "UMask": "0x04",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from another CCX's cache when the address was in a different NUMA node.",
     "UMask": "0x08",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in the same NUMA node.",
     "UMask": "0x10",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency when data is sourced from extension memory (CXL) in a different NUMA node.",
     "UMask": "0x20",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xac",
     "BriefDescription": "Average sampled latency from all data sources.",
     "UMask": "0x3f",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from DRAM in the same NUMA node.",
     "UMask": "0x01",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from DRAM in a different NUMA node.",
     "UMask": "0x02",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in the same NUMA node.",
     "UMask": "0x04",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from another CCX's cache when the address was in a different NUMA node.",
     "UMask": "0x08",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from extension memory (CXL) in the same NUMA node.",
     "UMask": "0x10",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from extension memory (CXL) in a different NUMA node.",
     "UMask": "0x20",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   },
   {
     "EventCode": "0xad",
     "BriefDescription": "L3 cache fill requests sourced from all data sources.",
     "UMask": "0x3f",
+    "EnAllCores": "0x1",
+    "EnAllSlices": "0x1",
+    "SliceId": "0x3",
+    "ThreadMask": "0x3",
     "Unit": "L3PMC"
   }
 ]
index 53ab050c8fa436f584867c707a91a1ae985aa567..ce846f29c08d640f810d64af0466744b2db11053 100755 (executable)
@@ -356,6 +356,10 @@ class JsonEvent:
         ('UMask', 'umask='),
         ('NodeType', 'type='),
         ('RdWrMask', 'rdwrmask='),
+        ('EnAllCores', 'enallcores='),
+        ('EnAllSlices', 'enallslices='),
+        ('SliceId', 'sliceid='),
+        ('ThreadMask', 'threadmask='),
     ]
     for key, value in event_fields:
       if key in jd and jd[key] != '0':