From 6ec3058e709cc63513bafe105bf48b512baabe04 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Tue, 27 Jan 2026 10:45:02 -0800 Subject: [PATCH] perf jevents: Add local/remote miss latency metrics for Intel Derive from CBOX/CHA occupancy and inserts the average latency as is provided in Intel's uncore performance monitoring reference. Signed-off-by: Ian Rogers Tested-by: Thomas Falcon Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Benjamin Gray Cc: Caleb Biggers Cc: Edward Baker Cc: Ingo Molnar Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: Leo Yan Cc: Namhyung Kim Cc: Perry Taylor Cc: Peter Zijlstra Cc: Sandipan Das Cc: Weilin Wang Cc: Xu Yang Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/intel_metrics.py | 70 ++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 3 deletions(-) diff --git a/tools/perf/pmu-events/intel_metrics.py b/tools/perf/pmu-events/intel_metrics.py index 118fe0fc05a3..037f9b2ea1b6 100755 --- a/tools/perf/pmu-events/intel_metrics.py +++ b/tools/perf/pmu-events/intel_metrics.py @@ -6,9 +6,10 @@ import math import os import re from typing import Optional -from metric import (d_ratio, has_event, max, CheckPmu, Event, JsonEncodeMetric, - JsonEncodeMetricGroupDescriptions, Literal, LoadEvents, - Metric, MetricConstraint, MetricGroup, MetricRef, Select) +from metric import (d_ratio, has_event, max, source_count, CheckPmu, Event, + JsonEncodeMetric, JsonEncodeMetricGroupDescriptions, + Literal, LoadEvents, Metric, MetricConstraint, MetricGroup, + MetricRef, Select) # Global command line arguments. _args = None @@ -624,6 +625,68 @@ def IntelL2() -> Optional[MetricGroup]: ], description="L2 data cache analysis") +def IntelMissLat() -> Optional[MetricGroup]: + try: + ticks = Event("UNC_CHA_CLOCKTICKS", "UNC_C_CLOCKTICKS") + data_rd_loc_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", + "UNC_CHA_TOR_OCCUPANCY.IA_MISS", + "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE", + "UNC_C_TOR_OCCUPANCY.MISS_OPCODE") + data_rd_loc_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_LOCAL", + "UNC_CHA_TOR_INSERTS.IA_MISS", + "UNC_C_TOR_INSERTS.MISS_LOCAL_OPCODE", + "UNC_C_TOR_INSERTS.MISS_OPCODE") + data_rd_rem_occ = Event("UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_REMOTE", + "UNC_CHA_TOR_OCCUPANCY.IA_MISS", + "UNC_C_TOR_OCCUPANCY.MISS_REMOTE_OPCODE", + "UNC_C_TOR_OCCUPANCY.NID_MISS_OPCODE") + data_rd_rem_ins = Event("UNC_CHA_TOR_INSERTS.IA_MISS_DRD_REMOTE", + "UNC_CHA_TOR_INSERTS.IA_MISS", + "UNC_C_TOR_INSERTS.MISS_REMOTE_OPCODE", + "UNC_C_TOR_INSERTS.NID_MISS_OPCODE") + except: + return None + + if (data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_LOCAL_OPCODE" or + data_rd_loc_occ.name == "UNC_C_TOR_OCCUPANCY.MISS_OPCODE"): + data_rd = 0x182 + for e in [data_rd_loc_occ, data_rd_loc_ins, data_rd_rem_occ, data_rd_rem_ins]: + e.name += f"/filter_opc={hex(data_rd)}/" + elif data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS": + # Demand Data Read - Full cache-line read requests from core for + # lines to be cached in S or E, typically for data + demand_data_rd = 0x202 + # LLC Prefetch Data - Uncore will first look up the line in the + # LLC; for a cache hit, the LRU will be updated, on a miss, the + # DRd will be initiated + llc_prefetch_data = 0x25a + local_filter = (f"/filter_opc0={hex(demand_data_rd)}," + f"filter_opc1={hex(llc_prefetch_data)}," + "filter_loc,filter_nm,filter_not_nm/") + remote_filter = (f"/filter_opc0={hex(demand_data_rd)}," + f"filter_opc1={hex(llc_prefetch_data)}," + "filter_rem,filter_nm,filter_not_nm/") + for e in [data_rd_loc_occ, data_rd_loc_ins]: + e.name += local_filter + for e in [data_rd_rem_occ, data_rd_rem_ins]: + e.name += remote_filter + else: + assert data_rd_loc_occ.name == "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_LOCAL", data_rd_loc_occ + + ticks_per_cha = ticks / source_count(data_rd_loc_ins) + loc_lat = interval_sec * 1e9 * data_rd_loc_occ / \ + (ticks_per_cha * data_rd_loc_ins) + ticks_per_cha = ticks / source_count(data_rd_rem_ins) + rem_lat = interval_sec * 1e9 * data_rd_rem_occ / \ + (ticks_per_cha * data_rd_rem_ins) + return MetricGroup("lpm_miss_lat", [ + Metric("lpm_miss_lat_loc", "Local to a socket miss latency in nanoseconds", + loc_lat, "ns"), + Metric("lpm_miss_lat_rem", "Remote to a socket miss latency in nanoseconds", + rem_lat, "ns"), + ]) + + def IntelMlp() -> Optional[Metric]: try: l1d = Event("L1D_PEND_MISS.PENDING") @@ -1005,6 +1068,7 @@ def main() -> None: IntelIlp(), IntelL2(), IntelLdSt(), + IntelMissLat(), IntelMlp(), IntelPorts(), IntelSwpf(), -- 2.47.3