*/
+union omr_encoding {
+ struct {
+ u8 omr_source : 4;
+ u8 omr_remote : 1;
+ u8 omr_hitm : 1;
+ u8 omr_snoop : 1;
+ u8 omr_promoted : 1;
+ };
+ u8 omr_full;
+};
+
union intel_x86_pebs_dse {
u64 val;
struct {
unsigned int lnc_addr_blk:1;
unsigned int ld_reserved6:18;
};
+ struct {
+ unsigned int pnc_dse: 8;
+ unsigned int pnc_l2_miss:1;
+ unsigned int pnc_stlb_clean_hit:1;
+ unsigned int pnc_stlb_any_hit:1;
+ unsigned int pnc_stlb_miss:1;
+ unsigned int pnc_locked:1;
+ unsigned int pnc_data_blk:1;
+ unsigned int pnc_addr_blk:1;
+ unsigned int pnc_fb_full:1;
+ unsigned int ld_reserved8:16;
+ };
};
__intel_pmu_pebs_data_source_cmt(data_source);
}
+/* Version for Panthercove and later */
+
+/* L2 hit */
+#define PNC_PEBS_DATA_SOURCE_MAX 16
+static u64 pnc_pebs_l2_hit_data_source[PNC_PEBS_DATA_SOURCE_MAX] = {
+ P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA), /* 0x00: non-cache access */
+ OP_LH | LEVEL(L0) | P(SNOOP, NONE), /* 0x01: L0 hit */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: L1 Miss Handling Buffer hit */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x04: L2 Hit Clean */
+ 0, /* 0x05: Reserved */
+ 0, /* 0x06: Reserved */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT), /* 0x07: L2 Hit Snoop HIT */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM), /* 0x08: L2 Hit Snoop Hit Modified */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, MISS), /* 0x09: Prefetch Promotion */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, MISS), /* 0x0a: Cross Core Prefetch Promotion */
+ 0, /* 0x0b: Reserved */
+ 0, /* 0x0c: Reserved */
+ 0, /* 0x0d: Reserved */
+ 0, /* 0x0e: Reserved */
+ OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */
+};
+
+/* L2 miss */
+#define OMR_DATA_SOURCE_MAX 16
+static u64 omr_data_source[OMR_DATA_SOURCE_MAX] = {
+ P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA), /* 0x00: invalid */
+ 0, /* 0x01: Reserved */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_SHARE), /* 0x02: local CA shared cache */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_NON_SHARE),/* 0x03: local CA non-shared cache */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_IO), /* 0x04: other CA IO agent */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_SHARE), /* 0x05: other CA shared cache */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_NON_SHARE),/* 0x06: other CA non-shared cache */
+ OP_LH | LEVEL(RAM) | P(REGION, MMIO), /* 0x07: MMIO */
+ OP_LH | LEVEL(RAM) | P(REGION, MEM0), /* 0x08: Memory region 0 */
+ OP_LH | LEVEL(RAM) | P(REGION, MEM1), /* 0x09: Memory region 1 */
+ OP_LH | LEVEL(RAM) | P(REGION, MEM2), /* 0x0a: Memory region 2 */
+ OP_LH | LEVEL(RAM) | P(REGION, MEM3), /* 0x0b: Memory region 3 */
+ OP_LH | LEVEL(RAM) | P(REGION, MEM4), /* 0x0c: Memory region 4 */
+ OP_LH | LEVEL(RAM) | P(REGION, MEM5), /* 0x0d: Memory region 5 */
+ OP_LH | LEVEL(RAM) | P(REGION, MEM6), /* 0x0e: Memory region 6 */
+ OP_LH | LEVEL(RAM) | P(REGION, MEM7), /* 0x0f: Memory region 7 */
+};
+
+static u64 parse_omr_data_source(u8 dse)
+{
+ union omr_encoding omr;
+ u64 val = 0;
+
+ omr.omr_full = dse;
+ val = omr_data_source[omr.omr_source];
+ if (omr.omr_source > 0x1 && omr.omr_source < 0x7)
+ val |= omr.omr_remote ? P(LVL, REM_CCE1) : 0;
+ else if (omr.omr_source > 0x7)
+ val |= omr.omr_remote ? P(LVL, REM_RAM1) : P(LVL, LOC_RAM);
+
+ if (omr.omr_remote)
+ val |= REM;
+
+ val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT);
+
+ if (omr.omr_source == 0x2) {
+ u8 snoop = omr.omr_snoop | omr.omr_promoted;
+
+ if (snoop == 0x0)
+ val |= P(SNOOP, NA);
+ else if (snoop == 0x1)
+ val |= P(SNOOP, MISS);
+ else if (snoop == 0x2)
+ val |= P(SNOOP, HIT);
+ else if (snoop == 0x3)
+ val |= P(SNOOP, NONE);
+ } else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) {
+ val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0;
+ }
+
+ return val;
+}
+
static u64 precise_store_data(u64 status)
{
union intel_x86_pebs_dse dse;
return lnl_latency_data(event, status);
}
+u64 pnc_latency_data(struct perf_event *event, u64 status)
+{
+ union intel_x86_pebs_dse dse;
+ union perf_mem_data_src src;
+ u64 val;
+
+ dse.val = status;
+
+ if (!dse.pnc_l2_miss)
+ val = pnc_pebs_l2_hit_data_source[dse.pnc_dse & 0xf];
+ else
+ val = parse_omr_data_source(dse.pnc_dse);
+
+ if (!val)
+ val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
+
+ if (dse.pnc_stlb_miss)
+ val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ if (dse.pnc_locked)
+ val |= P(LOCK, LOCKED);
+
+ if (dse.pnc_data_blk)
+ val |= P(BLK, DATA);
+ if (dse.pnc_addr_blk)
+ val |= P(BLK, ADDR);
+ if (!dse.pnc_data_blk && !dse.pnc_addr_blk)
+ val |= P(BLK, NA);
+
+ src.val = val;
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+ src.mem_op = P(OP, STORE);
+
+ return src.val;
+}
+
static u64 load_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
mem_snoopx : 2, /* Snoop mode, ext */
mem_blk : 3, /* Access blocked */
mem_hops : 3, /* Hop level */
- mem_rsvd : 18;
+ mem_region : 5, /* cache/memory regions */
+ mem_rsvd : 13;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd : 18,
+ __u64 mem_rsvd : 13,
+ mem_region : 5, /* cache/memory regions */
mem_hops : 3, /* Hop level */
mem_blk : 3, /* Access blocked */
mem_snoopx : 2, /* Snoop mode, ext */
#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */
#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */
-/* 0x007 available */
+#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */
#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */
#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
+/* Cache/Memory region */
+#define PERF_MEM_REGION_NA 0x0 /* Invalid */
+#define PERF_MEM_REGION_RSVD 0x01 /* Reserved */
+#define PERF_MEM_REGION_L_SHARE 0x02 /* Local CA shared cache */
+#define PERF_MEM_REGION_L_NON_SHARE 0x03 /* Local CA non-shared cache */
+#define PERF_MEM_REGION_O_IO 0x04 /* Other CA IO agent */
+#define PERF_MEM_REGION_O_SHARE 0x05 /* Other CA shared cache */
+#define PERF_MEM_REGION_O_NON_SHARE 0x06 /* Other CA non-shared cache */
+#define PERF_MEM_REGION_MMIO 0x07 /* MMIO */
+#define PERF_MEM_REGION_MEM0 0x08 /* Memory region 0 */
+#define PERF_MEM_REGION_MEM1 0x09 /* Memory region 1 */
+#define PERF_MEM_REGION_MEM2 0x0a /* Memory region 2 */
+#define PERF_MEM_REGION_MEM3 0x0b /* Memory region 3 */
+#define PERF_MEM_REGION_MEM4 0x0c /* Memory region 4 */
+#define PERF_MEM_REGION_MEM5 0x0d /* Memory region 5 */
+#define PERF_MEM_REGION_MEM6 0x0e /* Memory region 6 */
+#define PERF_MEM_REGION_MEM7 0x0f /* Memory region 7 */
+#define PERF_MEM_REGION_SHIFT 46
+
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
mem_snoopx : 2, /* Snoop mode, ext */
mem_blk : 3, /* Access blocked */
mem_hops : 3, /* Hop level */
- mem_rsvd : 18;
+ mem_region : 5, /* cache/memory regions */
+ mem_rsvd : 13;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd : 18,
+ __u64 mem_rsvd : 13,
+ mem_region : 5, /* cache/memory regions */
mem_hops : 3, /* Hop level */
mem_blk : 3, /* Access blocked */
mem_snoopx : 2, /* Snoop mode, ext */
#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */
#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */
-/* 0x007 available */
+#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */
#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */
#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */
/* 5-7 available */
#define PERF_MEM_HOPS_SHIFT 43
+/* Cache/Memory region */
+#define PERF_MEM_REGION_NA 0x0 /* Invalid */
+#define PERF_MEM_REGION_RSVD 0x01 /* Reserved */
+#define PERF_MEM_REGION_L_SHARE 0x02 /* Local CA shared cache */
+#define PERF_MEM_REGION_L_NON_SHARE 0x03 /* Local CA non-shared cache */
+#define PERF_MEM_REGION_O_IO 0x04 /* Other CA IO agent */
+#define PERF_MEM_REGION_O_SHARE 0x05 /* Other CA shared cache */
+#define PERF_MEM_REGION_O_NON_SHARE 0x06 /* Other CA non-shared cache */
+#define PERF_MEM_REGION_MMIO 0x07 /* MMIO */
+#define PERF_MEM_REGION_MEM0 0x08 /* Memory region 0 */
+#define PERF_MEM_REGION_MEM1 0x09 /* Memory region 1 */
+#define PERF_MEM_REGION_MEM2 0x0a /* Memory region 2 */
+#define PERF_MEM_REGION_MEM3 0x0b /* Memory region 3 */
+#define PERF_MEM_REGION_MEM4 0x0c /* Memory region 4 */
+#define PERF_MEM_REGION_MEM5 0x0d /* Memory region 5 */
+#define PERF_MEM_REGION_MEM6 0x0e /* Memory region 6 */
+#define PERF_MEM_REGION_MEM7 0x0f /* Memory region 7 */
+#define PERF_MEM_REGION_SHIFT 46
+
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)