-l::
--ldlat::
- Configure mem-loads latency. Supported on Intel and Arm64 processors
- only. Ignored on other archs.
+ Configure mem-loads latency. Supported on Intel, Arm64 and some AMD
+ processors. Ignored on other archs.
+
+ On supported AMD processors:
+ - /sys/bus/event_source/devices/ibs_op/caps/ldlat file contains '1'.
+ - Supported latency values are 128 to 2048 (both inclusive).
+ - Latency value which is a multiple of 128 incurs a little less profiling
+ overhead compared to other values.
+ - Load latency filtering is disabled by default.
-k::
--all-kernel::
Due to the statistical nature of SPE sampling, not every memory operation will
be sampled.
+On AMD this use IBS Op PMU to sample load-store operations.
+
COMMON OPTIONS
--------------
-f::
Configure all used events to run in user space.
--ldlat <n>::
- Specify desired latency for loads event. Supported on Intel and Arm64
- processors only. Ignored on other archs.
+ Specify desired latency for loads event. Supported on Intel, Arm64 and
+ some AMD processors. Ignored on other archs.
+
+ On supported AMD processors:
+ - /sys/bus/event_source/devices/ibs_op/caps/ldlat file contains '1'.
+ - Supported latency values are 128 to 2048 (both inclusive).
+ - Latency value which is a multiple of 128 incurs a little less profiling
+ overhead compared to other values.
+ - Load latency filtering is disabled by default.
REPORT OPTIONS
--------------
E(NULL, NULL, NULL, false, 0),
E("mem-ldst", "%s//", NULL, false, 0),
};
+
+struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = {
+ E(NULL, NULL, NULL, false, 0),
+ E(NULL, NULL, NULL, false, 0),
+ E("mem-ldst", "%s/ldlat=%u/", NULL, true, 0),
+};
extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX];
extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX];
#endif /* _X86_MEM_EVENTS_H */
#include "mem-events.h"
#include "util/env.h"
-void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
+void perf_pmu__arch_init(struct perf_pmu *pmu)
{
+ struct perf_pmu_caps *ldlat_cap;
+
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
pmu->auxtrace = true;
#endif
if (x86__is_amd_cpu()) {
- if (!strcmp(pmu->name, "ibs_op"))
- pmu->mem_events = perf_mem_events_amd;
+ if (strcmp(pmu->name, "ibs_op"))
+ return;
+
+ pmu->mem_events = perf_mem_events_amd;
+
+ if (!perf_pmu__caps_parse(pmu))
+ return;
+
+ ldlat_cap = perf_pmu__get_cap(pmu, "ldlat");
+ if (!ldlat_cap || strcmp(ldlat_cap->value, "1"))
+ return;
+
+ perf_mem_events__loads_ldlat = 0;
+ pmu->mem_events = perf_mem_events_amd_ldlat;
} else if (pmu->is_core) {
if (perf_pmu__have_event(pmu, "mem-loads-aux"))
pmu->mem_events = perf_mem_events_intel_aux;
echo "Recording workload..."
-# perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't support
-# user/kernel filtering and per-process monitoring, spin program on
-# specific CPU and test in per-CPU mode.
is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo)
if (($is_amd >= 1)); then
+ mem_events="$(perf mem record -v -e list 2>&1)"
+ if ! [[ "$mem_events" =~ ^mem\-ldst.*ibs_op/(.*)/.*available ]]; then
+ echo "ERROR: mem-ldst event is not matching"
+ exit 1
+ fi
+
+ # --ldlat on AMD:
+ # o Zen4 and earlier uarch does not support ldlat
+ # o Even on supported platforms, it's disabled (--ldlat=0) by default.
+ ldlat=${BASH_REMATCH[1]}
+ if [[ -n $ldlat ]]; then
+ if ! [[ "$ldlat" =~ ldlat=0 ]]; then
+ echo "ERROR: ldlat not initialized to 0?"
+ exit 1
+ fi
+
+ mem_events="$(perf mem record -v --ldlat=150 -e list 2>&1)"
+ if ! [[ "$mem_events" =~ ^mem-ldst.*ibs_op/ldlat=150/.*available ]]; then
+ echo "ERROR: --ldlat not honored?"
+ exit 1
+ fi
+ fi
+
+ # perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't
+ # support user/kernel filtering and per-process monitoring on older
+ # kernels, spin program on specific CPU and test in per-CPU mode.
perf mem record -vvv -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM 2>"${ERR_FILE}"
else
perf mem record -vvv --all-user -o ${PERF_DATA} -- $TEST_PROGRAM 2>"${ERR_FILE}"
}
}
+struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name)
+{
+ struct perf_pmu_caps *caps;
+
+ list_for_each_entry(caps, &pmu->caps, list) {
+ if (!strcmp(caps->name, name))
+ return caps;
+ }
+ return NULL;
+}
+
/*
* Reading/parsing the given pmu capabilities, which should be located at:
* /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes.
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
+struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name);
+
int perf_pmu__caps_parse(struct perf_pmu *pmu);
void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,