From: Greg Kroah-Hartman
Date: Sat, 15 Mar 2025 15:44:57 +0000 (+0100)
Subject: drop queue-5.15/perf-x86-intel-use-better-start-period-for-frequency.patch
X-Git-Tag: v6.6.84~41
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=222aab59af60b35580a08a7c376f1a3f402d3255;p=thirdparty%2Fkernel%2Fstable-queue.git

drop queue-5.15/perf-x86-intel-use-better-start-period-for-frequency.patch
---

diff --git a/queue-5.15/perf-x86-intel-use-better-start-period-for-frequency.patch b/queue-5.15/perf-x86-intel-use-better-start-period-for-frequency.patch
deleted file mode 100644
index d32f65bf03..0000000000
--- a/queue-5.15/perf-x86-intel-use-better-start-period-for-frequency.patch
+++ /dev/null
@@ -1,150 +0,0 @@
-From d4b58c9fc31f4815b2e04ff60dfeb27dab7af085 Mon Sep 17 00:00:00 2001
-From: Sasha Levin
-Date: Fri, 17 Jan 2025 07:19:13 -0800
-Subject: perf/x86/intel: Use better start period for frequency mode
-
-From: Kan Liang
-
-[ Upstream commit a26b24b2e21f6222635a95426b9ef9eec63d69b1 ]
-
-Frequency mode is the current default mode of Linux perf. A period of 1 is
-used as a starting period. The period is auto-adjusted on each tick or an
-overflow, to meet the frequency target.
-
-The start period of 1 is too low and may trigger some issues:
-
-- Many HWs do not support period 1 well.
-  https://lore.kernel.org/lkml/875xs2oh69.ffs@tglx/
-
-- For an event that occurs frequently, period 1 is too far away from the
-  real period. Lots of samples are generated at the beginning.
-  The distribution of samples may not be even.
-
-- A low starting period for frequently occurring events also challenges
-  virtualization, which has a longer path to handle a PMI.
-
-The limit_period value only checks the minimum acceptable value for HW.
-It cannot be used to set the start period, because some events may
-need a very low period. The limit_period cannot be set too high. It
-doesn't help with the events that occur frequently.
-
-It's hard to find a universal starting period for all events. The idea
-implemented by this patch is to only give an estimate for the popular
-HW and HW cache events. For the rest of the events, start from the lowest
-possible recommended value.
-
-Signed-off-by: Kan Liang
-Signed-off-by: Ingo Molnar
-Cc: Peter Zijlstra
-Link: https://lore.kernel.org/r/20250117151913.3043942-3-kan.liang@linux.intel.com
-Signed-off-by: Sasha Levin
----
- arch/x86/events/intel/core.c | 85 ++++++++++++++++++++++++++++++++++++
- 1 file changed, 85 insertions(+)
-
-diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
-index 767c60af13be3..9a44e53591f96 100644
---- a/arch/x86/events/intel/core.c
-+++ b/arch/x86/events/intel/core.c
-@@ -3717,6 +3717,85 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
- 	return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
- }
- 
-+static u64 intel_pmu_freq_start_period(struct perf_event *event)
-+{
-+	int type = event->attr.type;
-+	u64 config, factor;
-+	s64 start;
-+
-+	/*
-+	 * The 127 is the lowest possible recommended SAV (sample after value)
-+	 * for a 4000 freq (default freq), according to the event list JSON file.
-+	 * Also, assume the workload is idle 50% time.
-+	 */
-+	factor = 64 * 4000;
-+	if (type != PERF_TYPE_HARDWARE && type != PERF_TYPE_HW_CACHE)
-+		goto end;
-+
-+	/*
-+	 * The estimation of the start period in the freq mode is
-+	 * based on the below assumption.
-+	 *
-+	 * For a cycles or an instructions event, 1GHz of the
-+	 * underlying platform, 1 IPC. The workload is idle 50% time.
-+	 * The start period = 1,000,000,000 * 1 / freq / 2.
-+	 *		    = 500,000,000 / freq
-+	 *
-+	 * Usually, the branch-related events occur less than the
-+	 * instructions event. According to the Intel event list JSON
-+	 * file, the SAV (sample after value) of a branch-related event
-+	 * is usually 1/4 of an instruction event.
-+	 * The start period of branch-related events = 125,000,000 / freq.
-+	 *
-+	 * The cache-related events occur even less. The SAV is usually
-+	 * 1/20 of an instruction event.
-+	 * The start period of cache-related events = 25,000,000 / freq.
-+	 */
-+	config = event->attr.config & PERF_HW_EVENT_MASK;
-+	if (type == PERF_TYPE_HARDWARE) {
-+		switch (config) {
-+		case PERF_COUNT_HW_CPU_CYCLES:
-+		case PERF_COUNT_HW_INSTRUCTIONS:
-+		case PERF_COUNT_HW_BUS_CYCLES:
-+		case PERF_COUNT_HW_STALLED_CYCLES_FRONTEND:
-+		case PERF_COUNT_HW_STALLED_CYCLES_BACKEND:
-+		case PERF_COUNT_HW_REF_CPU_CYCLES:
-+			factor = 500000000;
-+			break;
-+		case PERF_COUNT_HW_BRANCH_INSTRUCTIONS:
-+		case PERF_COUNT_HW_BRANCH_MISSES:
-+			factor = 125000000;
-+			break;
-+		case PERF_COUNT_HW_CACHE_REFERENCES:
-+		case PERF_COUNT_HW_CACHE_MISSES:
-+			factor = 25000000;
-+			break;
-+		default:
-+			goto end;
-+		}
-+	}
-+
-+	if (type == PERF_TYPE_HW_CACHE)
-+		factor = 25000000;
-+end:
-+	/*
-+	 * Usually, a prime or a number with less factors (close to prime)
-+	 * is chosen as an SAV, which makes it less likely that the sampling
-+	 * period synchronizes with some periodic event in the workload.
-+	 * Minus 1 to make it at least avoid values near powers of two
-+	 * for the default freq.
-+	 */
-+	start = DIV_ROUND_UP_ULL(factor, event->attr.sample_freq) - 1;
-+
-+	if (start > x86_pmu.max_period)
-+		start = x86_pmu.max_period;
-+
-+	if (x86_pmu.limit_period)
-+		x86_pmu.limit_period(event, &start);
-+
-+	return start;
-+}
-+
- static int intel_pmu_hw_config(struct perf_event *event)
- {
- 	int ret = x86_pmu_hw_config(event);
-@@ -3728,6 +3807,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
- 	if (ret)
- 		return ret;
- 
-+	if (event->attr.freq && event->attr.sample_freq) {
-+		event->hw.sample_period = intel_pmu_freq_start_period(event);
-+		event->hw.last_period = event->hw.sample_period;
-+		local64_set(&event->hw.period_left, event->hw.sample_period);
-+	}
-+
- 	if (event->attr.precise_ip) {
- 		if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
- 			return -EINVAL;
--- 
-2.39.5
-
diff --git a/queue-5.15/series b/queue-5.15/series
index b1cbff4e67..4707d2f947 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -46,7 +46,6 @@ asoc-tas2770-fix-volume-scale.patch
 asoc-tas2764-fix-power-control-mask.patch
 asoc-tas2764-set-the-sdout-polarity-correctly.patch
 fuse-don-t-truncate-cached-mutated-symlink.patch
-perf-x86-intel-use-better-start-period-for-frequency.patch
 x86-irq-define-trace-events-conditionally.patch
 mptcp-safety-check-before-fallback.patch
 drm-nouveau-do-not-override-forced-connector-status.patch
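For reference, the start-period arithmetic described in the dropped patch can be
checked in isolation. The sketch below is illustrative only, not part of the
patch or the kernel: it is a userspace program that applies the same
factor / freq - 1 formula with the factors the patch assigns in
intel_pmu_freq_start_period(), at perf's default 4000 Hz frequency target. The
DIV_ROUND_UP macro here stands in for the kernel's DIV_ROUND_UP_ULL.

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	const unsigned long long freq = 4000;	/* perf's default frequency target */

	/* factors taken from the patch: cycles/instructions, branches, cache */
	const unsigned long long factors[] = { 500000000ULL, 125000000ULL, 25000000ULL };
	const char *names[] = { "cycles/instructions", "branch events", "cache events" };

	for (int i = 0; i < 3; i++) {
		/* same formula as the patch: DIV_ROUND_UP(factor, freq) - 1 */
		unsigned long long start = DIV_ROUND_UP(factors[i], freq) - 1;
		printf("%-20s -> start period %llu\n", names[i], start);
	}
	return 0;
}

This prints start periods of 124999, 31249 and 6249, versus the old default of 1;
the trailing minus 1 keeps each period just off a round number, which the patch's
comment notes makes accidental synchronization with periodic workload behavior
less likely.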