From: Greg Kroah-Hartman
Date: Sun, 15 Jan 2023 14:31:56 +0000 (+0100)
Subject: 6.1-stable patches
X-Git-Tag: v4.14.303~41
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=7d5d2ff3a07c0634dca292e74c40f035fef2f059;p=thirdparty%2Fkernel%2Fstable-queue.git

6.1-stable patches

added patches:
        edac-device-fix-period-calculation-in-edac_device_reset_delay_period.patch
        x86-pat-fix-pat_x_mtrr_type-for-mtrr-disabled-case.patch
        x86-resctrl-fix-event-counts-regression-in-reused-rmids.patch
        x86-resctrl-fix-task-closid-rmid-update-race.patch
---

diff --git a/queue-6.1/edac-device-fix-period-calculation-in-edac_device_reset_delay_period.patch b/queue-6.1/edac-device-fix-period-calculation-in-edac_device_reset_delay_period.patch
new file mode 100644
index 00000000000..305e4fd02eb
--- /dev/null
+++ b/queue-6.1/edac-device-fix-period-calculation-in-edac_device_reset_delay_period.patch
@@ -0,0 +1,65 @@
+From e84077437902ec99eba0a6b516df772653f142c7 Mon Sep 17 00:00:00 2001
+From: Eliav Farber
+Date: Thu, 20 Oct 2022 12:44:58 +0000
+Subject: EDAC/device: Fix period calculation in edac_device_reset_delay_period()
+
+From: Eliav Farber
+
+commit e84077437902ec99eba0a6b516df772653f142c7 upstream.
+
+Fix period calculation in case user sets a value of 1000. The input of
+round_jiffies_relative() should be in jiffies and not in milli-seconds.
+
+  [ bp: Use the same code pattern as in edac_device_workq_setup() for
+    clarity. ]
+
+Fixes: c4cf3b454eca ("EDAC: Rework workqueue handling")
+Signed-off-by: Eliav Farber
+Signed-off-by: Borislav Petkov (AMD)
+Cc:
+Link: https://lore.kernel.org/r/20221020124458.22153-1-farbere@amazon.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/edac/edac_device.c | 17 ++++++++---------
+ drivers/edac/edac_module.h |  2 +-
+ 2 files changed, 9 insertions(+), 10 deletions(-)
+
+--- a/drivers/edac/edac_device.c
++++ b/drivers/edac/edac_device.c
+@@ -394,17 +394,16 @@ static void edac_device_workq_teardown(s
+  * Then restart the workq on the new delay
+  */
+ void edac_device_reset_delay_period(struct edac_device_ctl_info *edac_dev,
+-                                    unsigned long value)
++                                    unsigned long msec)
+ {
+-        unsigned long jiffs = msecs_to_jiffies(value);
++        edac_dev->poll_msec = msec;
++        edac_dev->delay = msecs_to_jiffies(msec);
+ 
+-        if (value == 1000)
+-                jiffs = round_jiffies_relative(value);
+-
+-        edac_dev->poll_msec = value;
+-        edac_dev->delay = jiffs;
+-
+-        edac_mod_work(&edac_dev->work, jiffs);
++        /* See comment in edac_device_workq_setup() above */
++        if (edac_dev->poll_msec == 1000)
++                edac_mod_work(&edac_dev->work, round_jiffies_relative(edac_dev->delay));
++        else
++                edac_mod_work(&edac_dev->work, edac_dev->delay);
+ }
+ 
+ int edac_device_alloc_index(void)
+--- a/drivers/edac/edac_module.h
++++ b/drivers/edac/edac_module.h
+@@ -52,7 +52,7 @@ bool edac_stop_work(struct delayed_work
+ bool edac_mod_work(struct delayed_work *work, unsigned long delay);
+ 
+ extern void edac_device_reset_delay_period(struct edac_device_ctl_info
+-                                           *edac_dev, unsigned long value);
++                                           *edac_dev, unsigned long msec);
+ extern void edac_mc_reset_delay_period(unsigned long value);
+ 
+ /*
diff --git a/queue-6.1/series b/queue-6.1/series
index 5d50cf4e042..bdd578303bf 100644
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -90,3 +90,7 @@ netfilter-ipset-fix-overflow-before-widen-in-the-bitmap_ip_create-function.patch
 selftests-netfilter-fix-transaction-test-script-timeout-handling.patch
 powerpc-imc-pmu-fix-use-of-mutex-in-irqs-disabled-section.patch
 x86-boot-avoid-using-intel-mnemonics-in-at-t-syntax-asm.patch
+edac-device-fix-period-calculation-in-edac_device_reset_delay_period.patch
+x86-pat-fix-pat_x_mtrr_type-for-mtrr-disabled-case.patch
+x86-resctrl-fix-task-closid-rmid-update-race.patch
+x86-resctrl-fix-event-counts-regression-in-reused-rmids.patch
diff --git a/queue-6.1/x86-pat-fix-pat_x_mtrr_type-for-mtrr-disabled-case.patch b/queue-6.1/x86-pat-fix-pat_x_mtrr_type-for-mtrr-disabled-case.patch
new file mode 100644
index 00000000000..83c51819877
--- /dev/null
+++ b/queue-6.1/x86-pat-fix-pat_x_mtrr_type-for-mtrr-disabled-case.patch
@@ -0,0 +1,53 @@
+From 90b926e68f500844dff16b5bcea178dc55cf580a Mon Sep 17 00:00:00 2001
+From: Juergen Gross
+Date: Tue, 10 Jan 2023 07:54:27 +0100
+Subject: x86/pat: Fix pat_x_mtrr_type() for MTRR disabled case
+
+From: Juergen Gross
+
+commit 90b926e68f500844dff16b5bcea178dc55cf580a upstream.
+
+Since
+
+  72cbc8f04fe2 ("x86/PAT: Have pat_enabled() properly reflect state when running on Xen")
+
+PAT can be enabled without MTRR.
+
+This has resulted in problems e.g. for a SEV-SNP guest running under Hyper-V,
+when trying to establish a new mapping via memremap() with WB caching mode, as
+pat_x_mtrr_type() will call mtrr_type_lookup(), which in turn is returning
+MTRR_TYPE_INVALID due to MTRR being disabled in this configuration.
+
+The result is a mapping with UC- caching, leading to severe performance
+degradation.
+
+Fix that by handling MTRR_TYPE_INVALID the same way as MTRR_TYPE_WRBACK
+in pat_x_mtrr_type() because MTRR_TYPE_INVALID means MTRRs are disabled.
+
+  [ bp: Massage commit message. ]
+
+Fixes: 72cbc8f04fe2 ("x86/PAT: Have pat_enabled() properly reflect state when running on Xen")
+Reported-by: Michael Kelley (LINUX)
+Signed-off-by: Juergen Gross
+Signed-off-by: Borislav Petkov (AMD)
+Reviewed-by: Michael Kelley
+Tested-by: Michael Kelley
+Cc:
+Link: https://lore.kernel.org/r/20230110065427.20767-1-jgross@suse.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/mm/pat/memtype.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/pat/memtype.c
++++ b/arch/x86/mm/pat/memtype.c
+@@ -434,7 +434,8 @@ static unsigned long pat_x_mtrr_type(u64
+         u8 mtrr_type, uniform;
+ 
+         mtrr_type = mtrr_type_lookup(start, end, &uniform);
+-        if (mtrr_type != MTRR_TYPE_WRBACK)
++        if (mtrr_type != MTRR_TYPE_WRBACK &&
++            mtrr_type != MTRR_TYPE_INVALID)
+                 return _PAGE_CACHE_MODE_UC_MINUS;
+ 
+         return _PAGE_CACHE_MODE_WB;
diff --git a/queue-6.1/x86-resctrl-fix-event-counts-regression-in-reused-rmids.patch b/queue-6.1/x86-resctrl-fix-event-counts-regression-in-reused-rmids.patch
new file mode 100644
index 00000000000..79005ea5dfc
--- /dev/null
+++ b/queue-6.1/x86-resctrl-fix-event-counts-regression-in-reused-rmids.patch
@@ -0,0 +1,115 @@
+From 2a81160d29d65b5876ab3f824fda99ae0219f05e Mon Sep 17 00:00:00 2001
+From: Peter Newman
+Date: Tue, 20 Dec 2022 17:41:31 +0100
+Subject: x86/resctrl: Fix event counts regression in reused RMIDs
+
+From: Peter Newman
+
+commit 2a81160d29d65b5876ab3f824fda99ae0219f05e upstream.
+
+When creating a new monitoring group, the RMID allocated for it may have
+been used by a group which was previously removed. In this case, the
+hardware counters will have non-zero values which should be deducted
+from what is reported in the new group's counts.
+
+resctrl_arch_reset_rmid() initializes the prev_msr value for counters to
+0, causing the initial count to be charged to the new group. Resurrect
+__rmid_read() and use it to initialize prev_msr correctly.
+
+Unlike before, __rmid_read() checks for error bits in the MSR read so
+that callers don't need to.
+
+Fixes: 1d81d15db39c ("x86/resctrl: Move mbm_overflow_count() into resctrl_arch_rmid_read()")
+Signed-off-by: Peter Newman
+Signed-off-by: Borislav Petkov (AMD)
+Reviewed-by: Reinette Chatre
+Tested-by: Babu Moger
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20221220164132.443083-1-peternewman@google.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kernel/cpu/resctrl/monitor.c | 49 ++++++++++++++++++---------
+ 1 file changed, 33 insertions(+), 16 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
+index efe0c30d3a12..77538abeb72a 100644
+--- a/arch/x86/kernel/cpu/resctrl/monitor.c
++++ b/arch/x86/kernel/cpu/resctrl/monitor.c
+@@ -146,6 +146,30 @@ static inline struct rmid_entry *__rmid_entry(u32 rmid)
+         return entry;
+ }
+ 
++static int __rmid_read(u32 rmid, enum resctrl_event_id eventid, u64 *val)
++{
++        u64 msr_val;
++
++        /*
++         * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
++         * with a valid event code for supported resource type and the bits
++         * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
++         * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
++         * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
++         * are error bits.
++         */
++        wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
++        rdmsrl(MSR_IA32_QM_CTR, msr_val);
++
++        if (msr_val & RMID_VAL_ERROR)
++                return -EIO;
++        if (msr_val & RMID_VAL_UNAVAIL)
++                return -EINVAL;
++
++        *val = msr_val;
++        return 0;
++}
++
+ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_domain *hw_dom,
+                                                  u32 rmid,
+                                                  enum resctrl_event_id eventid)
+@@ -172,8 +196,12 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_domain *d,
+         struct arch_mbm_state *am;
+ 
+         am = get_arch_mbm_state(hw_dom, rmid, eventid);
+-        if (am)
++        if (am) {
+                 memset(am, 0, sizeof(*am));
++
++                /* Record any initial, non-zero count value. */
++                __rmid_read(rmid, eventid, &am->prev_msr);
++        }
+ }
+ 
+ static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width)
+@@ -191,25 +219,14 @@ int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
+         struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
+         struct arch_mbm_state *am;
+         u64 msr_val, chunks;
++        int ret;
+ 
+         if (!cpumask_test_cpu(smp_processor_id(), &d->cpu_mask))
+                 return -EINVAL;
+ 
+-        /*
+-         * As per the SDM, when IA32_QM_EVTSEL.EvtID (bits 7:0) is configured
+-         * with a valid event code for supported resource type and the bits
+-         * IA32_QM_EVTSEL.RMID (bits 41:32) are configured with valid RMID,
+-         * IA32_QM_CTR.data (bits 61:0) reports the monitored data.
+-         * IA32_QM_CTR.Error (bit 63) and IA32_QM_CTR.Unavailable (bit 62)
+-         * are error bits.
+-         */
+-        wrmsr(MSR_IA32_QM_EVTSEL, eventid, rmid);
+-        rdmsrl(MSR_IA32_QM_CTR, msr_val);
+-
+-        if (msr_val & RMID_VAL_ERROR)
+-                return -EIO;
+-        if (msr_val & RMID_VAL_UNAVAIL)
+-                return -EINVAL;
++        ret = __rmid_read(rmid, eventid, &msr_val);
++        if (ret)
++                return ret;
+ 
+         am = get_arch_mbm_state(hw_dom, rmid, eventid);
+         if (am) {
+-- 
+2.39.0
+
diff --git a/queue-6.1/x86-resctrl-fix-task-closid-rmid-update-race.patch b/queue-6.1/x86-resctrl-fix-task-closid-rmid-update-race.patch
new file mode 100644
index 00000000000..fdfd989231b
--- /dev/null
+++ b/queue-6.1/x86-resctrl-fix-task-closid-rmid-update-race.patch
@@ -0,0 +1,109 @@
+From fe1f0714385fbcf76b0cbceb02b7277d842014fc Mon Sep 17 00:00:00 2001
+From: Peter Newman
+Date: Tue, 20 Dec 2022 17:11:23 +0100
+Subject: x86/resctrl: Fix task CLOSID/RMID update race
+
+From: Peter Newman
+
+commit fe1f0714385fbcf76b0cbceb02b7277d842014fc upstream.
+
+When the user moves a running task to a new rdtgroup using the task's
+file interface or by deleting its rdtgroup, the resulting change in
+CLOSID/RMID must be immediately propagated to the PQR_ASSOC MSR on the
+task(s) CPUs.
+
+x86 allows reordering loads with prior stores, so if the task starts
+running between a task_curr() check that the CPU hoisted before the
+stores in the CLOSID/RMID update then it can start running with the old
+CLOSID/RMID until it is switched again because __rdtgroup_move_task()
+failed to determine that it needs to be interrupted to obtain the new
+CLOSID/RMID.
+
+Refer to the diagram below:
+
+CPU 0                                    CPU 1
+-----                                    -----
+__rdtgroup_move_task():
+  curr <- t1->cpu->rq->curr
+                                         __schedule():
+                                           rq->curr <- t1
+                                         resctrl_sched_in():
+                                           t1->{closid,rmid} -> {1,1}
+  t1->{closid,rmid} <- {2,2}
+  if (curr == t1) // false
+ IPI(t1->cpu)
+
+A similar race impacts rdt_move_group_tasks(), which updates tasks in a
+deleted rdtgroup.
+
+In both cases, use smp_mb() to order the task_struct::{closid,rmid}
+stores before the loads in task_curr(). In particular, in the
+rdt_move_group_tasks() case, simply execute an smp_mb() on every
+iteration with a matching task.
+
+It is possible to use a single smp_mb() in rdt_move_group_tasks(), but
+this would require two passes and a means of remembering which
+task_structs were updated in the first loop. However, benchmarking
+results below showed too little performance impact in the simple
+approach to justify implementing the two-pass approach.
+
+Times below were collected using `perf stat` to measure the time to
+remove a group containing a 1600-task, parallel workload.
+
+CPU: Intel(R) Xeon(R) Platinum P-8136 CPU @ 2.00GHz (112 threads)
+
+  # mkdir /sys/fs/resctrl/test
+  # echo $$ > /sys/fs/resctrl/test/tasks
+  # perf bench sched messaging -g 40 -l 100000
+
+task-clock time ranges collected using:
+
+  # perf stat rmdir /sys/fs/resctrl/test
+
+Baseline:                     1.54 - 1.60 ms
+smp_mb() every matching task: 1.57 - 1.67 ms
+
+  [ bp: Massage commit message. ]
+
+Fixes: ae28d1aae48a ("x86/resctrl: Use an IPI instead of task_work_add() to update PQR_ASSOC MSR")
+Fixes: 0efc89be9471 ("x86/intel_rdt: Update task closid immediately on CPU in rmdir and unmount")
+Signed-off-by: Peter Newman
+Signed-off-by: Borislav Petkov (AMD)
+Reviewed-by: Reinette Chatre
+Reviewed-by: Babu Moger
+Cc:
+Link: https://lore.kernel.org/r/20221220161123.432120-1-peternewman@google.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -580,8 +580,10 @@ static int __rdtgroup_move_task(struct t
+         /*
+          * Ensure the task's closid and rmid are written before determining if
+          * the task is current that will decide if it will be interrupted.
++         * This pairs with the full barrier between the rq->curr update and
++         * resctrl_sched_in() during context switch.
+          */
+-        barrier();
++        smp_mb();
+ 
+         /*
+          * By now, the task's closid and rmid are set. If the task is current
+@@ -2402,6 +2404,14 @@ static void rdt_move_group_tasks(struct
+                 WRITE_ONCE(t->rmid, to->mon.rmid);
+ 
+                 /*
++                 * Order the closid/rmid stores above before the loads
++                 * in task_curr(). This pairs with the full barrier
++                 * between the rq->curr update and resctrl_sched_in()
++                 * during context switch.
++                 */
++                smp_mb();
++
++                /*
+                  * If the task is on a CPU, set the CPU in the mask.
+                  * The detection is inaccurate as tasks might move or
+                  * schedule before the smp function call takes place.
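
The ordering requirement behind the smp_mb() additions in the last patch can also be sketched outside the kernel. The stand-alone C11 program below is only an illustration under assumed names, not kernel code: struct fake_task, move_task() and the running flag are hypothetical stand-ins for task_struct, __rdtgroup_move_task() and task_curr(), and atomic_thread_fence(memory_order_seq_cst) plays the role of smp_mb(). It shows why the closid/rmid stores must be ordered before the "is the task currently running?" load.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for task_struct: only the fields this race cares about. */
struct fake_task {
        atomic_uint closid;
        atomic_uint rmid;
        atomic_bool running;            /* stand-in for task_curr(t) */
};

/* Mover side, loosely mirroring __rdtgroup_move_task(). */
static void move_task(struct fake_task *t, unsigned int closid, unsigned int rmid)
{
        atomic_store_explicit(&t->closid, closid, memory_order_relaxed);
        atomic_store_explicit(&t->rmid, rmid, memory_order_relaxed);

        /*
         * Full fence, the analogue of smp_mb(): order the two stores above
         * before the load below. Without it the CPU may hoist the load
         * above the stores, the check sees "not running", no IPI is sent,
         * and a task that became current in the meantime keeps running
         * with its stale closid/rmid.
         */
        atomic_thread_fence(memory_order_seq_cst);

        if (atomic_load_explicit(&t->running, memory_order_relaxed))
                printf("task is current: would IPI its CPU to reload closid/rmid\n");
}

int main(void)
{
        struct fake_task t = { 0 };

        atomic_store(&t.running, true); /* pretend the scheduler just picked t */
        move_task(&t, 2, 2);
        return 0;
}

On x86 the seq_cst fence compiles to a full barrier, which is exactly what closes the store-then-load reordering window described in the commit message; a plain compiler barrier() only prevents the compiler, not the CPU, from reordering.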