From 813734be5b6a3b2213daaff4d83007f6b05c7d0e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 Mar 2023 12:24:33 +0100 Subject: [PATCH] 4.19-stable patches added patches: x86-resctl-fix-scheduler-confusion-with-current.patch --- ...ec-fix-unsigned-comparison-with-less.patch | 9 +- queue-4.19/series | 1 + ...fix-scheduler-confusion-with-current.patch | 189 ++++++++++++++++++ 3 files changed, 192 insertions(+), 7 deletions(-) create mode 100644 queue-4.19/x86-resctl-fix-scheduler-confusion-with-current.patch diff --git a/queue-4.19/phy-rockchip-typec-fix-unsigned-comparison-with-less.patch b/queue-4.19/phy-rockchip-typec-fix-unsigned-comparison-with-less.patch index ffcd68d5b27..139e80182b3 100644 --- a/queue-4.19/phy-rockchip-typec-fix-unsigned-comparison-with-less.patch +++ b/queue-4.19/phy-rockchip-typec-fix-unsigned-comparison-with-less.patch @@ -20,14 +20,12 @@ Link: https://lore.kernel.org/r/20230213035709.99027-1-jiapeng.chong@linux.aliba Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin --- - drivers/phy/rockchip/phy-rockchip-typec.c | 3 +-- + drivers/phy/rockchip/phy-rockchip-typec.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) -diff --git a/drivers/phy/rockchip/phy-rockchip-typec.c b/drivers/phy/rockchip/phy-rockchip-typec.c -index 76a4b58ec7717..ca4b80d0d8b78 100644 --- a/drivers/phy/rockchip/phy-rockchip-typec.c +++ b/drivers/phy/rockchip/phy-rockchip-typec.c -@@ -817,9 +817,8 @@ static int tcphy_get_mode(struct rockchip_typec_phy *tcphy) +@@ -817,9 +817,8 @@ static int tcphy_get_mode(struct rockchi struct extcon_dev *edev = tcphy->extcon; union extcon_property_value property; unsigned int id; @@ -38,6 +36,3 @@ index 76a4b58ec7717..ca4b80d0d8b78 100644 if (!edev) return MODE_DFP_USB; --- -2.39.2 - diff --git a/queue-4.19/series b/queue-4.19/series index 308c18be0ea..ce09305eb9b 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -242,3 +242,4 @@ usb-host-xhci-mvebu-iterate-over-array-indexes-inste.patch usb-ene_usb6250-allocate-enough-memory-for-full-obje.patch usb-uvc-enumerate-valid-values-for-color-matching.patch phy-rockchip-typec-fix-unsigned-comparison-with-less.patch +x86-resctl-fix-scheduler-confusion-with-current.patch diff --git a/queue-4.19/x86-resctl-fix-scheduler-confusion-with-current.patch b/queue-4.19/x86-resctl-fix-scheduler-confusion-with-current.patch new file mode 100644 index 00000000000..8e9337051d0 --- /dev/null +++ b/queue-4.19/x86-resctl-fix-scheduler-confusion-with-current.patch @@ -0,0 +1,189 @@ +From 7fef099702527c3b2c5234a2ea6a24411485a13a Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 7 Mar 2023 13:06:29 -0800 +Subject: x86/resctl: fix scheduler confusion with 'current' + +From: Linus Torvalds + +commit 7fef099702527c3b2c5234a2ea6a24411485a13a upstream. + +The implementation of 'current' on x86 is very intentionally special: it +is a very common thing to look up, and it uses 'this_cpu_read_stable()' +to get the current thread pointer efficiently from per-cpu storage. + +And the keyword in there is 'stable': the current thread pointer never +changes as far as a single thread is concerned. Even if when a thread +is preempted, or moved to another CPU, or even across an explicit call +'schedule()' that thread will still have the same value for 'current'. + +It is, after all, the kernel base pointer to thread-local storage. +That's why it's stable to begin with, but it's also why it's important +enough that we have that special 'this_cpu_read_stable()' access for it. + +So this is all done very intentionally to allow the compiler to treat +'current' as a value that never visibly changes, so that the compiler +can do CSE and combine multiple different 'current' accesses into one. + +However, there is obviously one very special situation when the +currently running thread does actually change: inside the scheduler +itself. + +So the scheduler code paths are special, and do not have a 'current' +thread at all. Instead there are _two_ threads: the previous and the +next thread - typically called 'prev' and 'next' (or prev_p/next_p) +internally. + +So this is all actually quite straightforward and simple, and not all +that complicated. + +Except for when you then have special code that is run in scheduler +context, that code then has to be aware that 'current' isn't really a +valid thing. Did you mean 'prev'? Did you mean 'next'? + +In fact, even if then look at the code, and you use 'current' after the +new value has been assigned to the percpu variable, we have explicitly +told the compiler that 'current' is magical and always stable. So the +compiler is quite free to use an older (or newer) value of 'current', +and the actual assignment to the percpu storage is not relevant even if +it might look that way. + +Which is exactly what happened in the resctl code, that blithely used +'current' in '__resctrl_sched_in()' when it really wanted the new +process state (as implied by the name: we're scheduling 'into' that new +resctl state). And clang would end up just using the old thread pointer +value at least in some configurations. + +This could have happened with gcc too, and purely depends on random +compiler details. Clang just seems to have been more aggressive about +moving the read of the per-cpu current_task pointer around. + +The fix is trivial: just make the resctl code adhere to the scheduler +rules of using the prev/next thread pointer explicitly, instead of using +'current' in a situation where it just wasn't valid. + +That same code is then also used outside of the scheduler context (when +a thread resctl state is explicitly changed), and then we will just pass +in 'current' as that pointer, of course. There is no ambiguity in that +case. + +The fix may be trivial, but noticing and figuring out what went wrong +was not. The credit for that goes to Stephane Eranian. + +Reported-by: Stephane Eranian +Link: https://lore.kernel.org/lkml/20230303231133.1486085-1-eranian@google.com/ +Link: https://lore.kernel.org/lkml/alpine.LFD.2.01.0908011214330.3304@localhost.localdomain/ +Reviewed-by: Nick Desaulniers +Tested-by: Tony Luck +Tested-by: Stephane Eranian +Tested-by: Babu Moger +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/resctrl.h | 12 ++++++------ + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 4 ++-- + arch/x86/kernel/process_32.c | 2 +- + arch/x86/kernel/process_64.c | 2 +- + 4 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h +index 52788f79786f..255a78d9d906 100644 +--- a/arch/x86/include/asm/resctrl.h ++++ b/arch/x86/include/asm/resctrl.h +@@ -49,7 +49,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key); + * simple as possible. + * Must be called with preemption disabled. + */ +-static void __resctrl_sched_in(void) ++static inline void __resctrl_sched_in(struct task_struct *tsk) + { + struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); + u32 closid = state->default_closid; +@@ -61,13 +61,13 @@ static void __resctrl_sched_in(void) + * Else use the closid/rmid assigned to this cpu. + */ + if (static_branch_likely(&rdt_alloc_enable_key)) { +- tmp = READ_ONCE(current->closid); ++ tmp = READ_ONCE(tsk->closid); + if (tmp) + closid = tmp; + } + + if (static_branch_likely(&rdt_mon_enable_key)) { +- tmp = READ_ONCE(current->rmid); ++ tmp = READ_ONCE(tsk->rmid); + if (tmp) + rmid = tmp; + } +@@ -88,17 +88,17 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val) + return val * scale; + } + +-static inline void resctrl_sched_in(void) ++static inline void resctrl_sched_in(struct task_struct *tsk) + { + if (static_branch_likely(&rdt_enable_key)) +- __resctrl_sched_in(); ++ __resctrl_sched_in(tsk); + } + + void resctrl_cpu_detect(struct cpuinfo_x86 *c); + + #else + +-static inline void resctrl_sched_in(void) {} ++static inline void resctrl_sched_in(struct task_struct *tsk) {} + static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {} + + #endif /* CONFIG_X86_CPU_RESCTRL */ +diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c +index e2c1599d1b37..884b6e9a7e31 100644 +--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c ++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c +@@ -314,7 +314,7 @@ static void update_cpu_closid_rmid(void *info) + * executing task might have its own closid selected. Just reuse + * the context switch code. + */ +- resctrl_sched_in(); ++ resctrl_sched_in(current); + } + + /* +@@ -530,7 +530,7 @@ static void _update_task_closid_rmid(void *task) + * Otherwise, the MSR is updated when the task is scheduled in. + */ + if (task == current) +- resctrl_sched_in(); ++ resctrl_sched_in(task); + } + + static void update_task_closid_rmid(struct task_struct *t) +diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c +index 470c128759ea..708c87b88cc1 100644 +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -212,7 +212,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + switch_fpu_finish(); + + /* Load the Intel cache allocation PQR MSR. */ +- resctrl_sched_in(); ++ resctrl_sched_in(next_p); + + return prev_p; + } +diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c +index 4e34b3b68ebd..bb65a68b4b49 100644 +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -656,7 +656,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) + } + + /* Load the Intel cache allocation PQR MSR. */ +- resctrl_sched_in(); ++ resctrl_sched_in(next_p); + + return prev_p; + } +-- +2.39.2 + -- 2.47.3