--- /dev/null
+From 7fef099702527c3b2c5234a2ea6a24411485a13a Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 7 Mar 2023 13:06:29 -0800
+Subject: x86/resctrl: fix scheduler confusion with 'current'
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 7fef099702527c3b2c5234a2ea6a24411485a13a upstream.
+
+The implementation of 'current' on x86 is very intentionally special: it
+is a very common thing to look up, and it uses 'this_cpu_read_stable()'
+to get the current thread pointer efficiently from per-cpu storage.
+
+And the keyword in there is 'stable': the current thread pointer never
+changes as far as a single thread is concerned. Even when a thread is
+preempted, or moved to another CPU, or even across an explicit call to
+'schedule()', that thread will still have the same value for 'current'.
+
+It is, after all, the kernel base pointer to thread-local storage.
+That's why it's stable to begin with, but it's also why it's important
+enough that we have that special 'this_cpu_read_stable()' access for it.
+
+So this is all done very intentionally to allow the compiler to treat
+'current' as a value that never visibly changes, so that the compiler
+can do CSE and combine multiple different 'current' accesses into one.
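+
+For reference, this is roughly what the x86 definition looks like (see
+arch/x86/include/asm/current.h; the exact per-cpu variable has moved
+around between kernel versions, but the shape is the same):
+
+    DECLARE_PER_CPU(struct task_struct *, current_task);
+
+    /* Stable read: the compiler may cache and reuse the result */
+    static __always_inline struct task_struct *get_current(void)
+    {
+        return this_cpu_read_stable(current_task);
+    }
+
+    #define current get_current()
+
+Because of that 'stable' annotation, two reads of 'current' in the
+same function can be folded by the compiler into a single load.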
+
+However, there is obviously one very special situation when the
+currently running thread does actually change: inside the scheduler
+itself.
+
+So the scheduler code paths are special, and do not have a 'current'
+thread at all. Instead there are _two_ threads: the previous and the
+next thread - typically called 'prev' and 'next' (or prev_p/next_p)
+internally.
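+
+The x86 context-switch code reflects that naming directly; stripped of
+annotations, the entry point in arch/x86/kernel/process_64.c looks
+like:
+
+    struct task_struct *
+    __switch_to(struct task_struct *prev_p, struct task_struct *next_p);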
+
+So this is all actually quite straightforward, and not all that
+complicated.
+
+Except when you have special code that is run in scheduler context:
+that code has to be aware that 'current' isn't really a valid thing
+there. Did you mean 'prev'? Did you mean 'next'?
+
+In fact, even if you then look at the code, and only use 'current'
+after the new value has been assigned to the percpu variable, it does
+not help: we have explicitly told the compiler that 'current' is
+magical and always stable. So the compiler is quite free to use an
+older (or newer) value of 'current', and the actual assignment to the
+percpu storage is not relevant even if it might look that way.
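+
+As a condensed sketch of the hazard (simplified from the 64-bit
+'__switch_to()'; the comments are illustrative):
+
+    /* inside __switch_to(), simplified */
+    this_cpu_write(current_task, next_p);  /* 'current' now means next_p */
+    ...
+    resctrl_sched_in();  /* reads 'current' internally, but since
+                          * 'current' is declared stable, the compiler
+                          * may reuse a load issued before the write
+                          * above - i.e. it may still see prev_p */
+
+Passing the task pointer explicitly, as done below, removes any
+dependence on when that per-cpu load happens.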
+
+Which is exactly what happened in the resctrl code, which blithely
+used 'current' in '__resctrl_sched_in()' when it really wanted the new
+process state (as implied by the name: we're scheduling 'into' that
+new resctrl state). And clang would end up just using the old thread
+pointer value, at least in some configurations.
+
+This could have happened with gcc too; it purely depends on random
+compiler details. Clang just seems to have been more aggressive about
+moving the read of the per-cpu current_task pointer around.
+
+The fix is trivial: just make the resctrl code adhere to the scheduler
+rules of using the prev/next thread pointer explicitly, instead of
+using 'current' in a situation where it just wasn't valid.
+
+That same code is then also used outside of the scheduler context
+(when a thread's resctrl state is explicitly changed), and then we
+will just pass in 'current' as that pointer, of course. There is no
+ambiguity in that case.
+
+The fix may be trivial, but noticing and figuring out what went wrong
+was not. The credit for that goes to Stephane Eranian.
+
+Reported-by: Stephane Eranian <eranian@google.com>
+Link: https://lore.kernel.org/lkml/20230303231133.1486085-1-eranian@google.com/
+Link: https://lore.kernel.org/lkml/alpine.LFD.2.01.0908011214330.3304@localhost.localdomain/
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Tested-by: Tony Luck <tony.luck@intel.com>
+Tested-by: Stephane Eranian <eranian@google.com>
+Tested-by: Babu Moger <babu.moger@amd.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/resctrl.h | 12 ++++++------
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c | 4 ++--
+ arch/x86/kernel/process_32.c | 2 +-
+ arch/x86/kernel/process_64.c | 2 +-
+ 4 files changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
+index 52788f79786f..255a78d9d906 100644
+--- a/arch/x86/include/asm/resctrl.h
++++ b/arch/x86/include/asm/resctrl.h
+@@ -49,7 +49,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
+ * simple as possible.
+ * Must be called with preemption disabled.
+ */
+-static void __resctrl_sched_in(void)
++static inline void __resctrl_sched_in(struct task_struct *tsk)
+ {
+ struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
+ u32 closid = state->default_closid;
+@@ -61,13 +61,13 @@ static void __resctrl_sched_in(void)
+ * Else use the closid/rmid assigned to this cpu.
+ */
+ if (static_branch_likely(&rdt_alloc_enable_key)) {
+- tmp = READ_ONCE(current->closid);
++ tmp = READ_ONCE(tsk->closid);
+ if (tmp)
+ closid = tmp;
+ }
+
+ if (static_branch_likely(&rdt_mon_enable_key)) {
+- tmp = READ_ONCE(current->rmid);
++ tmp = READ_ONCE(tsk->rmid);
+ if (tmp)
+ rmid = tmp;
+ }
+@@ -88,17 +88,17 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
+ return val * scale;
+ }
+
+-static inline void resctrl_sched_in(void)
++static inline void resctrl_sched_in(struct task_struct *tsk)
+ {
+ if (static_branch_likely(&rdt_enable_key))
+- __resctrl_sched_in();
++ __resctrl_sched_in(tsk);
+ }
+
+ void resctrl_cpu_detect(struct cpuinfo_x86 *c);
+
+ #else
+
+-static inline void resctrl_sched_in(void) {}
++static inline void resctrl_sched_in(struct task_struct *tsk) {}
+ static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
+
+ #endif /* CONFIG_X86_CPU_RESCTRL */
+diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+index e2c1599d1b37..884b6e9a7e31 100644
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -314,7 +314,7 @@ static void update_cpu_closid_rmid(void *info)
+ * executing task might have its own closid selected. Just reuse
+ * the context switch code.
+ */
+- resctrl_sched_in();
++ resctrl_sched_in(current);
+ }
+
+ /*
+@@ -530,7 +530,7 @@ static void _update_task_closid_rmid(void *task)
+ * Otherwise, the MSR is updated when the task is scheduled in.
+ */
+ if (task == current)
+- resctrl_sched_in();
++ resctrl_sched_in(task);
+ }
+
+ static void update_task_closid_rmid(struct task_struct *t)
+diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
+index 470c128759ea..708c87b88cc1 100644
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -212,7 +212,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ switch_fpu_finish();
+
+ /* Load the Intel cache allocation PQR MSR. */
+- resctrl_sched_in();
++ resctrl_sched_in(next_p);
+
+ return prev_p;
+ }
+diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
+index 4e34b3b68ebd..bb65a68b4b49 100644
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -656,7 +656,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+ }
+
+ /* Load the Intel cache allocation PQR MSR. */
+- resctrl_sched_in();
++ resctrl_sched_in(next_p);
+
+ return prev_p;
+ }
+--
+2.39.2
+