5.10-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Fri, 10 Mar 2023 11:25:03 +0000 (12:25 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Fri, 10 Mar 2023 11:25:03 +0000 (12:25 +0100)
added patches:
net-tls-avoid-hanging-tasks-on-the-tx_lock.patch
x86-resctl-fix-scheduler-confusion-with-current.patch
x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch

queue-5.10/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/x86-resctl-fix-scheduler-confusion-with-current.patch [new file with mode: 0644]
queue-5.10/x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch [new file with mode: 0644]

diff --git a/queue-5.10/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch b/queue-5.10/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch
new file mode 100644 (file)
index 0000000..b523ebe
--- /dev/null
@@ -0,0 +1,78 @@
+From f3221361dc85d4de22586ce8441ec2c67b454f5d Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski <kuba@kernel.org>
+Date: Tue, 28 Feb 2023 16:28:57 -0800
+Subject: net: tls: avoid hanging tasks on the tx_lock
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+commit f3221361dc85d4de22586ce8441ec2c67b454f5d upstream.
+
+syzbot sent a hung task report and Eric explains that an adversarial
+receiver may keep RWIN at 0 for a long time, so we are not guaranteed
+to make forward progress. A thread which took the tx_lock and went to
+sleep may not release it for hours. Use interruptible sleep where
+possible and reschedule the work if it can't take the lock.
+
+Testing: existing selftest passes
+
+Reported-by: syzbot+9c0268252b8ef967c62e@syzkaller.appspotmail.com
+Fixes: 79ffe6087e91 ("net/tls: add a TX lock")
+Link: https://lore.kernel.org/all/000000000000e412e905f5b46201@google.com/
+Cc: stable@vger.kernel.org # wait 4 weeks
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230301002857.2101894-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tls/tls_sw.c |   26 +++++++++++++++++++-------
+ 1 file changed, 19 insertions(+), 7 deletions(-)
+
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -949,7 +949,9 @@ int tls_sw_sendmsg(struct sock *sk, stru
+                              MSG_CMSG_COMPAT))
+               return -EOPNOTSUPP;
+-      mutex_lock(&tls_ctx->tx_lock);
++      ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
++      if (ret)
++              return ret;
+       lock_sock(sk);
+       if (unlikely(msg->msg_controllen)) {
+@@ -1283,7 +1285,9 @@ int tls_sw_sendpage(struct sock *sk, str
+                     MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+               return -EOPNOTSUPP;
+-      mutex_lock(&tls_ctx->tx_lock);
++      ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
++      if (ret)
++              return ret;
+       lock_sock(sk);
+       ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
+       release_sock(sk);
+@@ -2266,11 +2270,19 @@ static void tx_work_handler(struct work_
+       if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+               return;
+-      mutex_lock(&tls_ctx->tx_lock);
+-      lock_sock(sk);
+-      tls_tx_records(sk, -1);
+-      release_sock(sk);
+-      mutex_unlock(&tls_ctx->tx_lock);
++
++      if (mutex_trylock(&tls_ctx->tx_lock)) {
++              lock_sock(sk);
++              tls_tx_records(sk, -1);
++              release_sock(sk);
++              mutex_unlock(&tls_ctx->tx_lock);
++      } else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
++              /* Someone is holding the tx_lock, they will likely run Tx
++               * and cancel the work on their way out of the lock section.
++               * Schedule a long delay just in case.
++               */
++              schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10));
++      }
+ }
+ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
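The core of the change above is that the TX work handler no longer sleeps on tx_lock: the send paths take the mutex interruptibly, and the worker uses a trylock and re-arms itself if the lock is contended. A rough userspace sketch of that worker pattern (with illustrative names such as flush_tx_records standing in for the kernel helpers; this is not the actual net/tls code) could look like:

/*
 * Userspace sketch of the non-blocking worker pattern: the worker never
 * sleeps on the lock; if it cannot take it, it asks to be rescheduled.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t tx_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for tls_tx_records(): transmit whatever is queued. */
static void flush_tx_records(void)
{
        puts("worker: flushed pending records");
}

/*
 * Stand-in for tx_work_handler(): try the lock, and if someone else holds
 * it, report that the caller should re-arm the work with a delay instead
 * of blocking on the mutex.
 */
static bool tx_work_handler(void)
{
        if (pthread_mutex_trylock(&tx_lock) == 0) {
                flush_tx_records();
                pthread_mutex_unlock(&tx_lock);
                return false;           /* done, no re-arm needed */
        }
        /* Lock holder will likely transmit on unlock; retry as a backstop. */
        return true;                    /* caller should reschedule us */
}

int main(void)
{
        /* Simulate the contended case: main thread holds tx_lock. */
        pthread_mutex_lock(&tx_lock);
        if (tx_work_handler())
                puts("worker: lock busy, rescheduling with a delay");
        pthread_mutex_unlock(&tx_lock);

        /* Uncontended retry succeeds immediately. */
        if (!tx_work_handler())
                puts("worker: completed on retry");
        return 0;
}
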
diff --git a/queue-5.10/series b/queue-5.10/series
index 0253175995b98b454b4512ded261310034e44465..360d094b9ae88f5f3c8de6c94ae6dbb8d46b8155 100644 (file)
@@ -510,3 +510,6 @@ pci-add-acs-quirk-for-wangxun-nics.patch
 phy-rockchip-typec-fix-unsigned-comparison-with-less.patch
 soundwire-cadence-remove-wasted-space-in-response_bu.patch
 soundwire-cadence-drain-the-rx-fifo-after-an-io-time.patch
+net-tls-avoid-hanging-tasks-on-the-tx_lock.patch
+x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch
+x86-resctl-fix-scheduler-confusion-with-current.patch
diff --git a/queue-5.10/x86-resctl-fix-scheduler-confusion-with-current.patch b/queue-5.10/x86-resctl-fix-scheduler-confusion-with-current.patch
new file mode 100644 (file)
index 0000000..dd4c21d
--- /dev/null
@@ -0,0 +1,178 @@
+From 7fef099702527c3b2c5234a2ea6a24411485a13a Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Tue, 7 Mar 2023 13:06:29 -0800
+Subject: x86/resctl: fix scheduler confusion with 'current'
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit 7fef099702527c3b2c5234a2ea6a24411485a13a upstream.
+
+The implementation of 'current' on x86 is very intentionally special: it
+is a very common thing to look up, and it uses 'this_cpu_read_stable()'
+to get the current thread pointer efficiently from per-cpu storage.
+
+And the keyword in there is 'stable': the current thread pointer never
+changes as far as a single thread is concerned.  Even if a thread is
+preempted, or moved to another CPU, or even across an explicit call to
+'schedule()', that thread will still have the same value for 'current'.
+
+It is, after all, the kernel base pointer to thread-local storage.
+That's why it's stable to begin with, but it's also why it's important
+enough that we have that special 'this_cpu_read_stable()' access for it.
+
+So this is all done very intentionally to allow the compiler to treat
+'current' as a value that never visibly changes, so that the compiler
+can do CSE and combine multiple different 'current' accesses into one.
+
+However, there is obviously one very special situation when the
+currently running thread does actually change: inside the scheduler
+itself.
+
+So the scheduler code paths are special, and do not have a 'current'
+thread at all.  Instead there are _two_ threads: the previous and the
+next thread - typically called 'prev' and 'next' (or prev_p/next_p)
+internally.
+
+So this is all actually quite straightforward and simple, and not all
+that complicated.
+
+Except for when you then have special code that is run in scheduler
+context, that code then has to be aware that 'current' isn't really a
+valid thing.  Did you mean 'prev'? Did you mean 'next'?
+
+In fact, even if you then look at the code, and you use 'current' after
+the new value has been assigned to the percpu variable, we have explicitly
+told the compiler that 'current' is magical and always stable.  So the
+compiler is quite free to use an older (or newer) value of 'current',
+and the actual assignment to the percpu storage is not relevant even if
+it might look that way.
+
+Which is exactly what happened in the resctl code, that blithely used
+'current' in '__resctrl_sched_in()' when it really wanted the new
+process state (as implied by the name: we're scheduling 'into' that new
+resctl state).  And clang would end up just using the old thread pointer
+value at least in some configurations.
+
+This could have happened with gcc too, and purely depends on random
+compiler details.  Clang just seems to have been more aggressive about
+moving the read of the per-cpu current_task pointer around.
+
+The fix is trivial: just make the resctl code adhere to the scheduler
+rules of using the prev/next thread pointer explicitly, instead of using
+'current' in a situation where it just wasn't valid.
+
+That same code is then also used outside of the scheduler context (when
+a thread's resctl state is explicitly changed), and then we will just pass
+in 'current' as that pointer, of course.  There is no ambiguity in that
+case.
+
+The fix may be trivial, but noticing and figuring out what went wrong
+was not.  The credit for that goes to Stephane Eranian.
+
+Reported-by: Stephane Eranian <eranian@google.com>
+Link: https://lore.kernel.org/lkml/20230303231133.1486085-1-eranian@google.com/
+Link: https://lore.kernel.org/lkml/alpine.LFD.2.01.0908011214330.3304@localhost.localdomain/
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Tested-by: Tony Luck <tony.luck@intel.com>
+Tested-by: Stephane Eranian <eranian@google.com>
+Tested-by: Babu Moger <babu.moger@amd.com>
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/resctrl.h         |   12 ++++++------
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c |    4 ++--
+ arch/x86/kernel/process_32.c           |    2 +-
+ arch/x86/kernel/process_64.c           |    2 +-
+ 4 files changed, 10 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/resctrl.h
++++ b/arch/x86/include/asm/resctrl.h
+@@ -51,7 +51,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_
+  *   simple as possible.
+  * Must be called with preemption disabled.
+  */
+-static void __resctrl_sched_in(void)
++static inline void __resctrl_sched_in(struct task_struct *tsk)
+ {
+       struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
+       u32 closid = state->default_closid;
+@@ -63,13 +63,13 @@ static void __resctrl_sched_in(void)
+        * Else use the closid/rmid assigned to this cpu.
+        */
+       if (static_branch_likely(&rdt_alloc_enable_key)) {
+-              tmp = READ_ONCE(current->closid);
++              tmp = READ_ONCE(tsk->closid);
+               if (tmp)
+                       closid = tmp;
+       }
+       if (static_branch_likely(&rdt_mon_enable_key)) {
+-              tmp = READ_ONCE(current->rmid);
++              tmp = READ_ONCE(tsk->rmid);
+               if (tmp)
+                       rmid = tmp;
+       }
+@@ -81,17 +81,17 @@ static void __resctrl_sched_in(void)
+       }
+ }
+-static inline void resctrl_sched_in(void)
++static inline void resctrl_sched_in(struct task_struct *tsk)
+ {
+       if (static_branch_likely(&rdt_enable_key))
+-              __resctrl_sched_in();
++              __resctrl_sched_in(tsk);
+ }
+ void resctrl_cpu_detect(struct cpuinfo_x86 *c);
+ #else
+-static inline void resctrl_sched_in(void) {}
++static inline void resctrl_sched_in(struct task_struct *tsk) {}
+ static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
+ #endif /* CONFIG_X86_CPU_RESCTRL */
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -311,7 +311,7 @@ static void update_cpu_closid_rmid(void
+        * executing task might have its own closid selected. Just reuse
+        * the context switch code.
+        */
+-      resctrl_sched_in();
++      resctrl_sched_in(current);
+ }
+ /*
+@@ -532,7 +532,7 @@ static void _update_task_closid_rmid(voi
+        * Otherwise, the MSR is updated when the task is scheduled in.
+        */
+       if (task == current)
+-              resctrl_sched_in();
++              resctrl_sched_in(task);
+ }
+ static void update_task_closid_rmid(struct task_struct *t)
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -214,7 +214,7 @@ __switch_to(struct task_struct *prev_p,
+       switch_fpu_finish(next_p);
+       /* Load the Intel cache allocation PQR MSR. */
+-      resctrl_sched_in();
++      resctrl_sched_in(next_p);
+       return prev_p;
+ }
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -629,7 +629,7 @@ __switch_to(struct task_struct *prev_p,
+       }
+       /* Load the Intel cache allocation PQR MSR. */
+-      resctrl_sched_in();
++      resctrl_sched_in(next_p);
+       return prev_p;
+ }
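The fix above boils down to a calling convention: helpers that run in the context-switch path take the task they act on as an explicit argument instead of dereferencing 'current', which the compiler is allowed to cache across the switch. A minimal userspace sketch of that convention (all names are illustrative; this is not the kernel implementation) might be:

/*
 * Sketch of the convention: code in the switch path receives the incoming
 * task explicitly and never reads a "current task" pointer that the
 * compiler may treat as unchanging.
 */
#include <stdio.h>

struct task {
        const char *name;
        unsigned int closid;
        unsigned int rmid;
};

/* Outside the scheduler there is a well-defined running task. */
static struct task *current_task;

/* The hardware-update helper takes the task explicitly... */
static void resctrl_sched_in(struct task *tsk)
{
        printf("program PQR MSR for %s: closid=%u rmid=%u\n",
               tsk->name, tsk->closid, tsk->rmid);
}

/* ...so the switch path can hand it the *incoming* task. */
static void switch_to(struct task *prev, struct task *next)
{
        (void)prev;
        current_task = next;      /* analogous to updating the per-cpu pointer */
        resctrl_sched_in(next);   /* never "current": pass next explicitly */
}

int main(void)
{
        struct task a = { "taskA", 1, 10 }, b = { "taskB", 2, 20 };

        current_task = &a;
        switch_to(&a, &b);               /* programs taskB's closid/rmid */

        /* Outside the switch path, passing current_task is unambiguous. */
        resctrl_sched_in(current_task);
        return 0;
}
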
diff --git a/queue-5.10/x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch b/queue-5.10/x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch
new file mode 100644 (file)
index 0000000..f0f4226
--- /dev/null
@@ -0,0 +1,84 @@
+From 6d3b47ddffed70006cf4ba360eef61e9ce097d8f Mon Sep 17 00:00:00 2001
+From: Valentin Schneider <valentin.schneider@arm.com>
+Date: Thu, 17 Dec 2020 14:31:21 -0800
+Subject: x86/resctrl: Apply READ_ONCE/WRITE_ONCE to task_struct.{rmid,closid}
+
+From: Valentin Schneider <valentin.schneider@arm.com>
+
+commit 6d3b47ddffed70006cf4ba360eef61e9ce097d8f upstream.
+
+A CPU's current task can have its {closid, rmid} fields read locally
+while they are being concurrently written to from another CPU.
+This can happen anytime __resctrl_sched_in() races with either
+__rdtgroup_move_task() or rdt_move_group_tasks().
+
+Prevent load / store tearing for those accesses by giving them the
+READ_ONCE() / WRITE_ONCE() treatment.
+
+Signed-off-by: Valentin Schneider <valentin.schneider@arm.com>
+Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/9921fda88ad81afb9885b517fbe864a2bc7c35a9.1608243147.git.reinette.chatre@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/resctrl.h         |   11 +++++++----
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c |   10 +++++-----
+ 2 files changed, 12 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/resctrl.h
++++ b/arch/x86/include/asm/resctrl.h
+@@ -56,19 +56,22 @@ static void __resctrl_sched_in(void)
+       struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
+       u32 closid = state->default_closid;
+       u32 rmid = state->default_rmid;
++      u32 tmp;
+       /*
+        * If this task has a closid/rmid assigned, use it.
+        * Else use the closid/rmid assigned to this cpu.
+        */
+       if (static_branch_likely(&rdt_alloc_enable_key)) {
+-              if (current->closid)
+-                      closid = current->closid;
++              tmp = READ_ONCE(current->closid);
++              if (tmp)
++                      closid = tmp;
+       }
+       if (static_branch_likely(&rdt_mon_enable_key)) {
+-              if (current->rmid)
+-                      rmid = current->rmid;
++              tmp = READ_ONCE(current->rmid);
++              if (tmp)
++                      rmid = tmp;
+       }
+       if (closid != state->cur_closid || rmid != state->cur_rmid) {
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -563,11 +563,11 @@ static int __rdtgroup_move_task(struct t
+        */
+       if (rdtgrp->type == RDTCTRL_GROUP) {
+-              tsk->closid = rdtgrp->closid;
+-              tsk->rmid = rdtgrp->mon.rmid;
++              WRITE_ONCE(tsk->closid, rdtgrp->closid);
++              WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
+       } else if (rdtgrp->type == RDTMON_GROUP) {
+               if (rdtgrp->mon.parent->closid == tsk->closid) {
+-                      tsk->rmid = rdtgrp->mon.rmid;
++                      WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
+               } else {
+                       rdt_last_cmd_puts("Can't move task to different control group\n");
+                       return -EINVAL;
+@@ -2312,8 +2312,8 @@ static void rdt_move_group_tasks(struct
+       for_each_process_thread(p, t) {
+               if (!from || is_closid_match(t, from) ||
+                   is_rmid_match(t, from)) {
+-                      t->closid = to->closid;
+-                      t->rmid = to->mon.rmid;
++                      WRITE_ONCE(t->closid, to->closid);
++                      WRITE_ONCE(t->rmid, to->mon.rmid);
+                       /*
+                        * Order the closid/rmid stores above before the loads
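The excerpt above applies the usual cure for load/store tearing: every cross-CPU access to task_struct.closid and task_struct.rmid goes through READ_ONCE()/WRITE_ONCE(), so each access becomes a single volatile load or store that the compiler cannot split, fuse, or repeat. A simplified userspace analogue (the macros below are stand-ins for the kernel's, and the struct and function names are illustrative) could be:

/*
 * Sketch of the READ_ONCE/WRITE_ONCE idea: volatile accesses force the
 * compiler to emit exactly one whole load or store per access.
 */
#include <stdint.h>
#include <stdio.h>

#define READ_ONCE(x)       (*(volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val) (*(volatile typeof(x) *)&(x) = (val))

struct task {
        uint32_t closid;
        uint32_t rmid;
};

static struct task example_task;

/* Writer side, as in __rdtgroup_move_task(): single stores, no tearing. */
static void move_task_to_group(struct task *t, uint32_t closid, uint32_t rmid)
{
        WRITE_ONCE(t->closid, closid);
        WRITE_ONCE(t->rmid, rmid);
}

/* Reader side, as in __resctrl_sched_in(): read each field exactly once. */
static void sched_in(struct task *t)
{
        uint32_t closid = READ_ONCE(t->closid);
        uint32_t rmid = READ_ONCE(t->rmid);

        printf("use closid=%u rmid=%u\n", closid, rmid);
}

int main(void)
{
        move_task_to_group(&example_task, 3, 7);
        sched_in(&example_task);
        return 0;
}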