From: Greg Kroah-Hartman
Date: Fri, 10 Mar 2023 11:25:20 +0000 (+0100)
Subject: 5.15-stable patches
X-Git-Tag: v6.1.17~40
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=58ae2acc25882682e1574cebd09f441400a3c293;p=thirdparty%2Fkernel%2Fstable-queue.git

5.15-stable patches

added patches:
	net-tls-avoid-hanging-tasks-on-the-tx_lock.patch
	x86-resctl-fix-scheduler-confusion-with-current.patch
---

diff --git a/queue-5.15/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch b/queue-5.15/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch
new file mode 100644
index 00000000000..027a807077e
--- /dev/null
+++ b/queue-5.15/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch
@@ -0,0 +1,78 @@
+From f3221361dc85d4de22586ce8441ec2c67b454f5d Mon Sep 17 00:00:00 2001
+From: Jakub Kicinski
+Date: Tue, 28 Feb 2023 16:28:57 -0800
+Subject: net: tls: avoid hanging tasks on the tx_lock
+
+From: Jakub Kicinski
+
+commit f3221361dc85d4de22586ce8441ec2c67b454f5d upstream.
+
+syzbot sent a hung task report and Eric explains that an adversarial
+receiver may keep RWIN at 0 for a long time, so we are not guaranteed
+to make forward progress. A thread which took the tx_lock and went to
+sleep may not release the tx_lock for hours. Use interruptible sleep
+where possible and reschedule the work if it can't take the lock.
+
+Testing: existing selftest passes
+
+Reported-by: syzbot+9c0268252b8ef967c62e@syzkaller.appspotmail.com
+Fixes: 79ffe6087e91 ("net/tls: add a TX lock")
+Link: https://lore.kernel.org/all/000000000000e412e905f5b46201@google.com/
+Cc: stable@vger.kernel.org # wait 4 weeks
+Reviewed-by: Eric Dumazet
+Link: https://lore.kernel.org/r/20230301002857.2101894-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Greg Kroah-Hartman
+---
+ net/tls/tls_sw.c |   26 +++++++++++++++++++-------
+ 1 file changed, 19 insertions(+), 7 deletions(-)
+
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -950,7 +950,9 @@ int tls_sw_sendmsg(struct sock *sk, stru
+ 			       MSG_CMSG_COMPAT))
+ 		return -EOPNOTSUPP;
+ 
+-	mutex_lock(&tls_ctx->tx_lock);
++	ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
++	if (ret)
++		return ret;
+ 	lock_sock(sk);
+ 
+ 	if (unlikely(msg->msg_controllen)) {
+@@ -1284,7 +1286,9 @@ int tls_sw_sendpage(struct sock *sk, str
+ 		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+ 		return -EOPNOTSUPP;
+ 
+-	mutex_lock(&tls_ctx->tx_lock);
++	ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
++	if (ret)
++		return ret;
+ 	lock_sock(sk);
+ 	ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
+ 	release_sock(sk);
+@@ -2284,11 +2288,19 @@ static void tx_work_handler(struct work_
+ 
+ 	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+ 		return;
+-	mutex_lock(&tls_ctx->tx_lock);
+-	lock_sock(sk);
+-	tls_tx_records(sk, -1);
+-	release_sock(sk);
+-	mutex_unlock(&tls_ctx->tx_lock);
++
++	if (mutex_trylock(&tls_ctx->tx_lock)) {
++		lock_sock(sk);
++		tls_tx_records(sk, -1);
++		release_sock(sk);
++		mutex_unlock(&tls_ctx->tx_lock);
++	} else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
++		/* Someone is holding the tx_lock, they will likely run Tx
++		 * and cancel the work on their way out of the lock section.
++		 * Schedule a long delay just in case.
++		 */
++		schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10));
++	}
+ }
+ 
+ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
diff --git a/queue-5.15/series b/queue-5.15/series
index 7e9b775ebcb..0604d63971b 100644
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -121,3 +121,5 @@ pci-add-acs-quirk-for-wangxun-nics.patch
 phy-rockchip-typec-fix-unsigned-comparison-with-less.patch
 soundwire-cadence-remove-wasted-space-in-response_bu.patch
 soundwire-cadence-drain-the-rx-fifo-after-an-io-time.patch
+net-tls-avoid-hanging-tasks-on-the-tx_lock.patch
+x86-resctl-fix-scheduler-confusion-with-current.patch
diff --git a/queue-5.15/x86-resctl-fix-scheduler-confusion-with-current.patch b/queue-5.15/x86-resctl-fix-scheduler-confusion-with-current.patch
new file mode 100644
index 00000000000..ee096da2ca1
--- /dev/null
+++ b/queue-5.15/x86-resctl-fix-scheduler-confusion-with-current.patch
@@ -0,0 +1,178 @@
+From 7fef099702527c3b2c5234a2ea6a24411485a13a Mon Sep 17 00:00:00 2001
+From: Linus Torvalds
+Date: Tue, 7 Mar 2023 13:06:29 -0800
+Subject: x86/resctl: fix scheduler confusion with 'current'
+
+From: Linus Torvalds
+
+commit 7fef099702527c3b2c5234a2ea6a24411485a13a upstream.
+
+The implementation of 'current' on x86 is very intentionally special: it
+is a very common thing to look up, and it uses 'this_cpu_read_stable()'
+to get the current thread pointer efficiently from per-cpu storage.
+
+And the keyword in there is 'stable': the current thread pointer never
+changes as far as a single thread is concerned. Even when a thread is
+preempted, or moved to another CPU, or even across an explicit call to
+'schedule()', that thread will still have the same value for 'current'.
+
+It is, after all, the kernel base pointer to thread-local storage.
+That's why it's stable to begin with, but it's also why it's important
+enough that we have that special 'this_cpu_read_stable()' access for it.
+
+So this is all done very intentionally to allow the compiler to treat
+'current' as a value that never visibly changes, so that the compiler
+can do CSE and combine multiple different 'current' accesses into one.
+
+However, there is obviously one very special situation when the
+currently running thread does actually change: inside the scheduler
+itself.
+
+So the scheduler code paths are special, and do not have a 'current'
+thread at all. Instead there are _two_ threads: the previous and the
+next thread - typically called 'prev' and 'next' (or prev_p/next_p)
+internally.
+
+So this is all actually quite straightforward and simple, and not all
+that complicated.
+
+Except for when you then have special code that is run in scheduler
+context, that code then has to be aware that 'current' isn't really a
+valid thing. Did you mean 'prev'? Did you mean 'next'?
+
+In fact, even if you then look at the code, and you use 'current' after
+the new value has been assigned to the percpu variable, we have
+explicitly told the compiler that 'current' is magical and always
+stable. So the compiler is quite free to use an older (or newer) value
+of 'current', and the actual assignment to the percpu storage is not
+relevant even if it might look that way.
+
+Which is exactly what happened in the resctl code, that blithely used
+'current' in '__resctrl_sched_in()' when it really wanted the new
+process state (as implied by the name: we're scheduling 'into' that new
+resctl state). And clang would end up just using the old thread pointer
+value at least in some configurations.
+
+This could have happened with gcc too, and purely depends on random
+compiler details. Clang just seems to have been more aggressive about
+moving the read of the per-cpu current_task pointer around.
+
+The fix is trivial: just make the resctl code adhere to the scheduler
+rules of using the prev/next thread pointer explicitly, instead of using
+'current' in a situation where it just wasn't valid.
+
+That same code is then also used outside of the scheduler context (when
+a thread's resctl state is explicitly changed), and then we will just
+pass in 'current' as that pointer, of course. There is no ambiguity in
+that case.
+
+The fix may be trivial, but noticing and figuring out what went wrong
+was not. The credit for that goes to Stephane Eranian.
+
+Reported-by: Stephane Eranian
+Link: https://lore.kernel.org/lkml/20230303231133.1486085-1-eranian@google.com/
+Link: https://lore.kernel.org/lkml/alpine.LFD.2.01.0908011214330.3304@localhost.localdomain/
+Reviewed-by: Nick Desaulniers
+Tested-by: Tony Luck
+Tested-by: Stephane Eranian
+Tested-by: Babu Moger
+Cc: stable@kernel.org
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/include/asm/resctrl.h         |   12 ++++++------
+ arch/x86/kernel/cpu/resctrl/rdtgroup.c |    4 ++--
+ arch/x86/kernel/process_32.c           |    2 +-
+ arch/x86/kernel/process_64.c           |    2 +-
+ 4 files changed, 10 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/include/asm/resctrl.h
++++ b/arch/x86/include/asm/resctrl.h
+@@ -51,7 +51,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_
+  * simple as possible.
+  * Must be called with preemption disabled.
+  */
+-static void __resctrl_sched_in(void)
++static inline void __resctrl_sched_in(struct task_struct *tsk)
+ {
+ 	struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
+ 	u32 closid = state->default_closid;
+@@ -63,13 +63,13 @@ static void __resctrl_sched_in(void)
+ 	 * Else use the closid/rmid assigned to this cpu.
+ 	 */
+ 	if (static_branch_likely(&rdt_alloc_enable_key)) {
+-		tmp = READ_ONCE(current->closid);
++		tmp = READ_ONCE(tsk->closid);
+ 		if (tmp)
+ 			closid = tmp;
+ 	}
+ 
+ 	if (static_branch_likely(&rdt_mon_enable_key)) {
+-		tmp = READ_ONCE(current->rmid);
++		tmp = READ_ONCE(tsk->rmid);
+ 		if (tmp)
+ 			rmid = tmp;
+ 	}
+@@ -81,17 +81,17 @@ static void __resctrl_sched_in(void)
+ 	}
+ }
+ 
+-static inline void resctrl_sched_in(void)
++static inline void resctrl_sched_in(struct task_struct *tsk)
+ {
+ 	if (static_branch_likely(&rdt_enable_key))
+-		__resctrl_sched_in();
++		__resctrl_sched_in(tsk);
+ }
+ 
+ void resctrl_cpu_detect(struct cpuinfo_x86 *c);
+ 
+ #else
+ 
+-static inline void resctrl_sched_in(void) {}
++static inline void resctrl_sched_in(struct task_struct *tsk) {}
+ static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {}
+ 
+ #endif /* CONFIG_X86_CPU_RESCTRL */
+--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+@@ -314,7 +314,7 @@ static void update_cpu_closid_rmid(void
+ 	 * executing task might have its own closid selected. Just reuse
+ 	 * the context switch code.
+ 	 */
+-	resctrl_sched_in();
++	resctrl_sched_in(current);
+ }
+ 
+ /*
+@@ -535,7 +535,7 @@ static void _update_task_closid_rmid(voi
+ 	 * Otherwise, the MSR is updated when the task is scheduled in.
+ 	 */
+ 	if (task == current)
+-		resctrl_sched_in();
++		resctrl_sched_in(task);
+ }
+ 
+ static void update_task_closid_rmid(struct task_struct *t)
+--- a/arch/x86/kernel/process_32.c
++++ b/arch/x86/kernel/process_32.c
+@@ -216,7 +216,7 @@ __switch_to(struct task_struct *prev_p,
+ 	switch_fpu_finish(next_fpu);
+ 
+ 	/* Load the Intel cache allocation PQR MSR. */
+-	resctrl_sched_in();
++	resctrl_sched_in(next_p);
+ 
+ 	return prev_p;
+ }
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -656,7 +656,7 @@ __switch_to(struct task_struct *prev_p,
+ 	}
+ 
+ 	/* Load the Intel cache allocation PQR MSR. */
+-	resctrl_sched_in();
++	resctrl_sched_in(next_p);
+ 
+ 	return prev_p;
+ }
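
The tx_work_handler() hunk in the tls patch above is an instance of a general trylock-or-defer pattern: the work handler never blocks on the tx_lock, and if another task already holds it, the work is simply marked pending again. A minimal user-space sketch of the same idea, in plain C with pthreads rather than the kernel's mutex/workqueue API; struct work_item, try_run_or_reschedule() and run_tx() are illustrative names, not taken from the patch:

/*
 * Sketch of the trylock-or-defer pattern: never sleep on a lock that
 * another party may hold indefinitely; defer the work instead.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct work_item {
	pthread_mutex_t *lock;   /* stands in for tls_ctx->tx_lock  */
	bool scheduled;          /* stands in for BIT_TX_SCHEDULED  */
};

static void run_tx(void)
{
	/* Placeholder for the real transmit work. */
	puts("transmitting pending records");
}

/* Returns true if the work ran now, false if it was deferred. */
static bool try_run_or_reschedule(struct work_item *w)
{
	if (pthread_mutex_trylock(w->lock) == 0) {
		run_tx();
		pthread_mutex_unlock(w->lock);
		return true;
	}
	/*
	 * Someone else holds the lock.  Do not block here (blocking is
	 * what could hang for a very long time); just mark the work as
	 * pending so a later pass, or the lock holder, picks it up.
	 */
	w->scheduled = true;
	return false;
}

int main(void)
{
	pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	struct work_item w = { .lock = &lock, .scheduled = false };

	try_run_or_reschedule(&w);   /* lock is free: runs immediately */

	pthread_mutex_lock(&lock);   /* simulate a contended lock      */
	try_run_or_reschedule(&w);   /* deferred, not blocked          */
	pthread_mutex_unlock(&lock);

	printf("deferred: %s\n", w.scheduled ? "yes" : "no");
	return 0;
}

Built with 'cc -pthread', the second call returns immediately and only sets the pending flag, which mirrors why the kernel change avoids the hung-task report.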
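
The resctrl fix above is, at bottom, a calling-convention change: code that can run on the context-switch path receives the task it should act on as an explicit argument, and only callers outside that path hand it 'current'. A stand-alone sketch of that convention in plain C; struct task, sched_in_hook(), context_switch() and current_task() are made-up names for illustration, not the kernel's:

/*
 * Sketch: a hook callable from the switch path must be told which task
 * to act on, because "current" there is ambiguous (and may be cached).
 */
#include <stdio.h>

struct task {
	const char *name;
	unsigned int closid;	/* mimics per-task resctrl state */
};

/* Per-CPU "current" as the rest of the code sees it. */
static struct task *cpu_current;

static struct task *current_task(void)
{
	return cpu_current;
}

/*
 * The hook acts on the task it is given.  On the switch path that is
 * 'next'; a cached "current" could still name the outgoing task.
 */
static void sched_in_hook(struct task *tsk)
{
	printf("programming closid %u for %s\n", tsk->closid, tsk->name);
}

static void context_switch(struct task *prev, struct task *next)
{
	(void)prev;
	cpu_current = next;	/* the switch updates "current"...           */
	sched_in_hook(next);	/* ...but the hook gets 'next' explicitly    */
}

int main(void)
{
	struct task a = { "task-a", 1 }, b = { "task-b", 2 };

	cpu_current = &a;
	context_switch(&a, &b);		/* switch path: pass 'next'          */
	sched_in_hook(current_task());	/* non-switch path: 'current' is fine */
	return 0;
}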