From 57e86c1ec83045fe159f78ca184a75a1b351d6ee Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 10 Mar 2023 12:24:47 +0100 Subject: [PATCH] 5.4-stable patches added patches: net-tls-avoid-hanging-tasks-on-the-tx_lock.patch x86-resctl-fix-scheduler-confusion-with-current.patch x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch --- ...s-avoid-hanging-tasks-on-the-tx_lock.patch | 78 ++++++++ queue-5.4/series | 3 + ...fix-scheduler-confusion-with-current.patch | 176 ++++++++++++++++++ ...ite_once-to-task_struct.-rmid-closid.patch | 84 +++++++++ 4 files changed, 341 insertions(+) create mode 100644 queue-5.4/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch create mode 100644 queue-5.4/x86-resctl-fix-scheduler-confusion-with-current.patch create mode 100644 queue-5.4/x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch diff --git a/queue-5.4/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch b/queue-5.4/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch new file mode 100644 index 00000000000..17a6b12b02f --- /dev/null +++ b/queue-5.4/net-tls-avoid-hanging-tasks-on-the-tx_lock.patch @@ -0,0 +1,78 @@ +From f3221361dc85d4de22586ce8441ec2c67b454f5d Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski +Date: Tue, 28 Feb 2023 16:28:57 -0800 +Subject: net: tls: avoid hanging tasks on the tx_lock + +From: Jakub Kicinski + +commit f3221361dc85d4de22586ce8441ec2c67b454f5d upstream. + +syzbot sent a hung task report and Eric explains that adversarial +receiver may keep RWIN at 0 for a long time, so we are not guaranteed +to make forward progress. Thread which took tx_lock and went to sleep +may not release tx_lock for hours. Use interruptible sleep where +possible and reschedule the work if it can't take the lock. + +Testing: existing selftest passes + +Reported-by: syzbot+9c0268252b8ef967c62e@syzkaller.appspotmail.com +Fixes: 79ffe6087e91 ("net/tls: add a TX lock") +Link: https://lore.kernel.org/all/000000000000e412e905f5b46201@google.com/ +Cc: stable@vger.kernel.org # wait 4 weeks +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20230301002857.2101894-1-kuba@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + net/tls/tls_sw.c | 26 +++++++++++++++++++------- + 1 file changed, 19 insertions(+), 7 deletions(-) + +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -945,7 +945,9 @@ int tls_sw_sendmsg(struct sock *sk, stru + if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL)) + return -EOPNOTSUPP; + +- mutex_lock(&tls_ctx->tx_lock); ++ ret = mutex_lock_interruptible(&tls_ctx->tx_lock); ++ if (ret) ++ return ret; + lock_sock(sk); + + if (unlikely(msg->msg_controllen)) { +@@ -1279,7 +1281,9 @@ int tls_sw_sendpage(struct sock *sk, str + MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY)) + return -EOPNOTSUPP; + +- mutex_lock(&tls_ctx->tx_lock); ++ ret = mutex_lock_interruptible(&tls_ctx->tx_lock); ++ if (ret) ++ return ret; + lock_sock(sk); + ret = tls_sw_do_sendpage(sk, page, offset, size, flags); + release_sock(sk); +@@ -2263,11 +2267,19 @@ static void tx_work_handler(struct work_ + + if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) + return; +- mutex_lock(&tls_ctx->tx_lock); +- lock_sock(sk); +- tls_tx_records(sk, -1); +- release_sock(sk); +- mutex_unlock(&tls_ctx->tx_lock); ++ ++ if (mutex_trylock(&tls_ctx->tx_lock)) { ++ lock_sock(sk); ++ tls_tx_records(sk, -1); ++ release_sock(sk); ++ mutex_unlock(&tls_ctx->tx_lock); ++ } else if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) { ++ /* Someone is 
holding the tx_lock, they will likely run Tx ++ * and cancel the work on their way out of the lock section. ++ * Schedule a long delay just in case. ++ */ ++ schedule_delayed_work(&ctx->tx_work.work, msecs_to_jiffies(10)); ++ } + } + + void tls_sw_write_space(struct sock *sk, struct tls_context *ctx) diff --git a/queue-5.4/series b/queue-5.4/series index 1fc4b8e8995..39163bab60f 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -346,3 +346,6 @@ usb-uvc-enumerate-valid-values-for-color-matching.patch kernel-fail_function-fix-memory-leak-with-using-debu.patch pci-add-acs-quirk-for-wangxun-nics.patch phy-rockchip-typec-fix-unsigned-comparison-with-less.patch +net-tls-avoid-hanging-tasks-on-the-tx_lock.patch +x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch +x86-resctl-fix-scheduler-confusion-with-current.patch diff --git a/queue-5.4/x86-resctl-fix-scheduler-confusion-with-current.patch b/queue-5.4/x86-resctl-fix-scheduler-confusion-with-current.patch new file mode 100644 index 00000000000..20c96644101 --- /dev/null +++ b/queue-5.4/x86-resctl-fix-scheduler-confusion-with-current.patch @@ -0,0 +1,176 @@ +From 7fef099702527c3b2c5234a2ea6a24411485a13a Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Tue, 7 Mar 2023 13:06:29 -0800 +Subject: x86/resctl: fix scheduler confusion with 'current' + +From: Linus Torvalds + +commit 7fef099702527c3b2c5234a2ea6a24411485a13a upstream. + +The implementation of 'current' on x86 is very intentionally special: it +is a very common thing to look up, and it uses 'this_cpu_read_stable()' +to get the current thread pointer efficiently from per-cpu storage. + +And the keyword in there is 'stable': the current thread pointer never +changes as far as a single thread is concerned. Even if when a thread +is preempted, or moved to another CPU, or even across an explicit call +'schedule()' that thread will still have the same value for 'current'. + +It is, after all, the kernel base pointer to thread-local storage. +That's why it's stable to begin with, but it's also why it's important +enough that we have that special 'this_cpu_read_stable()' access for it. + +So this is all done very intentionally to allow the compiler to treat +'current' as a value that never visibly changes, so that the compiler +can do CSE and combine multiple different 'current' accesses into one. + +However, there is obviously one very special situation when the +currently running thread does actually change: inside the scheduler +itself. + +So the scheduler code paths are special, and do not have a 'current' +thread at all. Instead there are _two_ threads: the previous and the +next thread - typically called 'prev' and 'next' (or prev_p/next_p) +internally. + +So this is all actually quite straightforward and simple, and not all +that complicated. + +Except for when you then have special code that is run in scheduler +context, that code then has to be aware that 'current' isn't really a +valid thing. Did you mean 'prev'? Did you mean 'next'? + +In fact, even if then look at the code, and you use 'current' after the +new value has been assigned to the percpu variable, we have explicitly +told the compiler that 'current' is magical and always stable. So the +compiler is quite free to use an older (or newer) value of 'current', +and the actual assignment to the percpu storage is not relevant even if +it might look that way. 
+ +Which is exactly what happened in the resctl code, that blithely used +'current' in '__resctrl_sched_in()' when it really wanted the new +process state (as implied by the name: we're scheduling 'into' that new +resctl state). And clang would end up just using the old thread pointer +value at least in some configurations. + +This could have happened with gcc too, and purely depends on random +compiler details. Clang just seems to have been more aggressive about +moving the read of the per-cpu current_task pointer around. + +The fix is trivial: just make the resctl code adhere to the scheduler +rules of using the prev/next thread pointer explicitly, instead of using +'current' in a situation where it just wasn't valid. + +That same code is then also used outside of the scheduler context (when +a thread resctl state is explicitly changed), and then we will just pass +in 'current' as that pointer, of course. There is no ambiguity in that +case. + +The fix may be trivial, but noticing and figuring out what went wrong +was not. The credit for that goes to Stephane Eranian. + +Reported-by: Stephane Eranian +Link: https://lore.kernel.org/lkml/20230303231133.1486085-1-eranian@google.com/ +Link: https://lore.kernel.org/lkml/alpine.LFD.2.01.0908011214330.3304@localhost.localdomain/ +Reviewed-by: Nick Desaulniers +Tested-by: Tony Luck +Tested-by: Stephane Eranian +Tested-by: Babu Moger +Cc: stable@kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/resctrl_sched.h | 12 ++++++------ + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 4 ++-- + arch/x86/kernel/process_32.c | 2 +- + arch/x86/kernel/process_64.c | 2 +- + 4 files changed, 10 insertions(+), 10 deletions(-) + +--- a/arch/x86/include/asm/resctrl_sched.h ++++ b/arch/x86/include/asm/resctrl_sched.h +@@ -51,7 +51,7 @@ DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_ + * simple as possible. + * Must be called with preemption disabled. + */ +-static void __resctrl_sched_in(void) ++static inline void __resctrl_sched_in(struct task_struct *tsk) + { + struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); + u32 closid = state->default_closid; +@@ -63,13 +63,13 @@ static void __resctrl_sched_in(void) + * Else use the closid/rmid assigned to this cpu. + */ + if (static_branch_likely(&rdt_alloc_enable_key)) { +- tmp = READ_ONCE(current->closid); ++ tmp = READ_ONCE(tsk->closid); + if (tmp) + closid = tmp; + } + + if (static_branch_likely(&rdt_mon_enable_key)) { +- tmp = READ_ONCE(current->rmid); ++ tmp = READ_ONCE(tsk->rmid); + if (tmp) + rmid = tmp; + } +@@ -81,15 +81,15 @@ static void __resctrl_sched_in(void) + } + } + +-static inline void resctrl_sched_in(void) ++static inline void resctrl_sched_in(struct task_struct *tsk) + { + if (static_branch_likely(&rdt_enable_key)) +- __resctrl_sched_in(); ++ __resctrl_sched_in(tsk); + } + + #else + +-static inline void resctrl_sched_in(void) {} ++static inline void resctrl_sched_in(struct task_struct *tsk) {} + + #endif /* CONFIG_X86_CPU_RESCTRL */ + +--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c ++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c +@@ -311,7 +311,7 @@ static void update_cpu_closid_rmid(void + * executing task might have its own closid selected. Just reuse + * the context switch code. + */ +- resctrl_sched_in(); ++ resctrl_sched_in(current); + } + + /* +@@ -532,7 +532,7 @@ static void _update_task_closid_rmid(voi + * Otherwise, the MSR is updated when the task is scheduled in. 
+ */ + if (task == current) +- resctrl_sched_in(); ++ resctrl_sched_in(task); + } + + static void update_task_closid_rmid(struct task_struct *t) +--- a/arch/x86/kernel/process_32.c ++++ b/arch/x86/kernel/process_32.c +@@ -293,7 +293,7 @@ __switch_to(struct task_struct *prev_p, + switch_fpu_finish(next_p); + + /* Load the Intel cache allocation PQR MSR. */ +- resctrl_sched_in(); ++ resctrl_sched_in(next_p); + + return prev_p; + } +--- a/arch/x86/kernel/process_64.c ++++ b/arch/x86/kernel/process_64.c +@@ -610,7 +610,7 @@ __switch_to(struct task_struct *prev_p, + } + + /* Load the Intel cache allocation PQR MSR. */ +- resctrl_sched_in(); ++ resctrl_sched_in(next_p); + + return prev_p; + } diff --git a/queue-5.4/x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch b/queue-5.4/x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch new file mode 100644 index 00000000000..099d324f3ee --- /dev/null +++ b/queue-5.4/x86-resctrl-apply-read_once-write_once-to-task_struct.-rmid-closid.patch @@ -0,0 +1,84 @@ +From 6d3b47ddffed70006cf4ba360eef61e9ce097d8f Mon Sep 17 00:00:00 2001 +From: Valentin Schneider +Date: Thu, 17 Dec 2020 14:31:21 -0800 +Subject: x86/resctrl: Apply READ_ONCE/WRITE_ONCE to task_struct.{rmid,closid} + +From: Valentin Schneider + +commit 6d3b47ddffed70006cf4ba360eef61e9ce097d8f upstream. + +A CPU's current task can have its {closid, rmid} fields read locally +while they are being concurrently written to from another CPU. +This can happen anytime __resctrl_sched_in() races with either +__rdtgroup_move_task() or rdt_move_group_tasks(). + +Prevent load / store tearing for those accesses by giving them the +READ_ONCE() / WRITE_ONCE() treatment. + +Signed-off-by: Valentin Schneider +Signed-off-by: Reinette Chatre +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/9921fda88ad81afb9885b517fbe864a2bc7c35a9.1608243147.git.reinette.chatre@intel.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/include/asm/resctrl_sched.h | 11 +++++++---- + arch/x86/kernel/cpu/resctrl/rdtgroup.c | 10 +++++----- + 2 files changed, 12 insertions(+), 9 deletions(-) + +--- a/arch/x86/include/asm/resctrl_sched.h ++++ b/arch/x86/include/asm/resctrl_sched.h +@@ -56,19 +56,22 @@ static void __resctrl_sched_in(void) + struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state); + u32 closid = state->default_closid; + u32 rmid = state->default_rmid; ++ u32 tmp; + + /* + * If this task has a closid/rmid assigned, use it. + * Else use the closid/rmid assigned to this cpu. 
+ */ + if (static_branch_likely(&rdt_alloc_enable_key)) { +- if (current->closid) +- closid = current->closid; ++ tmp = READ_ONCE(current->closid); ++ if (tmp) ++ closid = tmp; + } + + if (static_branch_likely(&rdt_mon_enable_key)) { +- if (current->rmid) +- rmid = current->rmid; ++ tmp = READ_ONCE(current->rmid); ++ if (tmp) ++ rmid = tmp; + } + + if (closid != state->cur_closid || rmid != state->cur_rmid) { +--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c ++++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c +@@ -563,11 +563,11 @@ static int __rdtgroup_move_task(struct t + */ + + if (rdtgrp->type == RDTCTRL_GROUP) { +- tsk->closid = rdtgrp->closid; +- tsk->rmid = rdtgrp->mon.rmid; ++ WRITE_ONCE(tsk->closid, rdtgrp->closid); ++ WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); + } else if (rdtgrp->type == RDTMON_GROUP) { + if (rdtgrp->mon.parent->closid == tsk->closid) { +- tsk->rmid = rdtgrp->mon.rmid; ++ WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid); + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); + return -EINVAL; +@@ -2177,8 +2177,8 @@ static void rdt_move_group_tasks(struct + for_each_process_thread(p, t) { + if (!from || is_closid_match(t, from) || + is_rmid_match(t, from)) { +- t->closid = to->closid; +- t->rmid = to->mon.rmid; ++ WRITE_ONCE(t->closid, to->closid); ++ WRITE_ONCE(t->rmid, to->mon.rmid); + + /* + * Order the closid/rmid stores above before the loads -- 2.47.3
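
The locking pattern the net/tls patch switches to, reduced to a minimal sketch. This is illustrative only, not code from the tree: my_ctx, my_send(), my_tx_work(), do_tx() and MY_TX_SCHEDULED are made-up stand-ins for the tls_sw.c equivalents, and the usual kernel headers (<linux/mutex.h>, <linux/workqueue.h>, <linux/jiffies.h>, <linux/bitops.h>) are assumed. The direct send path takes the mutex interruptibly, so a signal can free a task that would otherwise sleep for as long as the peer keeps the receive window at zero, while the workqueue path never sleeps on the mutex at all: it either gets it with mutex_trylock() or re-arms itself.

	enum { MY_TX_SCHEDULED = 0 };

	struct my_ctx {
		struct mutex		tx_lock;
		unsigned long		flags;
		struct delayed_work	tx_work;
	};

	static int do_tx(struct my_ctx *ctx);	/* stand-in for tls_tx_records() */

	/* Direct send path: return -EINTR instead of sleeping indefinitely. */
	static int my_send(struct my_ctx *ctx)
	{
		int ret = mutex_lock_interruptible(&ctx->tx_lock);

		if (ret)
			return ret;
		ret = do_tx(ctx);
		mutex_unlock(&ctx->tx_lock);
		return ret;
	}

	/* Deferred path: never block the workqueue on the mutex. */
	static void my_tx_work(struct work_struct *work)
	{
		struct my_ctx *ctx = container_of(work, struct my_ctx,
						  tx_work.work);

		if (!test_and_clear_bit(MY_TX_SCHEDULED, &ctx->flags))
			return;

		if (mutex_trylock(&ctx->tx_lock)) {
			do_tx(ctx);
			mutex_unlock(&ctx->tx_lock);
		} else if (!test_and_set_bit(MY_TX_SCHEDULED, &ctx->flags)) {
			/* The lock holder normally runs the pending Tx work
			 * on its way out; the delayed re-arm is only a
			 * safety net, hence the generous delay.
			 */
			schedule_delayed_work(&ctx->tx_work,
					      msecs_to_jiffies(10));
		}
	}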
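
A compressed view of the 'current' pitfall that the resctl patch fixes; the function body below is abridged and the comments are not from the tree. On x86, 'current' expands to this_cpu_read_stable(current_task), which tells the compiler the value is invariant within a function, so a use that textually follows the per-cpu store may still be satisfied by a load hoisted above it.

	struct task_struct *
	__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
	{
		/* ... stack, FPU and segment switching elided ... */

		this_cpu_write(current_task, next_p);

		/*
		 * 'current' is a "stable" per-cpu read, so the compiler may
		 * hoist or reuse the load; here it can still evaluate to
		 * prev_p even though the store above has already happened:
		 *
		 *	resctrl_sched_in(current);	// ambiguous: old task?
		 *
		 * Naming the incoming task removes the ambiguity, which is
		 * all the upstream fix does:
		 */
		resctrl_sched_in(next_p);

		return prev_p;
	}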
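
Finally, the access pattern the READ_ONCE()/WRITE_ONCE() backport establishes, shown as a small sketch rather than the real rdtgroup.c / resctrl_sched.h code; struct grp_ids and the two helpers are invented for illustration, and t->closid / t->rmid are the task_struct fields the patch annotates. Each annotated access is performed exactly once and cannot be torn or fused by the compiler, so a reader racing with the writer on another CPU sees either the old or the new value, never a mixture, and the value that was tested is the value that gets used.

	struct grp_ids {
		u32 closid;
		u32 rmid;
	};

	/* Writer side (task moved to another group): one whole store each. */
	static void move_task_ids(struct task_struct *t, const struct grp_ids *g)
	{
		WRITE_ONCE(t->closid, g->closid);
		WRITE_ONCE(t->rmid, g->rmid);
	}

	/* Reader side (context switch): a single load each, taken into a
	 * local so the non-zero test and the subsequent use agree.
	 */
	static void load_task_ids(struct task_struct *t, u32 *closid, u32 *rmid)
	{
		u32 tmp;

		tmp = READ_ONCE(t->closid);
		if (tmp)
			*closid = tmp;

		tmp = READ_ONCE(t->rmid);
		if (tmp)
			*rmid = tmp;
	}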