From c78b459c7ee32289cb88e9137906ba879a0f2a22 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 27 Apr 2023 10:37:17 +0200 Subject: [PATCH] 6.2-stable patches added patches: rcu-kvfree-avoid-freeing-new-kfree_rcu-memory-after-old-grace-period.patch rust-arch-um-disable-fp-simd-instruction-to-match-x86.patch series um-only-disable-sse-on-clang-to-work-around-old-gcc-bugs.patch --- ...ee_rcu-memory-after-old-grace-period.patch | 152 ++++++++++++++++++ ...ble-fp-simd-instruction-to-match-x86.patch | 48 ++++++ queue-6.2/series | 3 + ...on-clang-to-work-around-old-gcc-bugs.patch | 54 +++++++ 4 files changed, 257 insertions(+) create mode 100644 queue-6.2/rcu-kvfree-avoid-freeing-new-kfree_rcu-memory-after-old-grace-period.patch create mode 100644 queue-6.2/rust-arch-um-disable-fp-simd-instruction-to-match-x86.patch create mode 100644 queue-6.2/series create mode 100644 queue-6.2/um-only-disable-sse-on-clang-to-work-around-old-gcc-bugs.patch diff --git a/queue-6.2/rcu-kvfree-avoid-freeing-new-kfree_rcu-memory-after-old-grace-period.patch b/queue-6.2/rcu-kvfree-avoid-freeing-new-kfree_rcu-memory-after-old-grace-period.patch new file mode 100644 index 00000000000..d08b1fc11ce --- /dev/null +++ b/queue-6.2/rcu-kvfree-avoid-freeing-new-kfree_rcu-memory-after-old-grace-period.patch @@ -0,0 +1,152 @@ +From 5da7cb193db32da783a3f3e77d8b639989321d48 Mon Sep 17 00:00:00 2001 +From: Ziwei Dai +Date: Fri, 31 Mar 2023 20:42:09 +0800 +Subject: rcu/kvfree: Avoid freeing new kfree_rcu() memory after old grace period + +From: Ziwei Dai + +commit 5da7cb193db32da783a3f3e77d8b639989321d48 upstream. + +Memory passed to kvfree_rcu() that is to be freed is tracked by a +per-CPU kfree_rcu_cpu structure, which in turn contains pointers +to kvfree_rcu_bulk_data structures that contain pointers to memory +that has not yet been handed to RCU, along with a kfree_rcu_cpu_work +structure that tracks the memory that has already been handed to RCU. 
+These structures track three categories of memory: (1) Memory for +kfree(), (2) Memory for kvfree(), and (3) Memory for both that arrived +during an OOM episode. The first two categories are tracked in a +cache-friendly manner involving a dynamically allocated page of pointers +(the aforementioned kvfree_rcu_bulk_data structures), while the third +uses a simple (but decidedly cache-unfriendly) linked list through the +rcu_head structures in each block of memory. + +On a given CPU, these three categories are handled as a unit, with that +CPU's kfree_rcu_cpu_work structure having one pointer for each of the +three categories. Clearly, new memory for a given category cannot be +placed in the corresponding kfree_rcu_cpu_work structure until any old +memory has had its grace period elapse and thus has been removed. And +the kfree_rcu_monitor() function does in fact check for this. + +Except that the kfree_rcu_monitor() function checks these pointers one +at a time. This means that if the previous kfree_rcu() memory passed +to RCU had only category 1 and the current one has only category 2, the +kfree_rcu_monitor() function will send that current category-2 memory +along immediately. This can result in memory being freed too soon, +that is, out from under unsuspecting RCU readers. + +To see this, consider the following sequence of events, in which: + +o Task A on CPU 0 calls rcu_read_lock(), then uses "from_cset", + then is preempted. + +o CPU 1 calls kfree_rcu(cset, rcu_head) in order to free "from_cset" + after a later grace period. Except that "from_cset" is freed + right after the previous grace period ended, so that "from_cset" + is immediately freed. Task A resumes and references "from_cset"'s + member, after which nothing good happens. 
+ +In full detail: + +CPU 0 CPU 1 +---------------------- ---------------------- +count_memcg_event_mm() +|rcu_read_lock() <--- +|mem_cgroup_from_task() + |// css_set_ptr is the "from_cset" mentioned on CPU 1 + |css_set_ptr = rcu_dereference((task)->cgroups) + |// Hard irq comes, current task is scheduled out. + + cgroup_attach_task() + |cgroup_migrate() + |cgroup_migrate_execute() + |css_set_move_task(task, from_cset, to_cset, true) + |cgroup_move_task(task, to_cset) + |rcu_assign_pointer(.., to_cset) + |... + |cgroup_migrate_finish() + |put_css_set_locked(from_cset) + |from_cset->refcount return 0 + |kfree_rcu(cset, rcu_head) // free from_cset after new gp + |add_ptr_to_bulk_krc_lock() + |schedule_delayed_work(&krcp->monitor_work, ..) + + kfree_rcu_monitor() + |krcp->bulk_head[0]'s work attached to krwp->bulk_head_free[] + |queue_rcu_work(system_wq, &krwp->rcu_work) + |if rwork->rcu.work is not in WORK_STRUCT_PENDING_BIT state, + |call_rcu(&rwork->rcu, rcu_work_rcufn) <--- request new gp + + // There is a previous call_rcu(.., rcu_work_rcufn) + // gp end, rcu_work_rcufn() is called. + rcu_work_rcufn() + |__queue_work(.., rwork->wq, &rwork->work); + + |kfree_rcu_work() + |krwp->bulk_head_free[0] bulk is freed before new gp end!!! + |The "from_cset" is freed before new gp end. + +// the task resumes some time later. + |css_set_ptr->subsys[(subsys_id)] <--- Caused kernel crash, because css_set_ptr is freed. + +This commit therefore causes kfree_rcu_monitor() to refrain from moving +kfree_rcu() memory to the kfree_rcu_cpu_work structure until the RCU +grace period has completed for all three categories. + +v2: Use helper function instead of inserted code block at kfree_rcu_monitor(). 
+ +Fixes: 34c881745549 ("rcu: Support kfree_bulk() interface in kfree_rcu()") +Fixes: 5f3c8d620447 ("rcu/tree: Maintain separate array for vmalloc ptrs") +Reported-by: Mukesh Ojha +Signed-off-by: Ziwei Dai +Reviewed-by: Uladzislau Rezki (Sony) +Tested-by: Uladzislau Rezki (Sony) +Signed-off-by: Paul E. McKenney +Signed-off-by: Uladzislau Rezki (Sony) +Signed-off-by: Greg Kroah-Hartman +--- + kernel/rcu/tree.c | 27 +++++++++++++++++++-------- + 1 file changed, 19 insertions(+), 8 deletions(-) + +--- a/kernel/rcu/tree.c ++++ b/kernel/rcu/tree.c +@@ -3131,6 +3131,18 @@ need_offload_krc(struct kfree_rcu_cpu *k + return !!krcp->head; + } + ++static bool ++need_wait_for_krwp_work(struct kfree_rcu_cpu_work *krwp) ++{ ++ int i; ++ ++ for (i = 0; i < FREE_N_CHANNELS; i++) ++ if (krwp->bkvhead_free[i]) ++ return true; ++ ++ return !!krwp->head_free; ++} ++ + static void + schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp) + { +@@ -3162,14 +3174,13 @@ static void kfree_rcu_monitor(struct wor + for (i = 0; i < KFREE_N_BATCHES; i++) { + struct kfree_rcu_cpu_work *krwp = &(krcp->krw_arr[i]); + +- // Try to detach bkvhead or head and attach it over any +- // available corresponding free channel. It can be that +- // a previous RCU batch is in progress, it means that +- // immediately to queue another one is not possible so +- // in that case the monitor work is rearmed. +- if ((krcp->bkvhead[0] && !krwp->bkvhead_free[0]) || +- (krcp->bkvhead[1] && !krwp->bkvhead_free[1]) || +- (krcp->head && !krwp->head_free)) { ++ // Try to detach bulk_head or head and attach it, only when ++ // all channels are free. Any channel is not free means at krwp ++ // there is on-going rcu work to handle krwp's free business. ++ if (need_wait_for_krwp_work(krwp)) ++ continue; ++ ++ if (need_offload_krc(krcp)) { + // Channel 1 corresponds to the SLAB-pointer bulk path. + // Channel 2 corresponds to vmalloc-pointer bulk path. 
+ for (j = 0; j < FREE_N_CHANNELS; j++) { diff --git a/queue-6.2/rust-arch-um-disable-fp-simd-instruction-to-match-x86.patch b/queue-6.2/rust-arch-um-disable-fp-simd-instruction-to-match-x86.patch new file mode 100644 index 00000000000..77747b00a64 --- /dev/null +++ b/queue-6.2/rust-arch-um-disable-fp-simd-instruction-to-match-x86.patch @@ -0,0 +1,48 @@ +From 8849818679478933dd1d9718741f4daa3f4e8b86 Mon Sep 17 00:00:00 2001 +From: David Gow +Date: Sat, 17 Dec 2022 12:44:35 +0800 +Subject: rust: arch/um: Disable FP/SIMD instruction to match x86 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: David Gow + +commit 8849818679478933dd1d9718741f4daa3f4e8b86 upstream. + +The kernel disables all SSE and similar FP/SIMD instructions on +x86-based architectures (partly because we shouldn't be using floats in +the kernel, and partly to avoid the need for stack alignment, see: +https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53383 ) + +UML does not do the same thing, which isn't in itself a problem, but +does add to the list of differences between UML and "normal" x86 builds. 
+ +In addition, there was a crash bug with LLVM < 15 / rustc < 1.65 when +building with SSE, so disabling it fixes rust builds with earlier +compiler versions, see: +https://github.com/Rust-for-Linux/linux/pull/881 + +Signed-off-by: David Gow +Reviewed-by: Sergio González Collado +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/Makefile.um | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/arch/x86/Makefile.um ++++ b/arch/x86/Makefile.um +@@ -1,6 +1,12 @@ + # SPDX-License-Identifier: GPL-2.0 + core-y += arch/x86/crypto/ + ++# ++# Disable SSE and other FP/SIMD instructions to match normal x86 ++# ++KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx ++KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 ++ + ifeq ($(CONFIG_X86_32),y) + START := 0x8048000 + diff --git a/queue-6.2/series b/queue-6.2/series new file mode 100644 index 00000000000..bc18bdcfa16 --- /dev/null +++ b/queue-6.2/series @@ -0,0 +1,3 @@ +rust-arch-um-disable-fp-simd-instruction-to-match-x86.patch +um-only-disable-sse-on-clang-to-work-around-old-gcc-bugs.patch +rcu-kvfree-avoid-freeing-new-kfree_rcu-memory-after-old-grace-period.patch diff --git a/queue-6.2/um-only-disable-sse-on-clang-to-work-around-old-gcc-bugs.patch b/queue-6.2/um-only-disable-sse-on-clang-to-work-around-old-gcc-bugs.patch new file mode 100644 index 00000000000..7f7c35fec01 --- /dev/null +++ b/queue-6.2/um-only-disable-sse-on-clang-to-work-around-old-gcc-bugs.patch @@ -0,0 +1,54 @@ +From a3046a618a284579d1189af8711765f553eed707 Mon Sep 17 00:00:00 2001 +From: David Gow +Date: Sat, 18 Mar 2023 12:15:54 +0800 +Subject: um: Only disable SSE on clang to work around old GCC bugs + +From: David Gow + +commit a3046a618a284579d1189af8711765f553eed707 upstream. + +As part of the Rust support for UML, we disable SSE (and similar flags) +to match the normal x86 builds. 
This both makes sense (we ideally want a +similar configuration to x86), and works around a crash bug with SSE +generation under Rust with LLVM. + +However, this breaks compiling stdlib.h under gcc < 11, as the x86_64 +ABI requires floating-point return values be stored in an SSE register. +gcc 11 fixes this by only doing register allocation when a function is +actually used, and since we never use atof(), it shouldn't be a problem: +https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652 + +Nevertheless, only disable SSE on clang setups, as that's a simple way +of working around everyone's bugs. + +Fixes: 884981867947 ("rust: arch/um: Disable FP/SIMD instruction to match x86") +Reported-by: Roberto Sassu +Link: https://lore.kernel.org/linux-um/6df2ecef9011d85654a82acd607fdcbc93ad593c.camel@huaweicloud.com/ +Tested-by: Roberto Sassu +Tested-by: SeongJae Park +Signed-off-by: David Gow +Reviewed-by: Vincenzo Palazzo +Tested-by: Arthur Grillo +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + arch/x86/Makefile.um | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/x86/Makefile.um ++++ b/arch/x86/Makefile.um +@@ -3,9 +3,14 @@ core-y += arch/x86/crypto/ + + # + # Disable SSE and other FP/SIMD instructions to match normal x86 ++# This is required to work around issues in older LLVM versions, but breaks ++# GCC versions < 11. See: ++# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99652 + # ++ifeq ($(CONFIG_CC_IS_CLANG),y) + KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx + KBUILD_RUSTFLAGS += -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 ++endif + + ifeq ($(CONFIG_X86_32),y) + START := 0x8048000 -- 2.47.3