From: Greg Kroah-Hartman Date: Fri, 18 Oct 2024 08:28:35 +0000 (+0200) Subject: 6.11-stable patches X-Git-Tag: v5.10.228~64 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=db9e4dd7751507bc559504f69045dc86a540ce29;p=thirdparty%2Fkernel%2Fstable-queue.git 6.11-stable patches added patches: fat-fix-uninitialized-variable.patch lib-alloc_tag_module_unload-must-wait-for-pending-kfree_rcu-calls.patch maple_tree-correct-tree-corruption-on-spanning-store.patch mm-damon-tests-sysfs-kunit.h-fix-memory-leak-in-damon_sysfs_test_add_targets.patch mm-khugepaged-fix-the-arguments-order-in-khugepaged_collapse_file-trace-point.patch mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch nilfs2-propagate-directory-read-errors-from-nilfs_find_entry.patch selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch --- diff --git a/queue-6.11/fat-fix-uninitialized-variable.patch b/queue-6.11/fat-fix-uninitialized-variable.patch new file mode 100644 index 00000000000..edea2c3a493 --- /dev/null +++ b/queue-6.11/fat-fix-uninitialized-variable.patch @@ -0,0 +1,36 @@ +From 963a7f4d3b90ee195b895ca06b95757fcba02d1a Mon Sep 17 00:00:00 2001 +From: OGAWA Hirofumi +Date: Fri, 4 Oct 2024 15:03:49 +0900 +Subject: fat: fix uninitialized variable + +From: OGAWA Hirofumi + +commit 963a7f4d3b90ee195b895ca06b95757fcba02d1a upstream. + +syszbot produced this with a corrupted fs image. In theory, however an IO +error would trigger this also. + +This affects just an error report, so should not be a serious error. 
+ +Link: https://lkml.kernel.org/r/87r08wjsnh.fsf@mail.parknet.co.jp +Link: https://lkml.kernel.org/r/66ff2c95.050a0220.49194.03e9.GAE@google.com +Signed-off-by: OGAWA Hirofumi +Reported-by: syzbot+ef0d7bc412553291aa86@syzkaller.appspotmail.com +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/fat/namei_vfat.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/fat/namei_vfat.c ++++ b/fs/fat/namei_vfat.c +@@ -1037,7 +1037,7 @@ error_inode: + if (corrupt < 0) { + fat_fs_error(new_dir->i_sb, + "%s: Filesystem corrupted (i_pos %lld)", +- __func__, sinfo.i_pos); ++ __func__, new_i_pos); + } + goto out; + } diff --git a/queue-6.11/lib-alloc_tag_module_unload-must-wait-for-pending-kfree_rcu-calls.patch b/queue-6.11/lib-alloc_tag_module_unload-must-wait-for-pending-kfree_rcu-calls.patch new file mode 100644 index 00000000000..8362f1173c7 --- /dev/null +++ b/queue-6.11/lib-alloc_tag_module_unload-must-wait-for-pending-kfree_rcu-calls.patch @@ -0,0 +1,66 @@ +From dc783ba4b9df3fb3e76e968b2cbeb9960069263c Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Mon, 7 Oct 2024 22:52:24 +0200 +Subject: lib: alloc_tag_module_unload must wait for pending kfree_rcu calls + +From: Florian Westphal + +commit dc783ba4b9df3fb3e76e968b2cbeb9960069263c upstream. + +Ben Greear reports following splat: + ------------[ cut here ]------------ + net/netfilter/nf_nat_core.c:1114 module nf_nat func:nf_nat_register_fn has 256 allocated at module unload + WARNING: CPU: 1 PID: 10421 at lib/alloc_tag.c:168 alloc_tag_module_unload+0x22b/0x3f0 + Modules linked in: nf_nat(-) btrfs ufs qnx4 hfsplus hfs minix vfat msdos fat +... + Hardware name: Default string Default string/SKYBAY, BIOS 5.12 08/04/2020 + RIP: 0010:alloc_tag_module_unload+0x22b/0x3f0 + codetag_unload_module+0x19b/0x2a0 + ? 
codetag_load_module+0x80/0x80 + +nf_nat module exit calls kfree_rcu on those addresses, but the free +operation is likely still pending by the time alloc_tag checks for leaks. + +Wait for outstanding kfree_rcu operations to complete before checking +resolves this warning. + +Reproducer: +unshare -n iptables-nft -t nat -A PREROUTING -p tcp +grep nf_nat /proc/allocinfo # will list 4 allocations +rmmod nft_chain_nat +rmmod nf_nat # will WARN. + +[akpm@linux-foundation.org: add comment] +Link: https://lkml.kernel.org/r/20241007205236.11847-1-fw@strlen.de +Fixes: a473573964e5 ("lib: code tagging module support") +Signed-off-by: Florian Westphal +Reported-by: Ben Greear +Closes: https://lore.kernel.org/netdev/bdaaef9d-4364-4171-b82b-bcfc12e207eb@candelatech.com/ +Cc: Uladzislau Rezki +Cc: Vlastimil Babka +Cc: Suren Baghdasaryan +Cc: Kent Overstreet +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/codetag.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/lib/codetag.c b/lib/codetag.c +index afa8a2d4f317..d1fbbb7c2ec3 100644 +--- a/lib/codetag.c ++++ b/lib/codetag.c +@@ -228,6 +228,9 @@ bool codetag_unload_module(struct module *mod) + if (!mod) + return true; + ++ /* await any module's kfree_rcu() operations to complete */ ++ kvfree_rcu_barrier(); ++ + mutex_lock(&codetag_lock); + list_for_each_entry(cttype, &codetag_types, link) { + struct codetag_module *found = NULL; +-- +2.47.0 + diff --git a/queue-6.11/maple_tree-correct-tree-corruption-on-spanning-store.patch b/queue-6.11/maple_tree-correct-tree-corruption-on-spanning-store.patch new file mode 100644 index 00000000000..e31ba2c34b7 --- /dev/null +++ b/queue-6.11/maple_tree-correct-tree-corruption-on-spanning-store.patch @@ -0,0 +1,245 @@ +From bea07fd63192b61209d48cbb81ef474cc3ee4c62 Mon Sep 17 00:00:00 2001 +From: Lorenzo Stoakes +Date: Mon, 7 Oct 2024 16:28:32 +0100 +Subject: maple_tree: correct tree corruption on spanning store + +From: Lorenzo Stoakes + +commit 
bea07fd63192b61209d48cbb81ef474cc3ee4c62 upstream. + +Patch series "maple_tree: correct tree corruption on spanning store", v3. + +There has been a nasty yet subtle maple tree corruption bug that appears +to have been in existence since the inception of the algorithm. + +This bug seems far more likely to happen since commit f8d112a4e657 +("mm/mmap: avoid zeroing vma tree in mmap_region()"), which is the point +at which reports started to be submitted concerning this bug. + +We were made definitely aware of the bug thanks to the kind efforts of +Bert Karwatzki who helped enormously in my being able to track this down +and identify the cause of it. + +The bug arises when an attempt is made to perform a spanning store across +two leaf nodes, where the right leaf node is the rightmost child of the +shared parent, AND the store completely consumes the right-most node. + +This results in mas_wr_spanning_store() mistakenly duplicating the new and +existing entries at the maximum pivot within the range, and thus maple +tree corruption. + +The fix patch corrects this by detecting this scenario and disallowing the +mistaken duplicate copy. + +The fix patch commit message goes into great detail as to how this occurs. + +This series also includes a test which reliably reproduces the issue, and +asserts that the fix works correctly. + +Bert has kindly tested the fix and confirmed it resolved his issues. Also +Mikhail Gavrilov kindly reported what appears to be precisely the same +bug, which this fix should also resolve. + + +This patch (of 2): + +There has been a subtle bug present in the maple tree implementation from +its inception. + +This arises from how stores are performed - when a store occurs, it will +overwrite overlapping ranges and adjust the tree as necessary to +accommodate this. + +A range may always ultimately span two leaf nodes. 
In this instance we +walk the two leaf nodes, determine which elements are not overwritten to +the left and to the right of the start and end of the ranges respectively +and then rebalance the tree to contain these entries and the newly +inserted one. + +This kind of store is dubbed a 'spanning store' and is implemented by +mas_wr_spanning_store(). + +In order to reach this stage, mas_store_gfp() invokes +mas_wr_preallocate(), mas_wr_store_type() and mas_wr_walk() in turn to +walk the tree and update the object (mas) to traverse to the location +where the write should be performed, determining its store type. + +When a spanning store is required, this function returns false stopping at +the parent node which contains the target range, and mas_wr_store_type() +marks the mas->store_type as wr_spanning_store to denote this fact. + +When we go to perform the store in mas_wr_spanning_store(), we first +determine the elements AFTER the END of the range we wish to store (that +is, to the right of the entry to be inserted) - we do this by walking to +the NEXT pivot in the tree (i.e. r_mas.last + 1), starting at the node we +have just determined contains the range over which we intend to write. + +We then turn our attention to the entries to the left of the entry we are +inserting, whose state is represented by l_mas, and copy these into a 'big +node', which is a special node which contains enough slots to contain two +leaf node's worth of data. + +We then copy the entry we wish to store immediately after this - the copy +and the insertion of the new entry is performed by mas_store_b_node(). + +After this we copy the elements to the right of the end of the range which +we are inserting, if we have not exceeded the length of the node (i.e. +r_mas.offset <= r_mas.end). + +Herein lies the bug - under very specific circumstances, this logic can +break and corrupt the maple tree. 
+ +Consider the following tree: + +Height + 0 Root Node + / \ + pivot = 0xffff / \ pivot = ULONG_MAX + / \ + 1 A [-----] ... + / \ + pivot = 0x4fff / \ pivot = 0xffff + / \ + 2 (LEAVES) B [-----] [-----] C + ^--- Last pivot 0xffff. + +Now imagine we wish to store an entry in the range [0x4000, 0xffff] (note +that all ranges expressed in maple tree code are inclusive): + +1. mas_store_gfp() descends the tree, finds node A at <=0xffff, then + determines that this is a spanning store across nodes B and C. The mas + state is set such that the current node from which we traverse further + is node A. + +2. In mas_wr_spanning_store() we try to find elements to the right of pivot + 0xffff by searching for an index of 0x10000: + + - mas_wr_walk_index() invokes mas_wr_walk_descend() and + mas_wr_node_walk() in turn. + + - mas_wr_node_walk() loops over entries in node A until EITHER it + finds an entry whose pivot equals or exceeds 0x10000 OR it + reaches the final entry. + + - Since no entry has a pivot equal to or exceeding 0x10000, pivot + 0xffff is selected, leading to node C. + + - mas_wr_walk_traverse() resets the mas state to traverse node C. We + loop around and invoke mas_wr_walk_descend() and mas_wr_node_walk() + in turn once again. + + - Again, we reach the last entry in node C, which has a pivot of + 0xffff. + +3. We then copy the elements to the left of 0x4000 in node B to the big + node via mas_store_b_node(), and insert the new [0x4000, 0xffff] entry + too. + +4. We determine whether we have any entries to copy from the right of the + end of the range via - and with r_mas set up at the entry at pivot + 0xffff, r_mas.offset <= r_mas.end, and then we DUPLICATE the entry at + pivot 0xffff. + +5. BUG! The maple tree is corrupted with a duplicate entry. + +This requires a very specific set of circumstances - we must be spanning +the last element in a leaf node, which is the last element in the parent +node. 
+ +spanning store across two leaf nodes with a range that ends at that shared +pivot. + +A potential solution to this problem would simply be to reset the walk +each time we traverse r_mas, however given the rarity of this situation it +seems that would be rather inefficient. + +Instead, this patch detects if the right hand node is populated, i.e. has +anything we need to copy. + +We do so by only copying elements from the right of the entry being +inserted when the maximum value present exceeds the last, rather than +basing this on offset position. + +The patch also updates some comments and eliminates the unused bool return +value in mas_wr_walk_index(). + +The work performed in commit f8d112a4e657 ("mm/mmap: avoid zeroing vma +tree in mmap_region()") seems to have made the probability of this event +much more likely, which is the point at which reports started to be +submitted concerning this bug. + +The motivation for this change arose from Bert Karwatzki's report of +encountering mm instability after the release of kernel v6.12-rc1 which, +after the use of CONFIG_DEBUG_VM_MAPLE_TREE and similar configuration +options, was identified as maple tree corruption. + +After Bert very generously provided his time and ability to reproduce this +event consistently, I was able to finally identify that the issue +discussed in this commit message was occurring for him. + +Link: https://lkml.kernel.org/r/cover.1728314402.git.lorenzo.stoakes@oracle.com +Link: https://lkml.kernel.org/r/48b349a2a0f7c76e18772712d0997a5e12ab0a3b.1728314403.git.lorenzo.stoakes@oracle.com +Fixes: 54a611b60590 ("Maple Tree: add new data structure") +Signed-off-by: Lorenzo Stoakes +Reported-by: Bert Karwatzki +Closes: https://lore.kernel.org/all/20241001023402.3374-1-spasswolf@web.de/ +Tested-by: Bert Karwatzki +Reported-by: Mikhail Gavrilov +Closes: https://lore.kernel.org/all/CABXGCsOPwuoNOqSMmAvWO2Fz4TEmPnjFj-b7iF+XFRu1h7-+Dg@mail.gmail.com/ +Acked-by: Vlastimil Babka +Reviewed-by: Liam R. 
Howlett +Tested-by: Mikhail Gavrilov +Reviewed-by: Wei Yang +Cc: Matthew Wilcox +Cc: Sidhartha Kumar +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + lib/maple_tree.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +--- a/lib/maple_tree.c ++++ b/lib/maple_tree.c +@@ -2196,6 +2196,8 @@ static inline void mas_node_or_none(stru + + /* + * mas_wr_node_walk() - Find the correct offset for the index in the @mas. ++ * If @mas->index cannot be found within the containing ++ * node, we traverse to the last entry in the node. + * @wr_mas: The maple write state + * + * Uses mas_slot_locked() and does not need to worry about dead nodes. +@@ -3609,7 +3611,7 @@ static bool mas_wr_walk(struct ma_wr_sta + return true; + } + +-static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) ++static void mas_wr_walk_index(struct ma_wr_state *wr_mas) + { + struct ma_state *mas = wr_mas->mas; + +@@ -3618,11 +3620,9 @@ static bool mas_wr_walk_index(struct ma_ + wr_mas->content = mas_slot_locked(mas, wr_mas->slots, + mas->offset); + if (ma_is_leaf(wr_mas->type)) +- return true; ++ return; + mas_wr_walk_traverse(wr_mas); +- + } +- return true; + } + /* + * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. +@@ -3853,8 +3853,8 @@ static inline int mas_wr_spanning_store( + memset(&b_node, 0, sizeof(struct maple_big_node)); + /* Copy l_mas and store the value in b_node. */ + mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); +- /* Copy r_mas into b_node. */ +- if (r_mas.offset <= r_mas.end) ++ /* Copy r_mas into b_node if there is anything to copy. 
*/ ++ if (r_mas.max > r_mas.last) + mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, + &b_node, b_node.b_end + 1); + else diff --git a/queue-6.11/mm-damon-tests-sysfs-kunit.h-fix-memory-leak-in-damon_sysfs_test_add_targets.patch b/queue-6.11/mm-damon-tests-sysfs-kunit.h-fix-memory-leak-in-damon_sysfs_test_add_targets.patch new file mode 100644 index 00000000000..0bfb7424de3 --- /dev/null +++ b/queue-6.11/mm-damon-tests-sysfs-kunit.h-fix-memory-leak-in-damon_sysfs_test_add_targets.patch @@ -0,0 +1,48 @@ +From 2d6a1c835685de3b0c8e8dc871f60f4ef92ab01a Mon Sep 17 00:00:00 2001 +From: Jinjie Ruan +Date: Thu, 10 Oct 2024 20:53:23 +0800 +Subject: mm/damon/tests/sysfs-kunit.h: fix memory leak in damon_sysfs_test_add_targets() + +From: Jinjie Ruan + +commit 2d6a1c835685de3b0c8e8dc871f60f4ef92ab01a upstream. + +The sysfs_target->regions allocated in damon_sysfs_regions_alloc() is not +freed in damon_sysfs_test_add_targets(), which cause the following memory +leak, free it to fix it. + + unreferenced object 0xffffff80c2a8db80 (size 96): + comm "kunit_try_catch", pid 187, jiffies 4294894363 + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace (crc 0): + [<0000000001e3714d>] kmemleak_alloc+0x34/0x40 + [<000000008e6835c1>] __kmalloc_cache_noprof+0x26c/0x2f4 + [<000000001286d9f8>] damon_sysfs_test_add_targets+0x1cc/0x738 + [<0000000032ef8f77>] kunit_try_run_case+0x13c/0x3ac + [<00000000f3edea23>] kunit_generic_run_threadfn_adapter+0x80/0xec + [<00000000adf936cf>] kthread+0x2e8/0x374 + [<0000000041bb1628>] ret_from_fork+0x10/0x20 + +Link: https://lkml.kernel.org/r/20241010125323.3127187-1-ruanjinjie@huawei.com +Fixes: b8ee5575f763 ("mm/damon/sysfs-test: add a unit test for damon_sysfs_set_targets()") +Signed-off-by: Jinjie Ruan +Reviewed-by: SeongJae Park +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/sysfs-test.h | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/damon/sysfs-test.h ++++ b/mm/damon/sysfs-test.h +@@ -67,6 +67,7 @@ static void damon_sysfs_test_add_targets + damon_destroy_ctx(ctx); + kfree(sysfs_targets->targets_arr); + kfree(sysfs_targets); ++ kfree(sysfs_target->regions); + kfree(sysfs_target); + } + diff --git a/queue-6.11/mm-khugepaged-fix-the-arguments-order-in-khugepaged_collapse_file-trace-point.patch b/queue-6.11/mm-khugepaged-fix-the-arguments-order-in-khugepaged_collapse_file-trace-point.patch new file mode 100644 index 00000000000..faa58e03d4f --- /dev/null +++ b/queue-6.11/mm-khugepaged-fix-the-arguments-order-in-khugepaged_collapse_file-trace-point.patch @@ -0,0 +1,75 @@ +From 37f0b47c5143c2957909ced44fc09ffb118c99f7 Mon Sep 17 00:00:00 2001 +From: Yang Shi +Date: Fri, 11 Oct 2024 18:17:02 -0700 +Subject: mm: khugepaged: fix the arguments order in khugepaged_collapse_file trace point + +From: Yang Shi + +commit 37f0b47c5143c2957909ced44fc09ffb118c99f7 upstream. + +The "addr" and "is_shmem" arguments have different order in TP_PROTO and +TP_ARGS. 
This resulted in the incorrect trace result: + +text-hugepage-644429 [276] 392092.878683: mm_khugepaged_collapse_file: +mm=0xffff20025d52c440, hpage_pfn=0x200678c00, index=512, addr=1, is_shmem=0, +filename=text-hugepage, nr=512, result=failed + +The value of "addr" is wrong because it was treated as bool value, the +type of is_shmem. + +Fix the order in TP_PROTO to keep "addr" is before "is_shmem" since the +original patch review suggested this order to achieve best packing. + +And use "lx" for "addr" instead of "ld" in TP_printk because address is +typically shown in hex. + +After the fix, the trace result looks correct: + +text-hugepage-7291 [004] 128.627251: mm_khugepaged_collapse_file: +mm=0xffff0001328f9500, hpage_pfn=0x20016ea00, index=512, addr=0x400000, +is_shmem=0, filename=text-hugepage, nr=512, result=failed + +Link: https://lkml.kernel.org/r/20241012011702.1084846-1-yang@os.amperecomputing.com +Fixes: 4c9473e87e75 ("mm/khugepaged: add tracepoint to collapse_file()") +Signed-off-by: Yang Shi +Cc: Gautam Menghani +Cc: Steven Rostedt (Google) +Cc: [6.2+] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/trace/events/huge_memory.h | 4 ++-- + mm/khugepaged.c | 2 +- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/include/trace/events/huge_memory.h ++++ b/include/trace/events/huge_memory.h +@@ -208,7 +208,7 @@ TRACE_EVENT(mm_khugepaged_scan_file, + + TRACE_EVENT(mm_khugepaged_collapse_file, + TP_PROTO(struct mm_struct *mm, struct folio *new_folio, pgoff_t index, +- bool is_shmem, unsigned long addr, struct file *file, ++ unsigned long addr, bool is_shmem, struct file *file, + int nr, int result), + TP_ARGS(mm, new_folio, index, addr, is_shmem, file, nr, result), + TP_STRUCT__entry( +@@ -233,7 +233,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file, + __entry->result = result; + ), + +- TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%ld, is_shmem=%d, filename=%s, nr=%d, result=%s", ++ TP_printk("mm=%p, hpage_pfn=0x%lx, 
index=%ld, addr=%lx, is_shmem=%d, filename=%s, nr=%d, result=%s", + __entry->mm, + __entry->hpfn, + __entry->index, +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -2219,7 +2219,7 @@ rollback: + folio_put(new_folio); + out: + VM_BUG_ON(!list_empty(&pagelist)); +- trace_mm_khugepaged_collapse_file(mm, new_folio, index, is_shmem, addr, file, HPAGE_PMD_NR, result); ++ trace_mm_khugepaged_collapse_file(mm, new_folio, index, addr, is_shmem, file, HPAGE_PMD_NR, result); + return result; + } + diff --git a/queue-6.11/mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch b/queue-6.11/mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch new file mode 100644 index 00000000000..2913ed6f93d --- /dev/null +++ b/queue-6.11/mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch @@ -0,0 +1,51 @@ +From b130ba4a6259f6b64d8af15e9e7ab1e912bcb7ad Mon Sep 17 00:00:00 2001 +From: Wei Xu +Date: Mon, 14 Oct 2024 22:12:11 +0000 +Subject: mm/mglru: only clear kswapd_failures if reclaimable + +From: Wei Xu + +commit b130ba4a6259f6b64d8af15e9e7ab1e912bcb7ad upstream. + +lru_gen_shrink_node() unconditionally clears kswapd_failures, which can +prevent kswapd from sleeping and cause 100% kswapd cpu usage even when +kswapd repeatedly fails to make progress in reclaim. + +Only clear kswap_failures in lru_gen_shrink_node() if reclaim makes some +progress, similar to shrink_node(). + +I happened to run into this problem in one of my tests recently. It +requires a combination of several conditions: The allocator needs to +allocate a right amount of pages such that it can wake up kswapd +without itself being OOM killed; there is no memory for kswapd to +reclaim (My test disables swap and cleans page cache first); no other +process frees enough memory at the same time. 
+ +Link: https://lkml.kernel.org/r/20241014221211.832591-1-weixugc@google.com +Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") +Signed-off-by: Wei Xu +Cc: Axel Rasmussen +Cc: Brian Geffon +Cc: Jan Alexander Steffens +Cc: Suleiman Souhlal +Cc: Yu Zhao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmscan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -4940,8 +4940,8 @@ static void lru_gen_shrink_node(struct p + + blk_finish_plug(&plug); + done: +- /* kswapd should never fail */ +- pgdat->kswapd_failures = 0; ++ if (sc->nr_reclaimed > reclaimed) ++ pgdat->kswapd_failures = 0; + } + + /****************************************************************************** diff --git a/queue-6.11/mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch b/queue-6.11/mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch new file mode 100644 index 00000000000..5821cbfb193 --- /dev/null +++ b/queue-6.11/mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch @@ -0,0 +1,134 @@ +From 6fa1066fc5d00cb9f1b0e83b7ff6ef98d26ba2aa Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Mon, 7 Oct 2024 23:42:04 +0200 +Subject: mm/mremap: fix move_normal_pmd/retract_page_tables race + +From: Jann Horn + +commit 6fa1066fc5d00cb9f1b0e83b7ff6ef98d26ba2aa upstream. + +In mremap(), move_page_tables() looks at the type of the PMD entry and the +specified address range to figure out by which method the next chunk of +page table entries should be moved. + +At that point, the mmap_lock is held in write mode, but no rmap locks are +held yet. For PMD entries that point to page tables and are fully covered +by the source address range, move_pgt_entry(NORMAL_PMD, ...) is called, +which first takes rmap locks, then does move_normal_pmd(). 
+move_normal_pmd() takes the necessary page table locks at source and +destination, then moves an entire page table from the source to the +destination. + +The problem is: The rmap locks, which protect against concurrent page +table removal by retract_page_tables() in the THP code, are only taken +after the PMD entry has been read and it has been decided how to move it. +So we can race as follows (with two processes that have mappings of the +same tmpfs file that is stored on a tmpfs mount with huge=advise); note +that process A accesses page tables through the MM while process B does it +through the file rmap: + +process A process B +========= ========= +mremap + mremap_to + move_vma + move_page_tables + get_old_pmd + alloc_new_pmd + *** PREEMPT *** + madvise(MADV_COLLAPSE) + do_madvise + madvise_walk_vmas + madvise_vma_behavior + madvise_collapse + hpage_collapse_scan_file + collapse_file + retract_page_tables + i_mmap_lock_read(mapping) + pmdp_collapse_flush + i_mmap_unlock_read(mapping) + move_pgt_entry(NORMAL_PMD, ...) + take_rmap_locks + move_normal_pmd + drop_rmap_locks + +When this happens, move_normal_pmd() can end up creating bogus PMD entries +in the line `pmd_populate(mm, new_pmd, pmd_pgtable(pmd))`. The effect +depends on arch-specific and machine-specific details; on x86, you can end +up with physical page 0 mapped as a page table, which is likely +exploitable for user->kernel privilege escalation. + +Fix the race by letting process B recheck that the PMD still points to a +page table after the rmap locks have been taken. Otherwise, we bail and +let the caller fall back to the PTE-level copying path, which will then +bail immediately at the pmd_none() check. + +Bug reachability: Reaching this bug requires that you can create +shmem/file THP mappings - anonymous THP uses different code that doesn't +zap stuff under rmap locks. 
File THP is gated on an experimental config +flag (CONFIG_READ_ONLY_THP_FOR_FS), so on normal distro kernels you need +shmem THP to hit this bug. As far as I know, getting shmem THP normally +requires that you can mount your own tmpfs with the right mount flags, +which would require creating your own user+mount namespace; though I don't +know if some distros maybe enable shmem THP by default or something like +that. + +Bug impact: This issue can likely be used for user->kernel privilege +escalation when it is reachable. + +Link: https://lkml.kernel.org/r/20241007-move_normal_pmd-vs-collapse-fix-2-v1-1-5ead9631f2ea@google.com +Fixes: 1d65b771bc08 ("mm/khugepaged: retract_page_tables() without mmap or vma lock") +Signed-off-by: Jann Horn +Signed-off-by: David Hildenbrand +Co-developed-by: David Hildenbrand +Closes: https://project-zero.issues.chromium.org/371047675 +Acked-by: Qi Zheng +Reviewed-by: Lorenzo Stoakes +Cc: Hugh Dickins +Cc: Joel Fernandes +Cc: Matthew Wilcox +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mremap.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +--- a/mm/mremap.c ++++ b/mm/mremap.c +@@ -238,6 +238,7 @@ static bool move_normal_pmd(struct vm_ar + { + spinlock_t *old_ptl, *new_ptl; + struct mm_struct *mm = vma->vm_mm; ++ bool res = false; + pmd_t pmd; + + if (!arch_supports_page_table_move()) +@@ -277,19 +278,25 @@ static bool move_normal_pmd(struct vm_ar + if (new_ptl != old_ptl) + spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + +- /* Clear the pmd */ + pmd = *old_pmd; ++ ++ /* Racing with collapse? 
*/ ++ if (unlikely(!pmd_present(pmd) || pmd_leaf(pmd))) ++ goto out_unlock; ++ /* Clear the pmd */ + pmd_clear(old_pmd); ++ res = true; + + VM_BUG_ON(!pmd_none(*new_pmd)); + + pmd_populate(mm, new_pmd, pmd_pgtable(pmd)); + flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); ++out_unlock: + if (new_ptl != old_ptl) + spin_unlock(new_ptl); + spin_unlock(old_ptl); + +- return true; ++ return res; + } + #else + static inline bool move_normal_pmd(struct vm_area_struct *vma, diff --git a/queue-6.11/mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch b/queue-6.11/mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch new file mode 100644 index 00000000000..b47305f2835 --- /dev/null +++ b/queue-6.11/mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch @@ -0,0 +1,46 @@ +From 7528c4fb1237512ee18049f852f014eba80bbe8d Mon Sep 17 00:00:00 2001 +From: Liu Shixin +Date: Tue, 15 Oct 2024 09:45:21 +0800 +Subject: mm/swapfile: skip HugeTLB pages for unuse_vma + +From: Liu Shixin + +commit 7528c4fb1237512ee18049f852f014eba80bbe8d upstream. + +I got a bad pud error and lost a 1GB HugeTLB when calling swapoff. The +problem can be reproduced by the following steps: + + 1. Allocate an anonymous 1GB HugeTLB and some other anonymous memory. + 2. Swapout the above anonymous memory. + 3. run swapoff and we will get a bad pud error in kernel message: + + mm/pgtable-generic.c:42: bad pud 00000000743d215d(84000001400000e7) + +We can tell that pud_clear_bad is called by pud_none_or_clear_bad in +unuse_pud_range() by ftrace. And therefore the HugeTLB pages will never +be freed because we lost it from page table. We can skip HugeTLB pages +for unuse_vma to fix it. 
+ +Link: https://lkml.kernel.org/r/20241015014521.570237-1-liushixin2@huawei.com +Fixes: 0fe6e20b9c4c ("hugetlb, rmap: add reverse mapping for hugepage") +Signed-off-by: Liu Shixin +Acked-by: Muchun Song +Cc: Naoya Horiguchi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/swapfile.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -2106,7 +2106,7 @@ static int unuse_mm(struct mm_struct *mm + + mmap_read_lock(mm); + for_each_vma(vmi, vma) { +- if (vma->anon_vma) { ++ if (vma->anon_vma && !is_vm_hugetlb_page(vma)) { + ret = unuse_vma(vma, type); + if (ret) + break; diff --git a/queue-6.11/nilfs2-propagate-directory-read-errors-from-nilfs_find_entry.patch b/queue-6.11/nilfs2-propagate-directory-read-errors-from-nilfs_find_entry.patch new file mode 100644 index 00000000000..da34110d3a9 --- /dev/null +++ b/queue-6.11/nilfs2-propagate-directory-read-errors-from-nilfs_find_entry.patch @@ -0,0 +1,227 @@ +From 08cfa12adf888db98879dbd735bc741360a34168 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Fri, 4 Oct 2024 12:35:31 +0900 +Subject: nilfs2: propagate directory read errors from nilfs_find_entry() + +From: Ryusuke Konishi + +commit 08cfa12adf888db98879dbd735bc741360a34168 upstream. + +Syzbot reported that a task hang occurs in vcs_open() during a fuzzing +test for nilfs2. + +The root cause of this problem is that in nilfs_find_entry(), which +searches for directory entries, ignores errors when loading a directory +page/folio via nilfs_get_folio() fails. + +If the filesystem images is corrupted, and the i_size of the directory +inode is large, and the directory page/folio is successfully read but +fails the sanity check, for example when it is zero-filled, +nilfs_check_folio() may continue to spit out error messages in bursts. + +Fix this issue by propagating the error to the callers when loading a +page/folio fails in nilfs_find_entry(). 
+ +The current interface of nilfs_find_entry() and its callers is outdated +and cannot propagate error codes such as -EIO and -ENOMEM returned via +nilfs_find_entry(), so fix it together. + +Link: https://lkml.kernel.org/r/20241004033640.6841-1-konishi.ryusuke@gmail.com +Fixes: 2ba466d74ed7 ("nilfs2: directory entry operations") +Signed-off-by: Ryusuke Konishi +Reported-by: Lizhi Xu +Closes: https://lkml.kernel.org/r/20240927013806.3577931-1-lizhi.xu@windriver.com +Reported-by: syzbot+8a192e8d090fa9a31135@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=8a192e8d090fa9a31135 +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/dir.c | 48 +++++++++++++++++++++++++----------------------- + fs/nilfs2/namei.c | 39 ++++++++++++++++++++++++++------------- + fs/nilfs2/nilfs.h | 2 +- + 3 files changed, 52 insertions(+), 37 deletions(-) + +--- a/fs/nilfs2/dir.c ++++ b/fs/nilfs2/dir.c +@@ -323,7 +323,7 @@ static int nilfs_readdir(struct file *fi + * The folio is mapped and unlocked. When the caller is finished with + * the entry, it should call folio_release_kmap(). + * +- * On failure, returns NULL and the caller should ignore foliop. ++ * On failure, returns an error pointer and the caller should ignore foliop. 
+ */ + struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, + const struct qstr *qstr, struct folio **foliop) +@@ -346,22 +346,24 @@ struct nilfs_dir_entry *nilfs_find_entry + do { + char *kaddr = nilfs_get_folio(dir, n, foliop); + +- if (!IS_ERR(kaddr)) { +- de = (struct nilfs_dir_entry *)kaddr; +- kaddr += nilfs_last_byte(dir, n) - reclen; +- while ((char *) de <= kaddr) { +- if (de->rec_len == 0) { +- nilfs_error(dir->i_sb, +- "zero-length directory entry"); +- folio_release_kmap(*foliop, kaddr); +- goto out; +- } +- if (nilfs_match(namelen, name, de)) +- goto found; +- de = nilfs_next_entry(de); ++ if (IS_ERR(kaddr)) ++ return ERR_CAST(kaddr); ++ ++ de = (struct nilfs_dir_entry *)kaddr; ++ kaddr += nilfs_last_byte(dir, n) - reclen; ++ while ((char *)de <= kaddr) { ++ if (de->rec_len == 0) { ++ nilfs_error(dir->i_sb, ++ "zero-length directory entry"); ++ folio_release_kmap(*foliop, kaddr); ++ goto out; + } +- folio_release_kmap(*foliop, kaddr); ++ if (nilfs_match(namelen, name, de)) ++ goto found; ++ de = nilfs_next_entry(de); + } ++ folio_release_kmap(*foliop, kaddr); ++ + if (++n >= npages) + n = 0; + /* next folio is past the blocks we've got */ +@@ -374,7 +376,7 @@ struct nilfs_dir_entry *nilfs_find_entry + } + } while (n != start); + out: +- return NULL; ++ return ERR_PTR(-ENOENT); + + found: + ei->i_dir_start_lookup = n; +@@ -418,18 +420,18 @@ fail: + return NULL; + } + +-ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) ++int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) + { +- ino_t res = 0; + struct nilfs_dir_entry *de; + struct folio *folio; + + de = nilfs_find_entry(dir, qstr, &folio); +- if (de) { +- res = le64_to_cpu(de->inode); +- folio_release_kmap(folio, de); +- } +- return res; ++ if (IS_ERR(de)) ++ return PTR_ERR(de); ++ ++ *ino = le64_to_cpu(de->inode); ++ folio_release_kmap(folio, de); ++ return 0; + } + + void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, +--- 
a/fs/nilfs2/namei.c ++++ b/fs/nilfs2/namei.c +@@ -55,12 +55,20 @@ nilfs_lookup(struct inode *dir, struct d + { + struct inode *inode; + ino_t ino; ++ int res; + + if (dentry->d_name.len > NILFS_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + +- ino = nilfs_inode_by_name(dir, &dentry->d_name); +- inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL; ++ res = nilfs_inode_by_name(dir, &dentry->d_name, &ino); ++ if (res) { ++ if (res != -ENOENT) ++ return ERR_PTR(res); ++ inode = NULL; ++ } else { ++ inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); ++ } ++ + return d_splice_alias(inode, dentry); + } + +@@ -263,10 +271,11 @@ static int nilfs_do_unlink(struct inode + struct folio *folio; + int err; + +- err = -ENOENT; + de = nilfs_find_entry(dir, &dentry->d_name, &folio); +- if (!de) ++ if (IS_ERR(de)) { ++ err = PTR_ERR(de); + goto out; ++ } + + inode = d_inode(dentry); + err = -EIO; +@@ -362,10 +371,11 @@ static int nilfs_rename(struct mnt_idmap + if (unlikely(err)) + return err; + +- err = -ENOENT; + old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); +- if (!old_de) ++ if (IS_ERR(old_de)) { ++ err = PTR_ERR(old_de); + goto out; ++ } + + if (S_ISDIR(old_inode->i_mode)) { + err = -EIO; +@@ -382,10 +392,12 @@ static int nilfs_rename(struct mnt_idmap + if (dir_de && !nilfs_empty_dir(new_inode)) + goto out_dir; + +- err = -ENOENT; +- new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, &new_folio); +- if (!new_de) ++ new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, ++ &new_folio); ++ if (IS_ERR(new_de)) { ++ err = PTR_ERR(new_de); + goto out_dir; ++ } + nilfs_set_link(new_dir, new_de, new_folio, old_inode); + folio_release_kmap(new_folio, new_de); + nilfs_mark_inode_dirty(new_dir); +@@ -440,12 +452,13 @@ out: + */ + static struct dentry *nilfs_get_parent(struct dentry *child) + { +- unsigned long ino; ++ ino_t ino; ++ int res; + struct nilfs_root *root; + +- ino = nilfs_inode_by_name(d_inode(child), &dotdot_name); +- if 
(!ino) +- return ERR_PTR(-ENOENT); ++ res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); ++ if (res) ++ return ERR_PTR(res); + + root = NILFS_I(d_inode(child))->i_root; + +--- a/fs/nilfs2/nilfs.h ++++ b/fs/nilfs2/nilfs.h +@@ -233,7 +233,7 @@ static inline __u32 nilfs_mask_flags(umo + + /* dir.c */ + int nilfs_add_link(struct dentry *, struct inode *); +-ino_t nilfs_inode_by_name(struct inode *, const struct qstr *); ++int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); + int nilfs_make_empty(struct inode *, struct inode *); + struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, + struct folio **); diff --git a/queue-6.11/selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch b/queue-6.11/selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch new file mode 100644 index 00000000000..17f13020bd7 --- /dev/null +++ b/queue-6.11/selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch @@ -0,0 +1,50 @@ +From e142cc87ac4ec618f2ccf5f68aedcd6e28a59d9d Mon Sep 17 00:00:00 2001 +From: Edward Liaw +Date: Thu, 3 Oct 2024 21:17:11 +0000 +Subject: selftests/mm: fix deadlock for fork after pthread_create on ARM + +From: Edward Liaw + +commit e142cc87ac4ec618f2ccf5f68aedcd6e28a59d9d upstream. + +On Android with arm, there is some synchronization needed to avoid a +deadlock when forking after pthread_create. 
+ +Link: https://lkml.kernel.org/r/20241003211716.371786-3-edliaw@google.com +Fixes: cff294582798 ("selftests/mm: extend and rename uffd pagemap test") +Signed-off-by: Edward Liaw +Cc: Lokesh Gidra +Cc: Peter Xu +Cc: Shuah Khan +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/uffd-unit-tests.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/tools/testing/selftests/mm/uffd-unit-tests.c ++++ b/tools/testing/selftests/mm/uffd-unit-tests.c +@@ -241,6 +241,9 @@ static void *fork_event_consumer(void *d + fork_event_args *args = data; + struct uffd_msg msg = { 0 }; + ++ /* Ready for parent thread to fork */ ++ pthread_barrier_wait(&ready_for_fork); ++ + /* Read until a full msg received */ + while (uffd_read_msg(args->parent_uffd, &msg)); + +@@ -308,8 +311,12 @@ static int pagemap_test_fork(int uffd, b + + /* Prepare a thread to resolve EVENT_FORK */ + if (with_event) { ++ pthread_barrier_init(&ready_for_fork, NULL, 2); + if (pthread_create(&thread, NULL, fork_event_consumer, &args)) + err("pthread_create()"); ++ /* Wait for child thread to start before forking */ ++ pthread_barrier_wait(&ready_for_fork); ++ pthread_barrier_destroy(&ready_for_fork); + } + + child = fork(); diff --git a/queue-6.11/selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch b/queue-6.11/selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch new file mode 100644 index 00000000000..29a5e8b0dbf --- /dev/null +++ b/queue-6.11/selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch @@ -0,0 +1,129 @@ +From e61ef21e27e8deed8c474e9f47f4aa7bc37e138c Mon Sep 17 00:00:00 2001 +From: Edward Liaw +Date: Thu, 3 Oct 2024 21:17:10 +0000 +Subject: selftests/mm: replace atomic_bool with pthread_barrier_t + +From: Edward Liaw + +commit e61ef21e27e8deed8c474e9f47f4aa7bc37e138c upstream. + +Patch series "selftests/mm: fix deadlock after pthread_create". 
+ +On Android arm, pthread_create followed by a fork caused a deadlock in the +case where the fork required work to be completed by the created thread. + +Update the synchronization primitive to use pthread_barrier instead of +atomic_bool. + +Apply the same fix to the wp-fork-with-event test. + + +This patch (of 2): + +Swap synchronization primitive with pthread_barrier, so that stdatomic.h +does not need to be included. + +The synchronization is needed on Android ARM64; we see a deadlock with +pthread_create when the parent thread races forward before the child has a +chance to start doing work. + +Link: https://lkml.kernel.org/r/20241003211716.371786-1-edliaw@google.com +Link: https://lkml.kernel.org/r/20241003211716.371786-2-edliaw@google.com +Fixes: cff294582798 ("selftests/mm: extend and rename uffd pagemap test") +Signed-off-by: Edward Liaw +Cc: Lokesh Gidra +Cc: Peter Xu +Cc: Shuah Khan +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/uffd-common.c | 5 +++-- + tools/testing/selftests/mm/uffd-common.h | 3 +-- + tools/testing/selftests/mm/uffd-unit-tests.c | 14 ++++++++------ + 3 files changed, 12 insertions(+), 10 deletions(-) + +--- a/tools/testing/selftests/mm/uffd-common.c ++++ b/tools/testing/selftests/mm/uffd-common.c +@@ -18,7 +18,7 @@ bool test_uffdio_wp = true; + unsigned long long *count_verify; + uffd_test_ops_t *uffd_test_ops; + uffd_test_case_ops_t *uffd_test_case_ops; +-atomic_bool ready_for_fork; ++pthread_barrier_t ready_for_fork; + + static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) + { +@@ -519,7 +519,8 @@ void *uffd_poll_thread(void *arg) + pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].events = POLLIN; + +- ready_for_fork = true; ++ /* Ready for parent thread to fork */ ++ pthread_barrier_wait(&ready_for_fork); + + for (;;) { + ret = poll(pollfd, 2, -1); +--- a/tools/testing/selftests/mm/uffd-common.h ++++ b/tools/testing/selftests/mm/uffd-common.h +@@ -33,7 +33,6 @@ + #include + 
#include + #include +-#include <stdatomic.h> + + #include "../kselftest.h" + #include "vm_util.h" +@@ -105,7 +104,7 @@ extern bool map_shared; + extern bool test_uffdio_wp; + extern unsigned long long *count_verify; + extern volatile bool test_uffdio_copy_eexist; +-extern atomic_bool ready_for_fork; ++extern pthread_barrier_t ready_for_fork; + + extern uffd_test_ops_t anon_uffd_test_ops; + extern uffd_test_ops_t shmem_uffd_test_ops; +--- a/tools/testing/selftests/mm/uffd-unit-tests.c ++++ b/tools/testing/selftests/mm/uffd-unit-tests.c +@@ -774,7 +774,7 @@ static void uffd_sigbus_test_common(bool + char c; + struct uffd_args args = { 0 }; + +- ready_for_fork = false; ++ pthread_barrier_init(&ready_for_fork, NULL, 2); + + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + +@@ -791,8 +791,9 @@ static void uffd_sigbus_test_common(bool + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + +- while (!ready_for_fork) +- ; /* Wait for the poll_thread to start executing before forking */ ++ /* Wait for child thread to start before forking */ ++ pthread_barrier_wait(&ready_for_fork); ++ pthread_barrier_destroy(&ready_for_fork); + + pid = fork(); + if (pid < 0) +@@ -833,7 +834,7 @@ static void uffd_events_test_common(bool + char c; + struct uffd_args args = { 0 }; + +- ready_for_fork = false; ++ pthread_barrier_init(&ready_for_fork, NULL, 2); + + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + if (uffd_register(uffd, area_dst, nr_pages * page_size, +@@ -844,8 +845,9 @@ static void uffd_events_test_common(bool + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + +- while (!ready_for_fork) +- ; /* Wait for the poll_thread to start executing before forking */ ++ /* Wait for child thread to start before forking */ ++ pthread_barrier_wait(&ready_for_fork); ++ pthread_barrier_destroy(&ready_for_fork); + + pid = fork(); + if (pid < 0) diff --git a/queue-6.11/series b/queue-6.11/series index 
924c2f32608..2f1b7464c5c 100644 --- a/queue-6.11/series +++ b/queue-6.11/series @@ -18,3 +18,14 @@ arm64-probes-fix-uprobes-for-big-endian-kernels.patch net-macb-avoid-20s-boot-delay-by-skipping-mdio-bus-registration-for-fixed-link-phy.patch net-microchip-vcap-api-fix-memory-leaks-in-vcap_api_encode_rule_test.patch selftests-mptcp-join-test-for-prohibited-mpc-to-port-based-endp.patch +maple_tree-correct-tree-corruption-on-spanning-store.patch +nilfs2-propagate-directory-read-errors-from-nilfs_find_entry.patch +fat-fix-uninitialized-variable.patch +lib-alloc_tag_module_unload-must-wait-for-pending-kfree_rcu-calls.patch +selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch +selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch +mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch +mm-khugepaged-fix-the-arguments-order-in-khugepaged_collapse_file-trace-point.patch +mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch +mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch +mm-damon-tests-sysfs-kunit.h-fix-memory-leak-in-damon_sysfs_test_add_targets.patch