From: Greg Kroah-Hartman Date: Fri, 18 Oct 2024 08:15:27 +0000 (+0200) Subject: 6.6-stable patches X-Git-Tag: v5.10.228~70 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b41aa5e9e21723710a7e8799605ebf5f54609ca0;p=thirdparty%2Fkernel%2Fstable-queue.git 6.6-stable patches added patches: fat-fix-uninitialized-variable.patch mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch --- diff --git a/queue-6.6/fat-fix-uninitialized-variable.patch b/queue-6.6/fat-fix-uninitialized-variable.patch new file mode 100644 index 00000000000..edea2c3a493 --- /dev/null +++ b/queue-6.6/fat-fix-uninitialized-variable.patch @@ -0,0 +1,36 @@ +From 963a7f4d3b90ee195b895ca06b95757fcba02d1a Mon Sep 17 00:00:00 2001 +From: OGAWA Hirofumi +Date: Fri, 4 Oct 2024 15:03:49 +0900 +Subject: fat: fix uninitialized variable + +From: OGAWA Hirofumi + +commit 963a7f4d3b90ee195b895ca06b95757fcba02d1a upstream. + +syzbot produced this with a corrupted fs image. In theory, however, an IO +error would trigger this also. + +This affects just an error report, so should not be a serious error. 
+ +Link: https://lkml.kernel.org/r/87r08wjsnh.fsf@mail.parknet.co.jp +Link: https://lkml.kernel.org/r/66ff2c95.050a0220.49194.03e9.GAE@google.com +Signed-off-by: OGAWA Hirofumi +Reported-by: syzbot+ef0d7bc412553291aa86@syzkaller.appspotmail.com +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/fat/namei_vfat.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/fat/namei_vfat.c ++++ b/fs/fat/namei_vfat.c +@@ -1037,7 +1037,7 @@ error_inode: + if (corrupt < 0) { + fat_fs_error(new_dir->i_sb, + "%s: Filesystem corrupted (i_pos %lld)", +- __func__, sinfo.i_pos); ++ __func__, new_i_pos); + } + goto out; + } diff --git a/queue-6.6/mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch b/queue-6.6/mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch new file mode 100644 index 00000000000..cea6b3c1539 --- /dev/null +++ b/queue-6.6/mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch @@ -0,0 +1,51 @@ +From b130ba4a6259f6b64d8af15e9e7ab1e912bcb7ad Mon Sep 17 00:00:00 2001 +From: Wei Xu +Date: Mon, 14 Oct 2024 22:12:11 +0000 +Subject: mm/mglru: only clear kswapd_failures if reclaimable + +From: Wei Xu + +commit b130ba4a6259f6b64d8af15e9e7ab1e912bcb7ad upstream. + +lru_gen_shrink_node() unconditionally clears kswapd_failures, which can +prevent kswapd from sleeping and cause 100% kswapd cpu usage even when +kswapd repeatedly fails to make progress in reclaim. + +Only clear kswapd_failures in lru_gen_shrink_node() if reclaim makes some +progress, similar to shrink_node(). + +I happened to run into this problem in one of my tests recently. It +requires a combination of several conditions: The allocator needs to +allocate a right amount of pages such that it can wake up kswapd +without itself being OOM killed; there is no memory for kswapd to +reclaim (My test disables swap and cleans page cache first); no other +process frees enough memory at the same time. 
+ +Link: https://lkml.kernel.org/r/20241014221211.832591-1-weixugc@google.com +Fixes: e4dde56cd208 ("mm: multi-gen LRU: per-node lru_gen_folio lists") +Signed-off-by: Wei Xu +Cc: Axel Rasmussen +Cc: Brian Geffon +Cc: Jan Alexander Steffens +Cc: Suleiman Souhlal +Cc: Yu Zhao +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmscan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/vmscan.c ++++ b/mm/vmscan.c +@@ -5603,8 +5603,8 @@ static void lru_gen_shrink_node(struct p + + blk_finish_plug(&plug); + done: +- /* kswapd should never fail */ +- pgdat->kswapd_failures = 0; ++ if (sc->nr_reclaimed > reclaimed) ++ pgdat->kswapd_failures = 0; + } + + /****************************************************************************** diff --git a/queue-6.6/mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch b/queue-6.6/mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch new file mode 100644 index 00000000000..25894cd27ec --- /dev/null +++ b/queue-6.6/mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch @@ -0,0 +1,139 @@ +From 6fa1066fc5d00cb9f1b0e83b7ff6ef98d26ba2aa Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Mon, 7 Oct 2024 23:42:04 +0200 +Subject: mm/mremap: fix move_normal_pmd/retract_page_tables race + +From: Jann Horn + +commit 6fa1066fc5d00cb9f1b0e83b7ff6ef98d26ba2aa upstream. + +In mremap(), move_page_tables() looks at the type of the PMD entry and the +specified address range to figure out by which method the next chunk of +page table entries should be moved. + +At that point, the mmap_lock is held in write mode, but no rmap locks are +held yet. For PMD entries that point to page tables and are fully covered +by the source address range, move_pgt_entry(NORMAL_PMD, ...) is called, +which first takes rmap locks, then does move_normal_pmd(). 
+move_normal_pmd() takes the necessary page table locks at source and +destination, then moves an entire page table from the source to the +destination. + +The problem is: The rmap locks, which protect against concurrent page +table removal by retract_page_tables() in the THP code, are only taken +after the PMD entry has been read and it has been decided how to move it. +So we can race as follows (with two processes that have mappings of the +same tmpfs file that is stored on a tmpfs mount with huge=advise); note +that process A accesses page tables through the MM while process B does it +through the file rmap: + +process A process B +========= ========= +mremap + mremap_to + move_vma + move_page_tables + get_old_pmd + alloc_new_pmd + *** PREEMPT *** + madvise(MADV_COLLAPSE) + do_madvise + madvise_walk_vmas + madvise_vma_behavior + madvise_collapse + hpage_collapse_scan_file + collapse_file + retract_page_tables + i_mmap_lock_read(mapping) + pmdp_collapse_flush + i_mmap_unlock_read(mapping) + move_pgt_entry(NORMAL_PMD, ...) + take_rmap_locks + move_normal_pmd + drop_rmap_locks + +When this happens, move_normal_pmd() can end up creating bogus PMD entries +in the line `pmd_populate(mm, new_pmd, pmd_pgtable(pmd))`. The effect +depends on arch-specific and machine-specific details; on x86, you can end +up with physical page 0 mapped as a page table, which is likely +exploitable for user->kernel privilege escalation. + +Fix the race by letting process A recheck that the PMD still points to a +page table after the rmap locks have been taken. Otherwise, we bail and +let the caller fall back to the PTE-level copying path, which will then +bail immediately at the pmd_none() check. + +Bug reachability: Reaching this bug requires that you can create +shmem/file THP mappings - anonymous THP uses different code that doesn't +zap stuff under rmap locks. 
File THP is gated on an experimental config +flag (CONFIG_READ_ONLY_THP_FOR_FS), so on normal distro kernels you need +shmem THP to hit this bug. As far as I know, getting shmem THP normally +requires that you can mount your own tmpfs with the right mount flags, +which would require creating your own user+mount namespace; though I don't +know if some distros maybe enable shmem THP by default or something like +that. + +Bug impact: This issue can likely be used for user->kernel privilege +escalation when it is reachable. + +Link: https://lkml.kernel.org/r/20241007-move_normal_pmd-vs-collapse-fix-2-v1-1-5ead9631f2ea@google.com +Fixes: 1d65b771bc08 ("mm/khugepaged: retract_page_tables() without mmap or vma lock") +Signed-off-by: Jann Horn +Signed-off-by: David Hildenbrand +Co-developed-by: David Hildenbrand +Closes: https://project-zero.issues.chromium.org/371047675 +Acked-by: Qi Zheng +Reviewed-by: Lorenzo Stoakes +Cc: Hugh Dickins +Cc: Joel Fernandes +Cc: Matthew Wilcox +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/mremap.c | 11 +++++++++-- + 1 file changed, 9 insertions(+), 2 deletions(-) + +diff --git a/mm/mremap.c b/mm/mremap.c +index 24712f8dbb6b..dda09e957a5d 100644 +--- a/mm/mremap.c ++++ b/mm/mremap.c +@@ -238,6 +238,7 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, + { + spinlock_t *old_ptl, *new_ptl; + struct mm_struct *mm = vma->vm_mm; ++ bool res = false; + pmd_t pmd; + + if (!arch_supports_page_table_move()) +@@ -277,19 +278,25 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr, + if (new_ptl != old_ptl) + spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); + +- /* Clear the pmd */ + pmd = *old_pmd; ++ ++ /* Racing with collapse? 
*/ ++ if (unlikely(!pmd_present(pmd) || pmd_leaf(pmd))) ++ goto out_unlock; ++ /* Clear the pmd */ + pmd_clear(old_pmd); ++ res = true; + + VM_BUG_ON(!pmd_none(*new_pmd)); + + pmd_populate(mm, new_pmd, pmd_pgtable(pmd)); + flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE); ++out_unlock: + if (new_ptl != old_ptl) + spin_unlock(new_ptl); + spin_unlock(old_ptl); + +- return true; ++ return res; + } + #else + static inline bool move_normal_pmd(struct vm_area_struct *vma, +-- +2.47.0 + diff --git a/queue-6.6/mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch b/queue-6.6/mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch new file mode 100644 index 00000000000..73092657bcb --- /dev/null +++ b/queue-6.6/mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch @@ -0,0 +1,46 @@ +From 7528c4fb1237512ee18049f852f014eba80bbe8d Mon Sep 17 00:00:00 2001 +From: Liu Shixin +Date: Tue, 15 Oct 2024 09:45:21 +0800 +Subject: mm/swapfile: skip HugeTLB pages for unuse_vma + +From: Liu Shixin + +commit 7528c4fb1237512ee18049f852f014eba80bbe8d upstream. + +I got a bad pud error and lost a 1GB HugeTLB when calling swapoff. The +problem can be reproduced by the following steps: + + 1. Allocate an anonymous 1GB HugeTLB and some other anonymous memory. + 2. Swapout the above anonymous memory. + 3. run swapoff and we will get a bad pud error in kernel message: + + mm/pgtable-generic.c:42: bad pud 00000000743d215d(84000001400000e7) + +We can tell that pud_clear_bad is called by pud_none_or_clear_bad in +unuse_pud_range() by ftrace. And therefore the HugeTLB pages will never +be freed because we lost it from page table. We can skip HugeTLB pages +for unuse_vma to fix it. 
+ +Link: https://lkml.kernel.org/r/20241015014521.570237-1-liushixin2@huawei.com +Fixes: 0fe6e20b9c4c ("hugetlb, rmap: add reverse mapping for hugepage") +Signed-off-by: Liu Shixin +Acked-by: Muchun Song +Cc: Naoya Horiguchi +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/swapfile.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/swapfile.c ++++ b/mm/swapfile.c +@@ -2003,7 +2003,7 @@ static int unuse_mm(struct mm_struct *mm + + mmap_read_lock(mm); + for_each_vma(vmi, vma) { +- if (vma->anon_vma) { ++ if (vma->anon_vma && !is_vm_hugetlb_page(vma)) { + ret = unuse_vma(vma, type); + if (ret) + break; diff --git a/queue-6.6/selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch b/queue-6.6/selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch new file mode 100644 index 00000000000..838fa6b9ee8 --- /dev/null +++ b/queue-6.6/selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch @@ -0,0 +1,50 @@ +From e142cc87ac4ec618f2ccf5f68aedcd6e28a59d9d Mon Sep 17 00:00:00 2001 +From: Edward Liaw +Date: Thu, 3 Oct 2024 21:17:11 +0000 +Subject: selftests/mm: fix deadlock for fork after pthread_create on ARM + +From: Edward Liaw + +commit e142cc87ac4ec618f2ccf5f68aedcd6e28a59d9d upstream. + +On Android with arm, there is some synchronization needed to avoid a +deadlock when forking after pthread_create. 
+ +Link: https://lkml.kernel.org/r/20241003211716.371786-3-edliaw@google.com +Fixes: cff294582798 ("selftests/mm: extend and rename uffd pagemap test") +Signed-off-by: Edward Liaw +Cc: Lokesh Gidra +Cc: Peter Xu +Cc: Shuah Khan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/uffd-unit-tests.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/tools/testing/selftests/mm/uffd-unit-tests.c ++++ b/tools/testing/selftests/mm/uffd-unit-tests.c +@@ -237,6 +237,9 @@ static void *fork_event_consumer(void *d + fork_event_args *args = data; + struct uffd_msg msg = { 0 }; + ++ /* Ready for parent thread to fork */ ++ pthread_barrier_wait(&ready_for_fork); ++ + /* Read until a full msg received */ + while (uffd_read_msg(args->parent_uffd, &msg)); + +@@ -304,8 +307,12 @@ static int pagemap_test_fork(int uffd, b + + /* Prepare a thread to resolve EVENT_FORK */ + if (with_event) { ++ pthread_barrier_init(&ready_for_fork, NULL, 2); + if (pthread_create(&thread, NULL, fork_event_consumer, &args)) + err("pthread_create()"); ++ /* Wait for child thread to start before forking */ ++ pthread_barrier_wait(&ready_for_fork); ++ pthread_barrier_destroy(&ready_for_fork); + } + + child = fork(); diff --git a/queue-6.6/selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch b/queue-6.6/selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch new file mode 100644 index 00000000000..63f1375b2d2 --- /dev/null +++ b/queue-6.6/selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch @@ -0,0 +1,129 @@ +From e61ef21e27e8deed8c474e9f47f4aa7bc37e138c Mon Sep 17 00:00:00 2001 +From: Edward Liaw +Date: Thu, 3 Oct 2024 21:17:10 +0000 +Subject: selftests/mm: replace atomic_bool with pthread_barrier_t + +From: Edward Liaw + +commit e61ef21e27e8deed8c474e9f47f4aa7bc37e138c upstream. + +Patch series "selftests/mm: fix deadlock after pthread_create". 
+ +On Android arm, pthread_create followed by a fork caused a deadlock in the +case where the fork required work to be completed by the created thread. + +Update the synchronization primitive to use pthread_barrier instead of +atomic_bool. + +Apply the same fix to the wp-fork-with-event test. + + +This patch (of 2): + +Swap synchronization primitive with pthread_barrier, so that stdatomic.h +does not need to be included. + +The synchronization is needed on Android ARM64; we see a deadlock with +pthread_create when the parent thread races forward before the child has a +chance to start doing work. + +Link: https://lkml.kernel.org/r/20241003211716.371786-1-edliaw@google.com +Link: https://lkml.kernel.org/r/20241003211716.371786-2-edliaw@google.com +Fixes: cff294582798 ("selftests/mm: extend and rename uffd pagemap test") +Signed-off-by: Edward Liaw +Cc: Lokesh Gidra +Cc: Peter Xu +Cc: Shuah Khan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/mm/uffd-common.c | 5 +++-- + tools/testing/selftests/mm/uffd-common.h | 3 +-- + tools/testing/selftests/mm/uffd-unit-tests.c | 14 ++++++++------ + 3 files changed, 12 insertions(+), 10 deletions(-) + +--- a/tools/testing/selftests/mm/uffd-common.c ++++ b/tools/testing/selftests/mm/uffd-common.c +@@ -17,7 +17,7 @@ bool map_shared; + bool test_uffdio_wp = true; + unsigned long long *count_verify; + uffd_test_ops_t *uffd_test_ops; +-atomic_bool ready_for_fork; ++pthread_barrier_t ready_for_fork; + + static int uffd_mem_fd_create(off_t mem_size, bool hugetlb) + { +@@ -508,7 +508,8 @@ void *uffd_poll_thread(void *arg) + pollfd[1].fd = pipefd[cpu*2]; + pollfd[1].events = POLLIN; + +- ready_for_fork = true; ++ /* Ready for parent thread to fork */ ++ pthread_barrier_wait(&ready_for_fork); + + for (;;) { + ret = poll(pollfd, 2, -1); +--- a/tools/testing/selftests/mm/uffd-common.h ++++ b/tools/testing/selftests/mm/uffd-common.h +@@ -33,7 +33,6 @@ + #include + #include + #include 
+-#include + + #include "../kselftest.h" + #include "vm_util.h" +@@ -99,7 +98,7 @@ extern bool map_shared; + extern bool test_uffdio_wp; + extern unsigned long long *count_verify; + extern volatile bool test_uffdio_copy_eexist; +-extern atomic_bool ready_for_fork; ++extern pthread_barrier_t ready_for_fork; + + extern uffd_test_ops_t anon_uffd_test_ops; + extern uffd_test_ops_t shmem_uffd_test_ops; +--- a/tools/testing/selftests/mm/uffd-unit-tests.c ++++ b/tools/testing/selftests/mm/uffd-unit-tests.c +@@ -770,7 +770,7 @@ static void uffd_sigbus_test_common(bool + char c; + struct uffd_args args = { 0 }; + +- ready_for_fork = false; ++ pthread_barrier_init(&ready_for_fork, NULL, 2); + + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + +@@ -787,8 +787,9 @@ static void uffd_sigbus_test_common(bool + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + +- while (!ready_for_fork) +- ; /* Wait for the poll_thread to start executing before forking */ ++ /* Wait for child thread to start before forking */ ++ pthread_barrier_wait(&ready_for_fork); ++ pthread_barrier_destroy(&ready_for_fork); + + pid = fork(); + if (pid < 0) +@@ -829,7 +830,7 @@ static void uffd_events_test_common(bool + char c; + struct uffd_args args = { 0 }; + +- ready_for_fork = false; ++ pthread_barrier_init(&ready_for_fork, NULL, 2); + + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + if (uffd_register(uffd, area_dst, nr_pages * page_size, +@@ -840,8 +841,9 @@ static void uffd_events_test_common(bool + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + +- while (!ready_for_fork) +- ; /* Wait for the poll_thread to start executing before forking */ ++ /* Wait for child thread to start before forking */ ++ pthread_barrier_wait(&ready_for_fork); ++ pthread_barrier_destroy(&ready_for_fork); + + pid = fork(); + if (pid < 0) diff --git a/queue-6.6/series b/queue-6.6/series index 5b6fb7a1d53..2852ec3c626 100644 --- 
a/queue-6.6/series +++ b/queue-6.6/series @@ -15,3 +15,9 @@ arm64-probes-fix-uprobes-for-big-endian-kernels.patch net-macb-avoid-20s-boot-delay-by-skipping-mdio-bus-registration-for-fixed-link-phy.patch net-microchip-vcap-api-fix-memory-leaks-in-vcap_api_encode_rule_test.patch irqchip-gic-v3-its-fix-vsync-referencing-an-unmapped-vpe-on-gic-v4.1.patch +fat-fix-uninitialized-variable.patch +selftests-mm-replace-atomic_bool-with-pthread_barrier_t.patch +selftests-mm-fix-deadlock-for-fork-after-pthread_create-on-arm.patch +mm-mremap-fix-move_normal_pmd-retract_page_tables-race.patch +mm-mglru-only-clear-kswapd_failures-if-reclaimable.patch +mm-swapfile-skip-hugetlb-pages-for-unuse_vma.patch