From f0493021a9ae38e5bf4fc1dad8355ae04b920439 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 9 Mar 2025 20:10:47 +0100 Subject: [PATCH] 6.13-stable patches added patches: arm-pgtable-fix-null-pointer-dereference-issue.patch block-fix-conversion-of-gpt-partition-name-to-7-bit.patch bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_device_connected.patch bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_remote_name.patch dma-kmsan-export-kmsan_handle_dma-for-modules.patch hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch mm-abort-vma_modify-on-merge-out-of-memory-failure.patch mm-don-t-skip-arch_sync_kernel_mappings-in-error-paths.patch mm-fix-finish_fault-handling-for-large-folios.patch mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch mm-page_alloc-fix-uninitialized-variable.patch nfs-fix-nfs_release_folio-to-not-deadlock-via-kcompactd-writeback.patch rapidio-add-check-for-rio_add_net-in-rio_scan_alloc_net.patch rapidio-fix-an-api-misues-when-rio_add_net-fails.patch s390-traps-fix-test_monitor_call-inline-assembly.patch selftests-damon-damon_nr_regions-set-ops-update-for-merge-results-check-to-100ms.patch selftests-damon-damon_nr_regions-sort-collected-regiosn-before-checking-with-min-max-boundaries.patch selftests-damon-damos_quota-make-real-expectation-of-quota-exceeds.patch selftests-damon-damos_quota_goal-handle-minimum-quota-that-cannot-be-further-reduced.patch userfaultfd-do-not-block-on-locking-a-large-folio-with-raised-refcount.patch wifi-cfg80211-regulatory-improve-invalid-hints-checking.patch wifi-nl80211-reject-cooked-mode-if-it-is-set-along-with-other-flags.patch --- ...e-fix-null-pointer-dereference-issue.patch | 144 +++++++++ ...rsion-of-gpt-partition-name-to-7-bit.patch | 74 +++++ ...t_alloc_skb-in-mgmt_device_connected.patch | 33 ++ ...r-mgmt_alloc_skb-in-mgmt_remote_name.patch | 32 ++ ...-export-kmsan_handle_dma-for-modules.patch | 
42 +++ ...-folio-before-unmap-hwpoisoned-folio.patch | 85 ++++++ ...odify-on-merge-out-of-memory-failure.patch | 284 ++++++++++++++++++ ..._sync_kernel_mappings-in-error-paths.patch | 65 ++++ ...nish_fault-handling-for-large-folios.patch | 77 +++++ ...ttu-flag-inside-unmap_poisoned_folio.patch | 229 ++++++++++++++ ...-ref-count-first-in-do_migrate_range.patch | 68 +++++ ...age_alloc-fix-uninitialized-variable.patch | 78 +++++ ...not-deadlock-via-kcompactd-writeback.patch | 116 +++++++ ...or-rio_add_net-in-rio_scan_alloc_net.patch | 41 +++ ...an-api-misues-when-rio_add_net-fails.patch | 39 +++ ...ix-test_monitor_call-inline-assembly.patch | 44 +++ ...ate-for-merge-results-check-to-100ms.patch | 46 +++ ...ore-checking-with-min-max-boundaries.patch | 36 +++ ...ke-real-expectation-of-quota-exceeds.patch | 62 ++++ ...quota-that-cannot-be-further-reduced.patch | 43 +++ queue-6.13/series | 23 ++ ...g-a-large-folio-with-raised-refcount.patch | 100 ++++++ ...atory-improve-invalid-hints-checking.patch | 90 ++++++ ...-if-it-is-set-along-with-other-flags.patch | 48 +++ 24 files changed, 1899 insertions(+) create mode 100644 queue-6.13/arm-pgtable-fix-null-pointer-dereference-issue.patch create mode 100644 queue-6.13/block-fix-conversion-of-gpt-partition-name-to-7-bit.patch create mode 100644 queue-6.13/bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_device_connected.patch create mode 100644 queue-6.13/bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_remote_name.patch create mode 100644 queue-6.13/dma-kmsan-export-kmsan_handle_dma-for-modules.patch create mode 100644 queue-6.13/hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch create mode 100644 queue-6.13/mm-abort-vma_modify-on-merge-out-of-memory-failure.patch create mode 100644 queue-6.13/mm-don-t-skip-arch_sync_kernel_mappings-in-error-paths.patch create mode 100644 queue-6.13/mm-fix-finish_fault-handling-for-large-folios.patch create mode 100644 
queue-6.13/mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch create mode 100644 queue-6.13/mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch create mode 100644 queue-6.13/mm-page_alloc-fix-uninitialized-variable.patch create mode 100644 queue-6.13/nfs-fix-nfs_release_folio-to-not-deadlock-via-kcompactd-writeback.patch create mode 100644 queue-6.13/rapidio-add-check-for-rio_add_net-in-rio_scan_alloc_net.patch create mode 100644 queue-6.13/rapidio-fix-an-api-misues-when-rio_add_net-fails.patch create mode 100644 queue-6.13/s390-traps-fix-test_monitor_call-inline-assembly.patch create mode 100644 queue-6.13/selftests-damon-damon_nr_regions-set-ops-update-for-merge-results-check-to-100ms.patch create mode 100644 queue-6.13/selftests-damon-damon_nr_regions-sort-collected-regiosn-before-checking-with-min-max-boundaries.patch create mode 100644 queue-6.13/selftests-damon-damos_quota-make-real-expectation-of-quota-exceeds.patch create mode 100644 queue-6.13/selftests-damon-damos_quota_goal-handle-minimum-quota-that-cannot-be-further-reduced.patch create mode 100644 queue-6.13/userfaultfd-do-not-block-on-locking-a-large-folio-with-raised-refcount.patch create mode 100644 queue-6.13/wifi-cfg80211-regulatory-improve-invalid-hints-checking.patch create mode 100644 queue-6.13/wifi-nl80211-reject-cooked-mode-if-it-is-set-along-with-other-flags.patch diff --git a/queue-6.13/arm-pgtable-fix-null-pointer-dereference-issue.patch b/queue-6.13/arm-pgtable-fix-null-pointer-dereference-issue.patch new file mode 100644 index 0000000000..61dd507a25 --- /dev/null +++ b/queue-6.13/arm-pgtable-fix-null-pointer-dereference-issue.patch @@ -0,0 +1,144 @@ +From a564ccfe300fa6a065beda06ab7f3c140d6b4d63 Mon Sep 17 00:00:00 2001 +From: Qi Zheng +Date: Mon, 17 Feb 2025 10:49:24 +0800 +Subject: arm: pgtable: fix NULL pointer dereference issue + +From: Qi Zheng + +commit a564ccfe300fa6a065beda06ab7f3c140d6b4d63 upstream. 
+ +When update_mmu_cache_range() is called by update_mmu_cache(), the vmf +parameter is NULL, which will cause a NULL pointer dereference issue in +adjust_pte(): + +Unable to handle kernel NULL pointer dereference at virtual address 00000030 when read +Hardware name: Atmel AT91SAM9 +PC is at update_mmu_cache_range+0x1e0/0x278 +LR is at pte_offset_map_rw_nolock+0x18/0x2c +Call trace: + update_mmu_cache_range from remove_migration_pte+0x29c/0x2ec + remove_migration_pte from rmap_walk_file+0xcc/0x130 + rmap_walk_file from remove_migration_ptes+0x90/0xa4 + remove_migration_ptes from migrate_pages_batch+0x6d4/0x858 + migrate_pages_batch from migrate_pages+0x188/0x488 + migrate_pages from compact_zone+0x56c/0x954 + compact_zone from compact_node+0x90/0xf0 + compact_node from kcompactd+0x1d4/0x204 + kcompactd from kthread+0x120/0x12c + kthread from ret_from_fork+0x14/0x38 +Exception stack(0xc0d8bfb0 to 0xc0d8bff8) + +To fix it, do not rely on whether 'ptl' is equal to decide whether to hold +the pte lock, but decide it by whether CONFIG_SPLIT_PTE_PTLOCKS is +enabled. In addition, if two vmas map to the same PTE page, there is no +need to hold the pte lock again, otherwise a deadlock will occur. Just +add the need_lock parameter to let adjust_pte() know this information. 
+ +Link: https://lkml.kernel.org/r/20250217024924.57996-1-zhengqi.arch@bytedance.com +Fixes: fc9c45b71f43 ("arm: adjust_pte() use pte_offset_map_rw_nolock()") +Signed-off-by: Qi Zheng +Reported-by: Ezra Buehler +Closes: https://lore.kernel.org/lkml/CAM1KZSmZ2T_riHvay+7cKEFxoPgeVpHkVFTzVVEQ1BO0cLkHEQ@mail.gmail.com/ +Acked-by: David Hildenbrand +Tested-by: Ezra Buehler +Cc: Hugh Dickins +Cc: Muchun Song +Cc: Qi Zheng +Cc: Russell King +Cc: Ryan Roberts +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/mm/fault-armv.c | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +--- a/arch/arm/mm/fault-armv.c ++++ b/arch/arm/mm/fault-armv.c +@@ -62,7 +62,7 @@ static int do_adjust_pte(struct vm_area_ + } + + static int adjust_pte(struct vm_area_struct *vma, unsigned long address, +- unsigned long pfn, struct vm_fault *vmf) ++ unsigned long pfn, bool need_lock) + { + spinlock_t *ptl; + pgd_t *pgd; +@@ -99,12 +99,11 @@ again: + if (!pte) + return 0; + +- /* +- * If we are using split PTE locks, then we need to take the page +- * lock here. Otherwise we are using shared mm->page_table_lock +- * which is already locked, thus cannot take it. +- */ +- if (ptl != vmf->ptl) { ++ if (need_lock) { ++ /* ++ * Use nested version here to indicate that we are already ++ * holding one similar spinlock. 
++ */ + spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); + if (unlikely(!pmd_same(pmdval, pmdp_get_lockless(pmd)))) { + pte_unmap_unlock(pte, ptl); +@@ -114,7 +113,7 @@ again: + + ret = do_adjust_pte(vma, address, pfn, pte); + +- if (ptl != vmf->ptl) ++ if (need_lock) + spin_unlock(ptl); + pte_unmap(pte); + +@@ -123,9 +122,10 @@ again: + + static void + make_coherent(struct address_space *mapping, struct vm_area_struct *vma, +- unsigned long addr, pte_t *ptep, unsigned long pfn, +- struct vm_fault *vmf) ++ unsigned long addr, pte_t *ptep, unsigned long pfn) + { ++ const unsigned long pmd_start_addr = ALIGN_DOWN(addr, PMD_SIZE); ++ const unsigned long pmd_end_addr = pmd_start_addr + PMD_SIZE; + struct mm_struct *mm = vma->vm_mm; + struct vm_area_struct *mpnt; + unsigned long offset; +@@ -142,6 +142,14 @@ make_coherent(struct address_space *mapp + flush_dcache_mmap_lock(mapping); + vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) { + /* ++ * If we are using split PTE locks, then we need to take the pte ++ * lock. Otherwise we are using shared mm->page_table_lock which ++ * is already locked, thus cannot take it. ++ */ ++ bool need_lock = IS_ENABLED(CONFIG_SPLIT_PTE_PTLOCKS); ++ unsigned long mpnt_addr; ++ ++ /* + * If this VMA is not in our MM, we can ignore it. + * Note that we intentionally mask out the VMA + * that we are fixing up. +@@ -151,7 +159,12 @@ make_coherent(struct address_space *mapp + if (!(mpnt->vm_flags & VM_MAYSHARE)) + continue; + offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT; +- aliases += adjust_pte(mpnt, mpnt->vm_start + offset, pfn, vmf); ++ mpnt_addr = mpnt->vm_start + offset; ++ ++ /* Avoid deadlocks by not grabbing the same PTE lock again. 
*/ ++ if (mpnt_addr >= pmd_start_addr && mpnt_addr < pmd_end_addr) ++ need_lock = false; ++ aliases += adjust_pte(mpnt, mpnt_addr, pfn, need_lock); + } + flush_dcache_mmap_unlock(mapping); + if (aliases) +@@ -194,7 +207,7 @@ void update_mmu_cache_range(struct vm_fa + __flush_dcache_folio(mapping, folio); + if (mapping) { + if (cache_is_vivt()) +- make_coherent(mapping, vma, addr, ptep, pfn, vmf); ++ make_coherent(mapping, vma, addr, ptep, pfn); + else if (vma->vm_flags & VM_EXEC) + __flush_icache_all(); + } diff --git a/queue-6.13/block-fix-conversion-of-gpt-partition-name-to-7-bit.patch b/queue-6.13/block-fix-conversion-of-gpt-partition-name-to-7-bit.patch new file mode 100644 index 0000000000..510d7d2518 --- /dev/null +++ b/queue-6.13/block-fix-conversion-of-gpt-partition-name-to-7-bit.patch @@ -0,0 +1,74 @@ +From e06472bab2a5393430cc2fbc3211cd3602422c1e Mon Sep 17 00:00:00 2001 +From: Olivier Gayot +Date: Wed, 5 Mar 2025 10:21:54 +0800 +Subject: block: fix conversion of GPT partition name to 7-bit +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Olivier Gayot + +commit e06472bab2a5393430cc2fbc3211cd3602422c1e upstream. + +The utf16_le_to_7bit function claims to, naively, convert a UTF-16 +string to a 7-bit ASCII string. By naively, we mean that it: + * drops the first byte of every character in the original UTF-16 string + * checks if all characters are printable, and otherwise replaces them + by exclamation mark "!". + +This means that theoretically, all characters outside the 7-bit ASCII +range should be replaced by another character. Examples: + + * lower-case alpha (ɒ) 0x0252 becomes 0x52 (R) + * ligature OE (œ) 0x0153 becomes 0x53 (S) + * hangul letter pieup (ㅂ) 0x3142 becomes 0x42 (B) + * upper-case gamma (Ɣ) 0x0194 becomes 0x94 (not printable) so gets + replaced by "!" 
+ +The result of this conversion for the GPT partition name is passed to +user-space as PARTNAME via udev, which is confusing and feels questionable. + +However, there is a flaw in the conversion function itself. By dropping +one byte of each character and using isprint() to check if the remaining +byte corresponds to a printable character, we do not actually guarantee +that the resulting character is 7-bit ASCII. + +This happens because we pass 8-bit characters to isprint(), which +in the kernel returns 1 for many values > 0x7f - as defined in ctype.c. + +This results in many values which should be replaced by "!" to be kept +as-is, despite not being valid 7-bit ASCII. Examples: + + * e with acute accent (é) 0x00E9 becomes 0xE9 - kept as-is because + isprint(0xE9) returns 1. + * euro sign (€) 0x20AC becomes 0xAC - kept as-is because isprint(0xAC) + returns 1. + +This way has broken pyudev utility[1], fixes it by using a mask of 7 bits +instead of 8 bits before calling isprint. + +Link: https://github.com/pyudev/pyudev/issues/490#issuecomment-2685794648 [1] +Link: https://lore.kernel.org/linux-block/4cac90c2-e414-4ebb-ae62-2a4589d9dc6e@canonical.com/ +Cc: Mulhern +Cc: Davidlohr Bueso +Cc: stable@vger.kernel.org +Signed-off-by: Olivier Gayot +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20250305022154.3903128-1-ming.lei@redhat.com +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + block/partitions/efi.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/block/partitions/efi.c ++++ b/block/partitions/efi.c +@@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le1 + out[size] = 0; + + while (i < size) { +- u8 c = le16_to_cpu(in[i]) & 0xff; ++ u8 c = le16_to_cpu(in[i]) & 0x7f; + + if (c && !isprint(c)) + c = '!'; diff --git a/queue-6.13/bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_device_connected.patch b/queue-6.13/bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_device_connected.patch new file mode 100644 index 
0000000000..075b11ea6c --- /dev/null +++ b/queue-6.13/bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_device_connected.patch @@ -0,0 +1,33 @@ +From d8df010f72b8a32aaea393e36121738bb53ed905 Mon Sep 17 00:00:00 2001 +From: Haoxiang Li +Date: Fri, 21 Feb 2025 16:58:01 +0800 +Subject: Bluetooth: Add check for mgmt_alloc_skb() in mgmt_device_connected() + +From: Haoxiang Li + +commit d8df010f72b8a32aaea393e36121738bb53ed905 upstream. + +Add check for the return value of mgmt_alloc_skb() in +mgmt_device_connected() to prevent null pointer dereference. + +Fixes: e96741437ef0 ("Bluetooth: mgmt: Make use of mgmt_send_event_skb in MGMT_EV_DEVICE_CONNECTED") +Cc: stable@vger.kernel.org +Signed-off-by: Haoxiang Li +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/mgmt.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/bluetooth/mgmt.c ++++ b/net/bluetooth/mgmt.c +@@ -9791,6 +9791,9 @@ void mgmt_device_connected(struct hci_de + sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0) + + eir_precalc_len(sizeof(conn->dev_class))); + ++ if (!skb) ++ return; ++ + ev = skb_put(skb, sizeof(*ev)); + bacpy(&ev->addr.bdaddr, &conn->dst); + ev->addr.type = link_to_bdaddr(conn->type, conn->dst_type); diff --git a/queue-6.13/bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_remote_name.patch b/queue-6.13/bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_remote_name.patch new file mode 100644 index 0000000000..1ad046e788 --- /dev/null +++ b/queue-6.13/bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_remote_name.patch @@ -0,0 +1,32 @@ +From f2176a07e7b19f73e05c805cf3d130a2999154cb Mon Sep 17 00:00:00 2001 +From: Haoxiang Li +Date: Fri, 21 Feb 2025 16:49:47 +0800 +Subject: Bluetooth: Add check for mgmt_alloc_skb() in mgmt_remote_name() + +From: Haoxiang Li + +commit f2176a07e7b19f73e05c805cf3d130a2999154cb upstream. + +Add check for the return value of mgmt_alloc_skb() in +mgmt_remote_name() to prevent null pointer dereference. 
+ +Fixes: ba17bb62ce41 ("Bluetooth: Fix skb allocation in mgmt_remote_name() & mgmt_device_connected()") +Cc: stable@vger.kernel.org +Signed-off-by: Haoxiang Li +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/mgmt.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/bluetooth/mgmt.c ++++ b/net/bluetooth/mgmt.c +@@ -10544,6 +10544,8 @@ void mgmt_remote_name(struct hci_dev *hd + + skb = mgmt_alloc_skb(hdev, MGMT_EV_DEVICE_FOUND, + sizeof(*ev) + (name ? eir_precalc_len(name_len) : 0)); ++ if (!skb) ++ return; + + ev = skb_put(skb, sizeof(*ev)); + bacpy(&ev->addr.bdaddr, bdaddr); diff --git a/queue-6.13/dma-kmsan-export-kmsan_handle_dma-for-modules.patch b/queue-6.13/dma-kmsan-export-kmsan_handle_dma-for-modules.patch new file mode 100644 index 0000000000..129a76db27 --- /dev/null +++ b/queue-6.13/dma-kmsan-export-kmsan_handle_dma-for-modules.patch @@ -0,0 +1,42 @@ +From 19fac3c93991502a22c5132824c40b6a2e64b136 Mon Sep 17 00:00:00 2001 +From: Sebastian Andrzej Siewior +Date: Tue, 18 Feb 2025 10:14:11 +0100 +Subject: dma: kmsan: export kmsan_handle_dma() for modules + +From: Sebastian Andrzej Siewior + +commit 19fac3c93991502a22c5132824c40b6a2e64b136 upstream. + +kmsan_handle_dma() is used by virtio_ring() which can be built as a +module. kmsan_handle_dma() needs to be exported otherwise building the +virtio_ring fails. + +Export kmsan_handle_dma for modules. 
+ +Link: https://lkml.kernel.org/r/20250218091411.MMS3wBN9@linutronix.de +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-kbuild-all/202502150634.qjxwSeJR-lkp@intel.com/ +Fixes: 7ade4f10779c ("dma: kmsan: unpoison DMA mappings") +Signed-off-by: Sebastian Andrzej Siewior +Cc: Alexander Potapenko +Cc: Dmitriy Vyukov +Cc: Marco Elver +Cc: Peter Zijlstra (Intel) +Cc: Thomas Gleixner +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/kmsan/hooks.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/kmsan/hooks.c ++++ b/mm/kmsan/hooks.c +@@ -357,6 +357,7 @@ void kmsan_handle_dma(struct page *page, + size -= to_go; + } + } ++EXPORT_SYMBOL_GPL(kmsan_handle_dma); + + void kmsan_handle_dma_sg(struct scatterlist *sg, int nents, + enum dma_data_direction dir) diff --git a/queue-6.13/hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch b/queue-6.13/hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch new file mode 100644 index 0000000000..272587c3e8 --- /dev/null +++ b/queue-6.13/hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch @@ -0,0 +1,85 @@ +From af288a426c3e3552b62595c6138ec6371a17dbba Mon Sep 17 00:00:00 2001 +From: Ma Wupeng +Date: Mon, 17 Feb 2025 09:43:29 +0800 +Subject: hwpoison, memory_hotplug: lock folio before unmap hwpoisoned folio + +From: Ma Wupeng + +commit af288a426c3e3552b62595c6138ec6371a17dbba upstream. + +Commit b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to +be offlined") added page poison checks in do_migrate_range in order to make +offlining hwpoisoned pages possible by introducing isolate_lru_page and +try_to_unmap for hwpoisoned pages. However, the folio lock must be held before +calling try_to_unmap. Add it to fix this problem. + +A warning will be produced if the folio is not locked during unmap: + + ------------[ cut here ]------------ + kernel BUG at ./include/linux/swapops.h:400! 
+ Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP + Modules linked in: + CPU: 4 UID: 0 PID: 411 Comm: bash Tainted: G W 6.13.0-rc1-00016-g3c434c7ee82a-dirty #41 + Tainted: [W]=WARN + Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 + pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : try_to_unmap_one+0xb08/0xd3c + lr : try_to_unmap_one+0x3dc/0xd3c + Call trace: + try_to_unmap_one+0xb08/0xd3c (P) + try_to_unmap_one+0x3dc/0xd3c (L) + rmap_walk_anon+0xdc/0x1f8 + rmap_walk+0x3c/0x58 + try_to_unmap+0x88/0x90 + unmap_poisoned_folio+0x30/0xa8 + do_migrate_range+0x4a0/0x568 + offline_pages+0x5a4/0x670 + memory_block_action+0x17c/0x374 + memory_subsys_offline+0x3c/0x78 + device_offline+0xa4/0xd0 + state_store+0x8c/0xf0 + dev_attr_store+0x18/0x2c + sysfs_kf_write+0x44/0x54 + kernfs_fop_write_iter+0x118/0x1a8 + vfs_write+0x3a8/0x4bc + ksys_write+0x6c/0xf8 + __arm64_sys_write+0x1c/0x28 + invoke_syscall+0x44/0x100 + el0_svc_common.constprop.0+0x40/0xe0 + do_el0_svc+0x1c/0x28 + el0_svc+0x30/0xd0 + el0t_64_sync_handler+0xc8/0xcc + el0t_64_sync+0x198/0x19c + Code: f9407be0 b5fff320 d4210000 17ffff97 (d4210000) + ---[ end trace 0000000000000000 ]--- + +Link: https://lkml.kernel.org/r/20250217014329.3610326-4-mawupeng1@huawei.com +Fixes: b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined") +Signed-off-by: Ma Wupeng +Acked-by: David Hildenbrand +Acked-by: Miaohe Lin +Cc: Michal Hocko +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory_hotplug.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1805,8 +1805,11 @@ static void do_migrate_range(unsigned lo + (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { + if (WARN_ON(folio_test_lru(folio))) + folio_isolate_lru(folio); +- if (folio_mapped(folio)) ++ if (folio_mapped(folio)) { ++ folio_lock(folio); + 
unmap_poisoned_folio(folio, pfn, false); ++ folio_unlock(folio); ++ } + + continue; + } diff --git a/queue-6.13/mm-abort-vma_modify-on-merge-out-of-memory-failure.patch b/queue-6.13/mm-abort-vma_modify-on-merge-out-of-memory-failure.patch new file mode 100644 index 0000000000..58945c9741 --- /dev/null +++ b/queue-6.13/mm-abort-vma_modify-on-merge-out-of-memory-failure.patch @@ -0,0 +1,284 @@ +From 47b16d0462a460000b8f05dfb1292377ac48f3ca Mon Sep 17 00:00:00 2001 +From: Lorenzo Stoakes +Date: Sat, 22 Feb 2025 16:19:52 +0000 +Subject: mm: abort vma_modify() on merge out of memory failure + +From: Lorenzo Stoakes + +commit 47b16d0462a460000b8f05dfb1292377ac48f3ca upstream. + +The remainder of vma_modify() relies upon the vmg state remaining pristine +after a merge attempt. + +Usually this is the case, however in the one edge case scenario of a merge +attempt failing not due to the specified range being unmergeable, but +rather due to an out of memory error arising when attempting to commit the +merge, this assumption becomes untrue. + +This results in vmg->start, end being modified, and thus the proceeding +attempts to split the VMA will be done with invalid start/end values. + +Thankfully, it is likely practically impossible for us to hit this in +reality, as it would require a maple tree node pre-allocation failure that +would likely never happen due to it being 'too small to fail', i.e. the +kernel would simply keep retrying reclaim until it succeeded. + +However, this scenario remains theoretically possible, and what we are +doing here is wrong so we must correct it. + +The safest option is, when this scenario occurs, to simply give up the +operation. If we cannot allocate memory to merge, then we cannot allocate +memory to split either (perhaps moreso!). + +Any scenario where this would be happening would be under very extreme +(likely fatal) memory pressure, so it's best we give up early. 
+ +So there is no doubt it is appropriate to simply bail out in this +scenario. + +However, in general we must if at all possible never assume VMG state is +stable after a merge attempt, since merge operations update VMG fields. +As a result, additionally also make this clear by storing start, end in +local variables. + +The issue was reported originally by syzkaller, and by Brad Spengler (via +an off-list discussion), and in both instances it manifested as a +triggering of the assert: + + VM_WARN_ON_VMG(start >= end, vmg); + +In vma_merge_existing_range(). + +It seems at least one scenario in which this is occurring is one in which +the merge being attempted is due to an madvise() across multiple VMAs +which looks like this: + + start end + |<------>| + |----------|------| + | vma | next | + |----------|------| + +When madvise_walk_vmas() is invoked, we first find vma in the above +(determining prev to be equal to vma as we are offset into vma), and then +enter the loop. + +We determine the end of vma that forms part of the range we are +madvise()'ing by setting 'tmp' to this value: + + /* Here vma->vm_start <= start < (end|vma->vm_end) */ + tmp = vma->vm_end; + +We then invoke the madvise() operation via visit(), letting prev get +updated to point to vma as part of the operation: + + /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ + error = visit(vma, &prev, start, tmp, arg); + +Where the visit() function pointer in this instance is +madvise_vma_behavior(). + +As observed in syzkaller reports, it is ultimately madvise_update_vma() +that is invoked, calling vma_modify_flags_name() and vma_modify() in turn. 
+ +Then, in vma_modify(), we attempt the merge: + + merged = vma_merge_existing_range(vmg); + if (merged) + return merged; + +We invoke this with vmg->start, end set to start, tmp as such: + + start tmp + |<--->| + |----------|------| + | vma | next | + |----------|------| + +We find ourselves in the merge right scenario, but the one in which we +cannot remove the middle (we are offset into vma). + +Here we have a special case where vmg->start, end get set to perhaps +unintuitive values - we intended to shrink the middle VMA and expand the +next. + +This means vmg->start, end are set to... vma->vm_start, start. + +Now the commit_merge() fails, and vmg->start, end are left like this. +This means we return to the rest of vma_modify() with vmg->start, end +(here denoted as start', end') set as: + + start' end' + |<-->| + |----------|------| + | vma | next | + |----------|------| + +So we now erroneously try to split accordingly. This is where the +unfortunate stuff begins. + +We start with: + + /* Split any preceding portion of the VMA. */ + if (vma->vm_start < vmg->start) { + ... + } + +This doesn't trigger as we are no longer offset into vma at the start. + +But then we invoke: + + /* Split any trailing portion of the VMA. */ + if (vma->vm_end > vmg->end) { + ... + } + +Which does get invoked. This leaves us with: + + start' end' + |<-->| + |----|-----|------| + | vma| new | next | + |----|-----|------| + +We then return ultimately to madvise_walk_vmas(). 
Here 'new' is unknown, +and putting back the values known in this function we are faced with: + + start tmp end + | | | + |----|-----|------| + | vma| new | next | + |----|-----|------| + prev + +Then: + + start = tmp; + +So: + + start end + | | + |----|-----|------| + | vma| new | next | + |----|-----|------| + prev + +The following code does not cause anything to happen: + + if (prev && start < prev->vm_end) + start = prev->vm_end; + if (start >= end) + break; + +And then we invoke: + + if (prev) + vma = find_vma(mm, prev->vm_end); + +Which is where a problem occurs - we don't know about 'new' so we +essentially look for the vma after prev, which is new, whereas we actually +intended to discover next! + +So we end up with: + + start end + | | + |----|-----|------| + |prev| vma | next | + |----|-----|------| + +And we have successfully bypassed all of the checks madvise_walk_vmas() +has to ensure early exit should we end up moving out of range. + +We loop around, and hit: + + /* Here vma->vm_start <= start < (end|vma->vm_end) */ + tmp = vma->vm_end; + +Oh dear. Now we have: + + tmp + start end + | | + |----|-----|------| + |prev| vma | next | + |----|-----|------| + +We then invoke: + + /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ + error = visit(vma, &prev, start, tmp, arg); + +Where start == tmp. That is, a zero range. This is not good. + +We invoke visit() which is madvise_vma_behavior() which does not check the +range (for good reason, it assumes all checks have been done before it was +called), which in turn finally calls madvise_update_vma(). + +The madvise_update_vma() function calls vma_modify_flags_name() in turn, +which ultimately invokes vma_modify() with... start == end. + +vma_modify() calls vma_merge_existing_range() and finally we hit: + + VM_WARN_ON_VMG(start >= end, vmg); + +Which triggers, as start == end. 
+ +While it might be useful to add some CONFIG_DEBUG_VM asserts in these +instances to catch this kind of error, since we have just eliminated any +possibility of that happening, we will add such asserts separately as to +reduce churn and aid backporting. + +Link: https://lkml.kernel.org/r/20250222161952.41957-1-lorenzo.stoakes@oracle.com +Fixes: 2f1c6611b0a8 ("mm: introduce vma_merge_struct and abstract vma_merge(),vma_modify()") +Signed-off-by: Lorenzo Stoakes +Tested-by: Brad Spengler +Reported-by: Brad Spengler +Reported-by: syzbot+46423ed8fa1f1148c6e4@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/linux-mm/6774c98f.050a0220.25abdd.0991.GAE@google.com/ +Cc: Jann Horn +Cc: Liam Howlett +Cc: Vlastimil Babka +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vma.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/mm/vma.c ++++ b/mm/vma.c +@@ -1508,24 +1508,28 @@ int do_vmi_munmap(struct vma_iterator *v + static struct vm_area_struct *vma_modify(struct vma_merge_struct *vmg) + { + struct vm_area_struct *vma = vmg->vma; ++ unsigned long start = vmg->start; ++ unsigned long end = vmg->end; + struct vm_area_struct *merged; + + /* First, try to merge. */ + merged = vma_merge_existing_range(vmg); + if (merged) + return merged; ++ if (vmg_nomem(vmg)) ++ return ERR_PTR(-ENOMEM); + + /* Split any preceding portion of the VMA. */ +- if (vma->vm_start < vmg->start) { +- int err = split_vma(vmg->vmi, vma, vmg->start, 1); ++ if (vma->vm_start < start) { ++ int err = split_vma(vmg->vmi, vma, start, 1); + + if (err) + return ERR_PTR(err); + } + + /* Split any trailing portion of the VMA. 
*/ +- if (vma->vm_end > vmg->end) { +- int err = split_vma(vmg->vmi, vma, vmg->end, 0); ++ if (vma->vm_end > end) { ++ int err = split_vma(vmg->vmi, vma, end, 0); + + if (err) + return ERR_PTR(err); diff --git a/queue-6.13/mm-don-t-skip-arch_sync_kernel_mappings-in-error-paths.patch b/queue-6.13/mm-don-t-skip-arch_sync_kernel_mappings-in-error-paths.patch new file mode 100644 index 0000000000..cd6f0bfcc6 --- /dev/null +++ b/queue-6.13/mm-don-t-skip-arch_sync_kernel_mappings-in-error-paths.patch @@ -0,0 +1,65 @@ +From 3685024edd270f7c791f993157d65d3c928f3d6e Mon Sep 17 00:00:00 2001 +From: Ryan Roberts +Date: Wed, 26 Feb 2025 12:16:09 +0000 +Subject: mm: don't skip arch_sync_kernel_mappings() in error paths + +From: Ryan Roberts + +commit 3685024edd270f7c791f993157d65d3c928f3d6e upstream. + +Fix callers that previously skipped calling arch_sync_kernel_mappings() if +an error occurred during a pgtable update. The call is still required to +sync any pgtable updates that may have occurred prior to hitting the error +condition. + +These are theoretical bugs discovered during code review. 
+ +Link: https://lkml.kernel.org/r/20250226121610.2401743-1-ryan.roberts@arm.com +Fixes: 2ba3e6947aed ("mm/vmalloc: track which page-table levels were modified") +Fixes: 0c95cba49255 ("mm: apply_to_pte_range warn and fail if a large pte is encountered") +Signed-off-by: Ryan Roberts +Reviewed-by: Anshuman Khandual +Reviewed-by: Catalin Marinas +Cc: Christoph Hellwig +Cc: "Uladzislau Rezki (Sony)" +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory.c | 6 ++++-- + mm/vmalloc.c | 4 ++-- + 2 files changed, 6 insertions(+), 4 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -2971,8 +2971,10 @@ static int __apply_to_page_range(struct + next = pgd_addr_end(addr, end); + if (pgd_none(*pgd) && !create) + continue; +- if (WARN_ON_ONCE(pgd_leaf(*pgd))) +- return -EINVAL; ++ if (WARN_ON_ONCE(pgd_leaf(*pgd))) { ++ err = -EINVAL; ++ break; ++ } + if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { + if (!create) + continue; +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -586,13 +586,13 @@ static int vmap_small_pages_range_noflus + mask |= PGTBL_PGD_MODIFIED; + err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask); + if (err) +- return err; ++ break; + } while (pgd++, addr = next, addr != end); + + if (mask & ARCH_PAGE_TABLE_SYNC_MASK) + arch_sync_kernel_mappings(start, end); + +- return 0; ++ return err; + } + + /* diff --git a/queue-6.13/mm-fix-finish_fault-handling-for-large-folios.patch b/queue-6.13/mm-fix-finish_fault-handling-for-large-folios.patch new file mode 100644 index 0000000000..a5a15f9bb6 --- /dev/null +++ b/queue-6.13/mm-fix-finish_fault-handling-for-large-folios.patch @@ -0,0 +1,77 @@ +From 34b82f33cf3f03bc39e9a205a913d790e1520ade Mon Sep 17 00:00:00 2001 +From: Brian Geffon +Date: Wed, 26 Feb 2025 11:23:41 -0500 +Subject: mm: fix finish_fault() handling for large folios + +From: Brian Geffon + +commit 34b82f33cf3f03bc39e9a205a913d790e1520ade upstream. 
+ +When handling faults for anon shmem finish_fault() will attempt to install +ptes for the entire folio. Unfortunately if it encounters a single +non-pte_none entry in that range it will bail, even if the pte that +triggered the fault is still pte_none. When this situation happens the +fault will be retried endlessly never making forward progress. + +This patch fixes this behavior and if it detects that a pte in the range +is not pte_none it will fall back to setting a single pte. + +[bgeffon@google.com: tweak whitespace] + Link: https://lkml.kernel.org/r/20250227133236.1296853-1-bgeffon@google.com +Link: https://lkml.kernel.org/r/20250226162341.915535-1-bgeffon@google.com +Fixes: 43e027e41423 ("mm: memory: extend finish_fault() to support large folio") +Signed-off-by: Brian Geffon +Suggested-by: Baolin Wang +Reported-by: Marek Maslanka +Cc: Hugh Dickins +Cc: David Hildenbrand +Cc: Hugh Dickens +Cc: Kefeng Wang +Cc: Matthew Wilcow (Oracle) +Cc: Suren Baghdasaryan +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -5104,7 +5104,11 @@ vm_fault_t finish_fault(struct vm_fault + bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) && + !(vma->vm_flags & VM_SHARED); + int type, nr_pages; +- unsigned long addr = vmf->address; ++ unsigned long addr; ++ bool needs_fallback = false; ++ ++fallback: ++ addr = vmf->address; + + /* Did we COW the page? */ + if (is_cow) +@@ -5143,7 +5147,8 @@ vm_fault_t finish_fault(struct vm_fault + * approach also applies to non-anonymous-shmem faults to avoid + * inflating the RSS of the process. 
+ */ +- if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma))) { ++ if (!vma_is_anon_shmem(vma) || unlikely(userfaultfd_armed(vma)) || ++ unlikely(needs_fallback)) { + nr_pages = 1; + } else if (nr_pages > 1) { + pgoff_t idx = folio_page_idx(folio, page); +@@ -5179,9 +5184,9 @@ vm_fault_t finish_fault(struct vm_fault + ret = VM_FAULT_NOPAGE; + goto unlock; + } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { +- update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages); +- ret = VM_FAULT_NOPAGE; +- goto unlock; ++ needs_fallback = true; ++ pte_unmap_unlock(vmf->pte, vmf->ptl); ++ goto fallback; + } + + folio_ref_add(folio, nr_pages - 1); diff --git a/queue-6.13/mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch b/queue-6.13/mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch new file mode 100644 index 0000000000..d4d963e73b --- /dev/null +++ b/queue-6.13/mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch @@ -0,0 +1,229 @@ +From b81679b1633aa43c0d973adfa816d78c1ed0d032 Mon Sep 17 00:00:00 2001 +From: Ma Wupeng +Date: Mon, 17 Feb 2025 09:43:27 +0800 +Subject: mm: memory-failure: update ttu flag inside unmap_poisoned_folio + +From: Ma Wupeng + +commit b81679b1633aa43c0d973adfa816d78c1ed0d032 upstream. + +Patch series "mm: memory_failure: unmap poisoned folio during migrate +properly", v3. + +Fix two bugs during folio migration if the folio is poisoned. + + +This patch (of 3): + +Commit 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to +TTU_HWPOISON") introduce TTU_HWPOISON to replace TTU_IGNORE_HWPOISON in +order to stop send SIGBUS signal when accessing an error page after a +memory error on a clean folio. However during page migration, anon folio +must be set with TTU_HWPOISON during unmap_*(). For pagecache we need +some policy just like the one in hwpoison_user_mappings to set this flag. 
+So move this policy from hwpoison_user_mappings to unmap_poisoned_folio to +handle this warning properly. + +Warning will be produced during unmap of a poisoned folio with the following log: + + ------------[ cut here ]------------ + WARNING: CPU: 1 PID: 365 at mm/rmap.c:1847 try_to_unmap_one+0x8fc/0xd3c + Modules linked in: + CPU: 1 UID: 0 PID: 365 Comm: bash Tainted: G W 6.13.0-rc1-00018-gacdb4bbda7ab #42 + Tainted: [W]=WARN + Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 + pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : try_to_unmap_one+0x8fc/0xd3c + lr : try_to_unmap_one+0x3dc/0xd3c + Call trace: + try_to_unmap_one+0x8fc/0xd3c (P) + try_to_unmap_one+0x3dc/0xd3c (L) + rmap_walk_anon+0xdc/0x1f8 + rmap_walk+0x3c/0x58 + try_to_unmap+0x88/0x90 + unmap_poisoned_folio+0x30/0xa8 + do_migrate_range+0x4a0/0x568 + offline_pages+0x5a4/0x670 + memory_block_action+0x17c/0x374 + memory_subsys_offline+0x3c/0x78 + device_offline+0xa4/0xd0 + state_store+0x8c/0xf0 + dev_attr_store+0x18/0x2c + sysfs_kf_write+0x44/0x54 + kernfs_fop_write_iter+0x118/0x1a8 + vfs_write+0x3a8/0x4bc + ksys_write+0x6c/0xf8 + __arm64_sys_write+0x1c/0x28 + invoke_syscall+0x44/0x100 + el0_svc_common.constprop.0+0x40/0xe0 + do_el0_svc+0x1c/0x28 + el0_svc+0x30/0xd0 + el0t_64_sync_handler+0xc8/0xcc + el0t_64_sync+0x198/0x19c + ---[ end trace 0000000000000000 ]--- + +[mawupeng1@huawei.com: unmap_poisoned_folio(): remove shadowed local `mapping', per Miaohe] + Link: https://lkml.kernel.org/r/20250219060653.3849083-1-mawupeng1@huawei.com +Link: https://lkml.kernel.org/r/20250217014329.3610326-1-mawupeng1@huawei.com +Link: https://lkml.kernel.org/r/20250217014329.3610326-2-mawupeng1@huawei.com +Fixes: 6da6b1d4a7df ("mm/hwpoison: convert TTU_IGNORE_HWPOISON to TTU_HWPOISON") +Signed-off-by: Ma Wupeng +Suggested-by: David Hildenbrand +Acked-by: David Hildenbrand +Acked-by: Miaohe Lin +Cc: Ma Wupeng +Cc: Michal Hocko +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: +Signed-off-by: 
Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/internal.h | 5 ++-- + mm/memory-failure.c | 63 +++++++++++++++++++++++++--------------------------- + mm/memory_hotplug.c | 3 +- + 3 files changed, 36 insertions(+), 35 deletions(-) + +--- a/mm/internal.h ++++ b/mm/internal.h +@@ -1102,7 +1102,7 @@ static inline int find_next_best_node(in + * mm/memory-failure.c + */ + #ifdef CONFIG_MEMORY_FAILURE +-void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu); ++int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill); + void shake_folio(struct folio *folio); + extern int hwpoison_filter(struct page *p); + +@@ -1125,8 +1125,9 @@ unsigned long page_mapped_in_vma(const s + struct vm_area_struct *vma); + + #else +-static inline void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) ++static inline int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) + { ++ return -EBUSY; + } + #endif + +--- a/mm/memory-failure.c ++++ b/mm/memory-failure.c +@@ -1556,11 +1556,35 @@ static int get_hwpoison_page(struct page + return ret; + } + +-void unmap_poisoned_folio(struct folio *folio, enum ttu_flags ttu) ++int unmap_poisoned_folio(struct folio *folio, unsigned long pfn, bool must_kill) + { +- if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) { +- struct address_space *mapping; ++ enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; ++ struct address_space *mapping; ++ ++ if (folio_test_swapcache(folio)) { ++ pr_err("%#lx: keeping poisoned page in swap cache\n", pfn); ++ ttu &= ~TTU_HWPOISON; ++ } ++ ++ /* ++ * Propagate the dirty bit from PTEs to struct page first, because we ++ * need this to decide if we should kill or just drop the page. ++ * XXX: the dirty test could be racy: set_page_dirty() may not always ++ * be called inside page lock (it's recommended but not enforced). 
++ */ ++ mapping = folio_mapping(folio); ++ if (!must_kill && !folio_test_dirty(folio) && mapping && ++ mapping_can_writeback(mapping)) { ++ if (folio_mkclean(folio)) { ++ folio_set_dirty(folio); ++ } else { ++ ttu &= ~TTU_HWPOISON; ++ pr_info("%#lx: corrupted page was clean: dropped without side effects\n", ++ pfn); ++ } ++ } + ++ if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) { + /* + * For hugetlb folios in shared mappings, try_to_unmap + * could potentially call huge_pmd_unshare. Because of +@@ -1572,7 +1596,7 @@ void unmap_poisoned_folio(struct folio * + if (!mapping) { + pr_info("%#lx: could not lock mapping for mapped hugetlb folio\n", + folio_pfn(folio)); +- return; ++ return -EBUSY; + } + + try_to_unmap(folio, ttu|TTU_RMAP_LOCKED); +@@ -1580,6 +1604,8 @@ void unmap_poisoned_folio(struct folio * + } else { + try_to_unmap(folio, ttu); + } ++ ++ return folio_mapped(folio) ? -EBUSY : 0; + } + + /* +@@ -1589,8 +1615,6 @@ void unmap_poisoned_folio(struct folio * + static bool hwpoison_user_mappings(struct folio *folio, struct page *p, + unsigned long pfn, int flags) + { +- enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC | TTU_HWPOISON; +- struct address_space *mapping; + LIST_HEAD(tokill); + bool unmap_success; + int forcekill; +@@ -1613,29 +1637,6 @@ static bool hwpoison_user_mappings(struc + if (!folio_mapped(folio)) + return true; + +- if (folio_test_swapcache(folio)) { +- pr_err("%#lx: keeping poisoned page in swap cache\n", pfn); +- ttu &= ~TTU_HWPOISON; +- } +- +- /* +- * Propagate the dirty bit from PTEs to struct page first, because we +- * need this to decide if we should kill or just drop the page. +- * XXX: the dirty test could be racy: set_page_dirty() may not always +- * be called inside page lock (it's recommended but not enforced). 
+- */ +- mapping = folio_mapping(folio); +- if (!(flags & MF_MUST_KILL) && !folio_test_dirty(folio) && mapping && +- mapping_can_writeback(mapping)) { +- if (folio_mkclean(folio)) { +- folio_set_dirty(folio); +- } else { +- ttu &= ~TTU_HWPOISON; +- pr_info("%#lx: corrupted page was clean: dropped without side effects\n", +- pfn); +- } +- } +- + /* + * First collect all the processes that have the page + * mapped in dirty form. This has to be done before try_to_unmap, +@@ -1643,9 +1644,7 @@ static bool hwpoison_user_mappings(struc + */ + collect_procs(folio, p, &tokill, flags & MF_ACTION_REQUIRED); + +- unmap_poisoned_folio(folio, ttu); +- +- unmap_success = !folio_mapped(folio); ++ unmap_success = !unmap_poisoned_folio(folio, pfn, flags & MF_MUST_KILL); + if (!unmap_success) + pr_err("%#lx: failed to unmap page (folio mapcount=%d)\n", + pfn, folio_mapcount(folio)); +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1806,7 +1806,8 @@ static void do_migrate_range(unsigned lo + if (WARN_ON(folio_test_lru(folio))) + folio_isolate_lru(folio); + if (folio_mapped(folio)) +- unmap_poisoned_folio(folio, TTU_IGNORE_MLOCK); ++ unmap_poisoned_folio(folio, pfn, false); ++ + continue; + } + diff --git a/queue-6.13/mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch b/queue-6.13/mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch new file mode 100644 index 0000000000..66b32c127c --- /dev/null +++ b/queue-6.13/mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch @@ -0,0 +1,68 @@ +From 773b9a6aa6d38894b95088e3ed6f8a701d9f50fd Mon Sep 17 00:00:00 2001 +From: Ma Wupeng +Date: Mon, 17 Feb 2025 09:43:28 +0800 +Subject: mm: memory-hotplug: check folio ref count first in do_migrate_range + +From: Ma Wupeng + +commit 773b9a6aa6d38894b95088e3ed6f8a701d9f50fd upstream. + +If a folio has an increased reference count, folio_try_get() will acquire +it, perform necessary operations, and then release it. 
In the case of a +poisoned folio without an elevated reference count (which is unlikely for +memory-failure), folio_try_get() will simply bypass it. + +Therefore, relocate the folio_try_get() function, responsible for checking +and acquiring this reference count at first. + +Link: https://lkml.kernel.org/r/20250217014329.3610326-3-mawupeng1@huawei.com +Signed-off-by: Ma Wupeng +Acked-by: David Hildenbrand +Acked-by: Miaohe Lin +Cc: Michal Hocko +Cc: Naoya Horiguchi +Cc: Oscar Salvador +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory_hotplug.c | 20 +++++++------------- + 1 file changed, 7 insertions(+), 13 deletions(-) + +--- a/mm/memory_hotplug.c ++++ b/mm/memory_hotplug.c +@@ -1795,12 +1795,12 @@ static void do_migrate_range(unsigned lo + if (folio_test_large(folio)) + pfn = folio_pfn(folio) + folio_nr_pages(folio) - 1; + +- /* +- * HWPoison pages have elevated reference counts so the migration would +- * fail on them. It also doesn't make any sense to migrate them in the +- * first place. Still try to unmap such a page in case it is still mapped +- * (keep the unmap as the catch all safety net). 
+- */ ++ if (!folio_try_get(folio)) ++ continue; ++ ++ if (unlikely(page_folio(page) != folio)) ++ goto put_folio; ++ + if (folio_test_hwpoison(folio) || + (folio_test_large(folio) && folio_test_has_hwpoisoned(folio))) { + if (WARN_ON(folio_test_lru(folio))) +@@ -1811,14 +1811,8 @@ static void do_migrate_range(unsigned lo + folio_unlock(folio); + } + +- continue; +- } +- +- if (!folio_try_get(folio)) +- continue; +- +- if (unlikely(page_folio(page) != folio)) + goto put_folio; ++ } + + if (!isolate_folio_to_list(folio, &source)) { + if (__ratelimit(&migrate_rs)) { diff --git a/queue-6.13/mm-page_alloc-fix-uninitialized-variable.patch b/queue-6.13/mm-page_alloc-fix-uninitialized-variable.patch new file mode 100644 index 0000000000..7dbc1f733b --- /dev/null +++ b/queue-6.13/mm-page_alloc-fix-uninitialized-variable.patch @@ -0,0 +1,78 @@ +From 8fe9ed44dc29fba0786b7e956d2e87179e407582 Mon Sep 17 00:00:00 2001 +From: Hao Zhang +Date: Thu, 27 Feb 2025 11:41:29 +0800 +Subject: mm/page_alloc: fix uninitialized variable + +From: Hao Zhang + +commit 8fe9ed44dc29fba0786b7e956d2e87179e407582 upstream. + +The variable "compact_result" is not initialized in function +__alloc_pages_slowpath(). It causes should_compact_retry() to use an +uninitialized value. + +Initialize variable "compact_result" with the value COMPACT_SKIPPED. 
+ +BUG: KMSAN: uninit-value in __alloc_pages_slowpath+0xee8/0x16c0 mm/page_alloc.c:4416 + __alloc_pages_slowpath+0xee8/0x16c0 mm/page_alloc.c:4416 + __alloc_frozen_pages_noprof+0xa4c/0xe00 mm/page_alloc.c:4752 + alloc_pages_mpol+0x4cd/0x890 mm/mempolicy.c:2270 + alloc_frozen_pages_noprof mm/mempolicy.c:2341 [inline] + alloc_pages_noprof mm/mempolicy.c:2361 [inline] + folio_alloc_noprof+0x1dc/0x350 mm/mempolicy.c:2371 + filemap_alloc_folio_noprof+0xa6/0x440 mm/filemap.c:1019 + __filemap_get_folio+0xb9a/0x1840 mm/filemap.c:1970 + grow_dev_folio fs/buffer.c:1039 [inline] + grow_buffers fs/buffer.c:1105 [inline] + __getblk_slow fs/buffer.c:1131 [inline] + bdev_getblk+0x2c9/0xab0 fs/buffer.c:1431 + getblk_unmovable include/linux/buffer_head.h:369 [inline] + ext4_getblk+0x3b7/0xe50 fs/ext4/inode.c:864 + ext4_bread_batch+0x9f/0x7d0 fs/ext4/inode.c:933 + __ext4_find_entry+0x1ebb/0x36c0 fs/ext4/namei.c:1627 + ext4_lookup_entry fs/ext4/namei.c:1729 [inline] + ext4_lookup+0x189/0xb40 fs/ext4/namei.c:1797 + __lookup_slow+0x538/0x710 fs/namei.c:1793 + lookup_slow+0x6a/0xd0 fs/namei.c:1810 + walk_component fs/namei.c:2114 [inline] + link_path_walk+0xf29/0x1420 fs/namei.c:2479 + path_openat+0x30f/0x6250 fs/namei.c:3985 + do_filp_open+0x268/0x600 fs/namei.c:4016 + do_sys_openat2+0x1bf/0x2f0 fs/open.c:1428 + do_sys_open fs/open.c:1443 [inline] + __do_sys_openat fs/open.c:1459 [inline] + __se_sys_openat fs/open.c:1454 [inline] + __x64_sys_openat+0x2a1/0x310 fs/open.c:1454 + x64_sys_call+0x36f5/0x3c30 arch/x86/include/generated/asm/syscalls_64.h:258 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0xcd/0x1e0 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x77/0x7f + +Local variable compact_result created at: + __alloc_pages_slowpath+0x66/0x16c0 mm/page_alloc.c:4218 + __alloc_frozen_pages_noprof+0xa4c/0xe00 mm/page_alloc.c:4752 + +Link: https://lkml.kernel.org/r/tencent_ED1032321D6510B145CDBA8CBA0093178E09@qq.com +Reported-by: 
syzbot+0cfd5e38e96a5596f2b6@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=0cfd5e38e96a5596f2b6 +Signed-off-by: Hao Zhang +Reviewed-by: Vlastimil Babka +Cc: Michal Hocko +Cc: Mel Gorman +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -4243,6 +4243,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, u + restart: + compaction_retries = 0; + no_progress_loops = 0; ++ compact_result = COMPACT_SKIPPED; + compact_priority = DEF_COMPACT_PRIORITY; + cpuset_mems_cookie = read_mems_allowed_begin(); + zonelist_iter_cookie = zonelist_iter_begin(); diff --git a/queue-6.13/nfs-fix-nfs_release_folio-to-not-deadlock-via-kcompactd-writeback.patch b/queue-6.13/nfs-fix-nfs_release_folio-to-not-deadlock-via-kcompactd-writeback.patch new file mode 100644 index 0000000000..1b00c1ed50 --- /dev/null +++ b/queue-6.13/nfs-fix-nfs_release_folio-to-not-deadlock-via-kcompactd-writeback.patch @@ -0,0 +1,116 @@ +From ce6d9c1c2b5cc785016faa11b48b6cd317eb367e Mon Sep 17 00:00:00 2001 +From: Mike Snitzer +Date: Mon, 24 Feb 2025 21:20:02 -0500 +Subject: NFS: fix nfs_release_folio() to not deadlock via kcompactd writeback + +From: Mike Snitzer + +commit ce6d9c1c2b5cc785016faa11b48b6cd317eb367e upstream. + +Add PF_KCOMPACTD flag and current_is_kcompactd() helper to check for it so +nfs_release_folio() can skip calling nfs_wb_folio() from kcompactd. + +Otherwise NFS can deadlock waiting for kcompactd enduced writeback which +recurses back to NFS (which triggers writeback to NFSD via NFS loopback +mount on the same host, NFSD blocks waiting for XFS's call to +__filemap_get_folio): + +6070.550357] INFO: task kcompactd0:58 blocked for more than 4435 seconds. 
+ +{--- +[58] "kcompactd0" +[<0>] folio_wait_bit+0xe8/0x200 +[<0>] folio_wait_writeback+0x2b/0x80 +[<0>] nfs_wb_folio+0x80/0x1b0 [nfs] +[<0>] nfs_release_folio+0x68/0x130 [nfs] +[<0>] split_huge_page_to_list_to_order+0x362/0x840 +[<0>] migrate_pages_batch+0x43d/0xb90 +[<0>] migrate_pages_sync+0x9a/0x240 +[<0>] migrate_pages+0x93c/0x9f0 +[<0>] compact_zone+0x8e2/0x1030 +[<0>] compact_node+0xdb/0x120 +[<0>] kcompactd+0x121/0x2e0 +[<0>] kthread+0xcf/0x100 +[<0>] ret_from_fork+0x31/0x40 +[<0>] ret_from_fork_asm+0x1a/0x30 +---} + +[akpm@linux-foundation.org: fix build] +Link: https://lkml.kernel.org/r/20250225022002.26141-1-snitzer@kernel.org +Fixes: 96780ca55e3c ("NFS: fix up nfs_release_folio() to try to release the page") +Signed-off-by: Mike Snitzer +Cc: Anna Schumaker +Cc: Trond Myklebust +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nfs/file.c | 3 ++- + include/linux/compaction.h | 5 +++++ + include/linux/sched.h | 2 +- + mm/compaction.c | 3 +++ + 4 files changed, 11 insertions(+), 2 deletions(-) + +--- a/fs/nfs/file.c ++++ b/fs/nfs/file.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -457,7 +458,7 @@ static bool nfs_release_folio(struct fol + /* If the private flag is set, then the folio is not freeable */ + if (folio_test_private(folio)) { + if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL || +- current_is_kswapd()) ++ current_is_kswapd() || current_is_kcompactd()) + return false; + if (nfs_wb_folio(folio->mapping->host, folio) < 0) + return false; +--- a/include/linux/compaction.h ++++ b/include/linux/compaction.h +@@ -80,6 +80,11 @@ static inline unsigned long compact_gap( + return 2UL << order; + } + ++static inline int current_is_kcompactd(void) ++{ ++ return current->flags & PF_KCOMPACTD; ++} ++ + #ifdef CONFIG_COMPACTION + + extern unsigned int extfrag_for_order(struct zone *zone, unsigned int order); +--- a/include/linux/sched.h ++++ b/include/linux/sched.h +@@ -1686,7 
+1686,7 @@ extern struct pid *cad_pid; + #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ + #define PF_USER_WORKER 0x00004000 /* Kernel thread cloned from userspace thread */ + #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ +-#define PF__HOLE__00010000 0x00010000 ++#define PF_KCOMPACTD 0x00010000 /* I am kcompactd */ + #define PF_KSWAPD 0x00020000 /* I am kswapd */ + #define PF_MEMALLOC_NOFS 0x00040000 /* All allocations inherit GFP_NOFS. See memalloc_nfs_save() */ + #define PF_MEMALLOC_NOIO 0x00080000 /* All allocations inherit GFP_NOIO. See memalloc_noio_save() */ +--- a/mm/compaction.c ++++ b/mm/compaction.c +@@ -3164,6 +3164,7 @@ static int kcompactd(void *p) + if (!cpumask_empty(cpumask)) + set_cpus_allowed_ptr(tsk, cpumask); + ++ current->flags |= PF_KCOMPACTD; + set_freezable(); + + pgdat->kcompactd_max_order = 0; +@@ -3220,6 +3221,8 @@ static int kcompactd(void *p) + pgdat->proactive_compact_trigger = false; + } + ++ current->flags &= ~PF_KCOMPACTD; ++ + return 0; + } + diff --git a/queue-6.13/rapidio-add-check-for-rio_add_net-in-rio_scan_alloc_net.patch b/queue-6.13/rapidio-add-check-for-rio_add_net-in-rio_scan_alloc_net.patch new file mode 100644 index 0000000000..f5b9bda718 --- /dev/null +++ b/queue-6.13/rapidio-add-check-for-rio_add_net-in-rio_scan_alloc_net.patch @@ -0,0 +1,41 @@ +From e842f9a1edf306bf36fe2a4d847a0b0d458770de Mon Sep 17 00:00:00 2001 +From: Haoxiang Li +Date: Thu, 27 Feb 2025 12:11:31 +0800 +Subject: rapidio: add check for rio_add_net() in rio_scan_alloc_net() + +From: Haoxiang Li + +commit e842f9a1edf306bf36fe2a4d847a0b0d458770de upstream. + +The return value of rio_add_net() should be checked. If it fails, +put_device() should be called to free the memory and give up the reference +initialized in rio_add_net(). 
+ +Link: https://lkml.kernel.org/r/20250227041131.3680761-1-haoxiang_li2024@163.com +Fixes: e6b585ca6e81 ("rapidio: move net allocation into core code") +Signed-off-by: Yang Yingliang +Signed-off-by: Haoxiang Li +Cc: Alexandre Bounine +Cc: Matt Porter +Cc: Dan Carpenter +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + drivers/rapidio/rio-scan.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/rapidio/rio-scan.c ++++ b/drivers/rapidio/rio-scan.c +@@ -871,7 +871,10 @@ static struct rio_net *rio_scan_alloc_ne + dev_set_name(&net->dev, "rnet_%d", net->id); + net->dev.parent = &mport->dev; + net->dev.release = rio_scan_release_dev; +- rio_add_net(net); ++ if (rio_add_net(net)) { ++ put_device(&net->dev); ++ net = NULL; ++ } + } + + return net; diff --git a/queue-6.13/rapidio-fix-an-api-misues-when-rio_add_net-fails.patch b/queue-6.13/rapidio-fix-an-api-misues-when-rio_add_net-fails.patch new file mode 100644 index 0000000000..31c1b98141 --- /dev/null +++ b/queue-6.13/rapidio-fix-an-api-misues-when-rio_add_net-fails.patch @@ -0,0 +1,39 @@ +From b2ef51c74b0171fde7eb69b6152d3d2f743ef269 Mon Sep 17 00:00:00 2001 +From: Haoxiang Li +Date: Thu, 27 Feb 2025 15:34:09 +0800 +Subject: rapidio: fix an API misues when rio_add_net() fails + +From: Haoxiang Li + +commit b2ef51c74b0171fde7eb69b6152d3d2f743ef269 upstream. + +rio_add_net() calls device_register() and fails when device_register() +fails. Thus, put_device() should be used rather than kfree(). Add +"mport->net = NULL;" to avoid a use after free issue. 
+ +Link: https://lkml.kernel.org/r/20250227073409.3696854-1-haoxiang_li2024@163.com +Fixes: e8de370188d0 ("rapidio: add mport char device driver") +Signed-off-by: Haoxiang Li +Reviewed-by: Dan Carpenter +Cc: Alexandre Bounine +Cc: Matt Porter +Cc: Yang Yingliang +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + drivers/rapidio/devices/rio_mport_cdev.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/rapidio/devices/rio_mport_cdev.c ++++ b/drivers/rapidio/devices/rio_mport_cdev.c +@@ -1742,7 +1742,8 @@ static int rio_mport_add_riodev(struct m + err = rio_add_net(net); + if (err) { + rmcd_debug(RDEV, "failed to register net, err=%d", err); +- kfree(net); ++ put_device(&net->dev); ++ mport->net = NULL; + goto cleanup; + } + } diff --git a/queue-6.13/s390-traps-fix-test_monitor_call-inline-assembly.patch b/queue-6.13/s390-traps-fix-test_monitor_call-inline-assembly.patch new file mode 100644 index 0000000000..16f11cf107 --- /dev/null +++ b/queue-6.13/s390-traps-fix-test_monitor_call-inline-assembly.patch @@ -0,0 +1,44 @@ +From 5623bc23a1cb9f9a9470fa73b3a20321dc4c4870 Mon Sep 17 00:00:00 2001 +From: Heiko Carstens +Date: Tue, 25 Feb 2025 10:53:10 +0100 +Subject: s390/traps: Fix test_monitor_call() inline assembly + +From: Heiko Carstens + +commit 5623bc23a1cb9f9a9470fa73b3a20321dc4c4870 upstream. + +The test_monitor_call() inline assembly uses the xgr instruction, which +also modifies the condition code, to clear a register. However the clobber +list of the inline assembly does not specify that the condition code is +modified, which may lead to incorrect code generation. + +Use the lhi instruction instead to clear the register without that the +condition code is modified. Furthermore this limits clearing to the lower +32 bits of val, since its type is int. 
+ +Fixes: 17248ea03674 ("s390: fix __EMIT_BUG() macro") +Cc: stable@vger.kernel.org +Reviewed-by: Juergen Christ +Signed-off-by: Heiko Carstens +Signed-off-by: Vasily Gorbik +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/kernel/traps.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/s390/kernel/traps.c ++++ b/arch/s390/kernel/traps.c +@@ -285,10 +285,10 @@ static void __init test_monitor_call(voi + return; + asm volatile( + " mc 0,0\n" +- "0: xgr %0,%0\n" ++ "0: lhi %[val],0\n" + "1:\n" +- EX_TABLE(0b,1b) +- : "+d" (val)); ++ EX_TABLE(0b, 1b) ++ : [val] "+d" (val)); + if (!val) + panic("Monitor call doesn't work!\n"); + } diff --git a/queue-6.13/selftests-damon-damon_nr_regions-set-ops-update-for-merge-results-check-to-100ms.patch b/queue-6.13/selftests-damon-damon_nr_regions-set-ops-update-for-merge-results-check-to-100ms.patch new file mode 100644 index 0000000000..61facb2c68 --- /dev/null +++ b/queue-6.13/selftests-damon-damon_nr_regions-set-ops-update-for-merge-results-check-to-100ms.patch @@ -0,0 +1,46 @@ +From 695469c07a65547acb6e229b3fdf6aaa881817e3 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Tue, 25 Feb 2025 14:23:32 -0800 +Subject: selftests/damon/damon_nr_regions: set ops update for merge results check to 100ms + +From: SeongJae Park + +commit 695469c07a65547acb6e229b3fdf6aaa881817e3 upstream. + +damon_nr_regions.py updates max_nr_regions to a number smaller than +expected number of real regions and confirms DAMON respect the harsh +limit. To give time for DAMON to make changes for the regions, 3 +aggregation intervals (300 milliseconds) are given. + +The internal mechanism works with not only the max_nr_regions, but also +sz_limit, though. It avoids merging region if that can make region of +size larger than sz_limit. In the test, sz_limit is set too small to +achieve the new max_nr_regions, unless it is updated for the new +min_nr_regions. 
But the update is done only once per operations set +update interval, which is one second by default. + +Hence, the test randomly incurs false positive failures. Fix it by +setting the ops interval same to aggregation interval, to make sure +sz_limit is updated by the time of the check. + +Link: https://lkml.kernel.org/r/20250225222333.505646-3-sj@kernel.org +Fixes: 8bf890c81612 ("selftests/damon/damon_nr_regions: test online-tuned max_nr_regions") +Signed-off-by: SeongJae Park +Cc: Shuah Khan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/damon/damon_nr_regions.py | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/testing/selftests/damon/damon_nr_regions.py ++++ b/tools/testing/selftests/damon/damon_nr_regions.py +@@ -109,6 +109,7 @@ def main(): + attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs + attrs.min_nr_regions = 3 + attrs.max_nr_regions = 7 ++ attrs.update_us = 100000 + err = kdamonds.kdamonds[0].commit() + if err is not None: + proc.terminate() diff --git a/queue-6.13/selftests-damon-damon_nr_regions-sort-collected-regiosn-before-checking-with-min-max-boundaries.patch b/queue-6.13/selftests-damon-damon_nr_regions-sort-collected-regiosn-before-checking-with-min-max-boundaries.patch new file mode 100644 index 0000000000..666072f1c3 --- /dev/null +++ b/queue-6.13/selftests-damon-damon_nr_regions-sort-collected-regiosn-before-checking-with-min-max-boundaries.patch @@ -0,0 +1,36 @@ +From 582ccf78f6090d88b1c7066b1e90b3d9ec952d08 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Tue, 25 Feb 2025 14:23:33 -0800 +Subject: selftests/damon/damon_nr_regions: sort collected regiosn before checking with min/max boundaries + +From: SeongJae Park + +commit 582ccf78f6090d88b1c7066b1e90b3d9ec952d08 upstream. + +damon_nr_regions.py starts DAMON, periodically collect number of regions +in snapshots, and see if it is in the requested range. 
The check code +assumes the numbers are sorted on the collection list, but there is no +such guarantee. Hence this can result in false positive test success. +Sort the list before doing the check. + +Link: https://lkml.kernel.org/r/20250225222333.505646-4-sj@kernel.org +Fixes: 781497347d1b ("selftests/damon: implement test for min/max_nr_regions") +Signed-off-by: SeongJae Park +Cc: Shuah Khan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/damon/damon_nr_regions.py | 1 + + 1 file changed, 1 insertion(+) + +--- a/tools/testing/selftests/damon/damon_nr_regions.py ++++ b/tools/testing/selftests/damon/damon_nr_regions.py +@@ -65,6 +65,7 @@ def test_nr_regions(real_nr_regions, min + + test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % ( + real_nr_regions, min_nr_regions, max_nr_regions) ++ collected_nr_regions.sort() + if (collected_nr_regions[0] < min_nr_regions or + collected_nr_regions[-1] > max_nr_regions): + print('fail %s' % test_name) diff --git a/queue-6.13/selftests-damon-damos_quota-make-real-expectation-of-quota-exceeds.patch b/queue-6.13/selftests-damon-damos_quota-make-real-expectation-of-quota-exceeds.patch new file mode 100644 index 0000000000..38dc0f44ac --- /dev/null +++ b/queue-6.13/selftests-damon-damos_quota-make-real-expectation-of-quota-exceeds.patch @@ -0,0 +1,62 @@ +From 1c684d77dfbcf926e0dd28f6d260e8fdd8a58e85 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Tue, 25 Feb 2025 14:23:31 -0800 +Subject: selftests/damon/damos_quota: make real expectation of quota exceeds + +From: SeongJae Park + +commit 1c684d77dfbcf926e0dd28f6d260e8fdd8a58e85 upstream. + +Patch series "selftests/damon: three fixes for false results". + +Fix three DAMON selftest bugs that cause two and one false positive +failures and successes. + + +This patch (of 3): + +damos_quota.py assumes the quota will always exceeded. But whether quota +will be exceeded or not depend on the monitoring results. 
Actually the +monitored workload has changing access pattern and hence sometimes the +quota may not really be exceeded. As a result, false positive test +failures happen. Expect how much time the quota will be exceeded by +checking the monitoring results, and use it instead of the naive +assumption. + +Link: https://lkml.kernel.org/r/20250225222333.505646-1-sj@kernel.org +Link: https://lkml.kernel.org/r/20250225222333.505646-2-sj@kernel.org +Fixes: 51f58c9da14b ("selftests/damon: add a test for DAMOS quota") +Signed-off-by: SeongJae Park +Cc: Shuah Khan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/damon/damos_quota.py | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/tools/testing/selftests/damon/damos_quota.py ++++ b/tools/testing/selftests/damon/damos_quota.py +@@ -51,16 +51,19 @@ def main(): + nr_quota_exceeds = scheme.stats.qt_exceeds + + wss_collected.sort() ++ nr_expected_quota_exceeds = 0 + for wss in wss_collected: + if wss > sz_quota: + print('quota is not kept: %s > %s' % (wss, sz_quota)) + print('collected samples are as below') + print('\n'.join(['%d' % wss for wss in wss_collected])) + exit(1) ++ if wss == sz_quota: ++ nr_expected_quota_exceeds += 1 + +- if nr_quota_exceeds < len(wss_collected): +- print('quota is not always exceeded: %d > %d' % +- (len(wss_collected), nr_quota_exceeds)) ++ if nr_quota_exceeds < nr_expected_quota_exceeds: ++ print('quota is exceeded less than expected: %d < %d' % ++ (nr_quota_exceeds, nr_expected_quota_exceeds)) + exit(1) + + if __name__ == '__main__': diff --git a/queue-6.13/selftests-damon-damos_quota_goal-handle-minimum-quota-that-cannot-be-further-reduced.patch b/queue-6.13/selftests-damon-damos_quota_goal-handle-minimum-quota-that-cannot-be-further-reduced.patch new file mode 100644 index 0000000000..afa17d8333 --- /dev/null +++ b/queue-6.13/selftests-damon-damos_quota_goal-handle-minimum-quota-that-cannot-be-further-reduced.patch @@ 
-0,0 +1,43 @@ +From 349db086a66051bc6114b64b4446787c20ac3f00 Mon Sep 17 00:00:00 2001 +From: SeongJae Park +Date: Mon, 17 Feb 2025 10:23:04 -0800 +Subject: selftests/damon/damos_quota_goal: handle minimum quota that cannot be further reduced + +From: SeongJae Park + +commit 349db086a66051bc6114b64b4446787c20ac3f00 upstream. + +damos_quota_goal.py selftest sees if DAMOS quota goals tuning feature +increases or reduces the effective size quota for given score as expected. +The tuning feature sets the minimum quota size as one byte, so if the +effective size quota is already one, we cannot expect it further be +reduced. However the test is not aware of the edge case, and fails since +it shows no expected change of the effective quota. Handle the case by +updating the failure logic for no change to see if it was the case, and +simply skip to the next test input. + +Link: https://lkml.kernel.org/r/20250217182304.45215-1-sj@kernel.org +Fixes: f1c07c0a1662 ("selftests/damon: add a test for DAMOS quota goal") +Signed-off-by: SeongJae Park +Reported-by: kernel test robot +Closes: https://lore.kernel.org/oe-lkp/202502171423.b28a918d-lkp@intel.com +Cc: Shuah Khan +Cc: [6.10.x] +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/damon/damos_quota_goal.py | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/tools/testing/selftests/damon/damos_quota_goal.py ++++ b/tools/testing/selftests/damon/damos_quota_goal.py +@@ -63,6 +63,9 @@ def main(): + if last_effective_bytes != 0 else -1.0)) + + if last_effective_bytes == goal.effective_bytes: ++ # effective quota was already minimum that cannot be more reduced ++ if expect_increase is False and last_effective_bytes == 1: ++ continue + print('efective bytes not changed: %d' % goal.effective_bytes) + exit(1) + diff --git a/queue-6.13/series b/queue-6.13/series index 2cc58f01dc..378d754d08 100644 --- a/queue-6.13/series +++ b/queue-6.13/series @@ -55,3 +55,26 @@ 
drm-xe-fix-gt-for-each-engine-workarounds.patch drm-xe-fix-fault-mode-invalidation-with-unbind.patch drm-xe-userptr-properly-setup-pfn_flags_mask.patch drm-xe-userptr-unmap-userptrs-in-the-mmu-notifier.patch +bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_remote_name.patch +bluetooth-add-check-for-mgmt_alloc_skb-in-mgmt_device_connected.patch +wifi-cfg80211-regulatory-improve-invalid-hints-checking.patch +wifi-nl80211-reject-cooked-mode-if-it-is-set-along-with-other-flags.patch +selftests-damon-damos_quota_goal-handle-minimum-quota-that-cannot-be-further-reduced.patch +selftests-damon-damos_quota-make-real-expectation-of-quota-exceeds.patch +selftests-damon-damon_nr_regions-set-ops-update-for-merge-results-check-to-100ms.patch +selftests-damon-damon_nr_regions-sort-collected-regiosn-before-checking-with-min-max-boundaries.patch +rapidio-add-check-for-rio_add_net-in-rio_scan_alloc_net.patch +rapidio-fix-an-api-misues-when-rio_add_net-fails.patch +dma-kmsan-export-kmsan_handle_dma-for-modules.patch +s390-traps-fix-test_monitor_call-inline-assembly.patch +nfs-fix-nfs_release_folio-to-not-deadlock-via-kcompactd-writeback.patch +userfaultfd-do-not-block-on-locking-a-large-folio-with-raised-refcount.patch +arm-pgtable-fix-null-pointer-dereference-issue.patch +block-fix-conversion-of-gpt-partition-name-to-7-bit.patch +mm-page_alloc-fix-uninitialized-variable.patch +mm-abort-vma_modify-on-merge-out-of-memory-failure.patch +mm-memory-failure-update-ttu-flag-inside-unmap_poisoned_folio.patch +mm-don-t-skip-arch_sync_kernel_mappings-in-error-paths.patch +mm-fix-finish_fault-handling-for-large-folios.patch +hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch +mm-memory-hotplug-check-folio-ref-count-first-in-do_migrate_range.patch diff --git a/queue-6.13/userfaultfd-do-not-block-on-locking-a-large-folio-with-raised-refcount.patch b/queue-6.13/userfaultfd-do-not-block-on-locking-a-large-folio-with-raised-refcount.patch new file mode 100644 index 
0000000000..1d0c0b1cab --- /dev/null +++ b/queue-6.13/userfaultfd-do-not-block-on-locking-a-large-folio-with-raised-refcount.patch @@ -0,0 +1,100 @@ +From 37b338eed10581784e854d4262da05c8d960c748 Mon Sep 17 00:00:00 2001 +From: Suren Baghdasaryan +Date: Wed, 26 Feb 2025 10:55:08 -0800 +Subject: userfaultfd: do not block on locking a large folio with raised refcount + +From: Suren Baghdasaryan + +commit 37b338eed10581784e854d4262da05c8d960c748 upstream. + +Lokesh recently raised an issue about UFFDIO_MOVE getting into a deadlock +state when it goes into split_folio() with raised folio refcount. +split_folio() expects the reference count to be exactly mapcount + +num_pages_in_folio + 1 (see can_split_folio()) and fails with EAGAIN +otherwise. + +If multiple processes are trying to move the same large folio, they raise +the refcount (all tasks succeed in that) then one of them succeeds in +locking the folio, while others will block in folio_lock() while keeping +the refcount raised. The winner of this race will proceed with calling +split_folio() and will fail returning EAGAIN to the caller and unlocking +the folio. The next competing process will get the folio locked and will +go through the same flow. In the meantime the original winner will be +retried and will block in folio_lock(), getting into the queue of waiting +processes only to repeat the same path. All this results in a livelock. + +An easy fix would be to avoid waiting for the folio lock while holding +folio refcount, similar to madvise_free_huge_pmd() where folio lock is +acquired before raising the folio refcount. Since we lock and take a +refcount of the folio while holding the PTE lock, changing the order of +these operations should not break anything. + +Modify move_pages_pte() to try locking the folio first and if that fails +and the folio is large then return EAGAIN without touching the folio +refcount. If the folio is single-page then split_folio() is not called, +so we don't have this issue. 
Lokesh has a reproducer [1] and I verified +that this change fixes the issue. + +[1] https://github.com/lokeshgidra/uffd_move_ioctl_deadlock + +[akpm@linux-foundation.org: reflow comment to 80 cols, s/end/end up/] +Link: https://lkml.kernel.org/r/20250226185510.2732648-2-surenb@google.com +Fixes: adef440691ba ("userfaultfd: UFFDIO_MOVE uABI") +Signed-off-by: Suren Baghdasaryan +Reported-by: Lokesh Gidra +Reviewed-by: Peter Xu +Acked-by: Liam R. Howlett +Cc: Andrea Arcangeli +Cc: Barry Song <21cnbao@gmail.com> +Cc: Barry Song +Cc: David Hildenbrand +Cc: Hugh Dickins +Cc: Jann Horn +Cc: Kalesh Singh +Cc: Lorenzo Stoakes +Cc: Matthew Wilcow (Oracle) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/userfaultfd.c | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +--- a/mm/userfaultfd.c ++++ b/mm/userfaultfd.c +@@ -1224,6 +1224,7 @@ retry: + */ + if (!src_folio) { + struct folio *folio; ++ bool locked; + + /* + * Pin the page while holding the lock to be sure the +@@ -1243,12 +1244,26 @@ retry: + goto out; + } + ++ locked = folio_trylock(folio); ++ /* ++ * We avoid waiting for folio lock with a raised ++ * refcount for large folios because extra refcounts ++ * will result in split_folio() failing later and ++ * retrying. If multiple tasks are trying to move a ++ * large folio we can end up livelocking. 
++ */ ++ if (!locked && folio_test_large(folio)) { ++ spin_unlock(src_ptl); ++ err = -EAGAIN; ++ goto out; ++ } ++ + folio_get(folio); + src_folio = folio; + src_folio_pte = orig_src_pte; + spin_unlock(src_ptl); + +- if (!folio_trylock(src_folio)) { ++ if (!locked) { + pte_unmap(&orig_src_pte); + pte_unmap(&orig_dst_pte); + src_pte = dst_pte = NULL; diff --git a/queue-6.13/wifi-cfg80211-regulatory-improve-invalid-hints-checking.patch b/queue-6.13/wifi-cfg80211-regulatory-improve-invalid-hints-checking.patch new file mode 100644 index 0000000000..51d9ff4845 --- /dev/null +++ b/queue-6.13/wifi-cfg80211-regulatory-improve-invalid-hints-checking.patch @@ -0,0 +1,90 @@ +From 59b348be7597c4a9903cb003c69e37df20c04a30 Mon Sep 17 00:00:00 2001 +From: Nikita Zhandarovich +Date: Fri, 28 Feb 2025 16:46:57 +0300 +Subject: wifi: cfg80211: regulatory: improve invalid hints checking +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Nikita Zhandarovich + +commit 59b348be7597c4a9903cb003c69e37df20c04a30 upstream. + +Syzbot keeps reporting an issue [1] that occurs when erroneous symbols +sent from userspace get through into user_alpha2[] via +regulatory_hint_user() call. Such invalid regulatory hints should be +rejected. + +While a sanity check from commit 47caf685a685 ("cfg80211: regulatory: +reject invalid hints") looks to be enough to deter these very cases, +there is a way to get around it due to 2 reasons. + +1) The way isalpha() works, symbols other than latin lower and +upper letters may be used to determine a country/domain. +For instance, greek letters will also be considered upper/lower +letters and for such characters isalpha() will return true as well. +However, ISO-3166-1 alpha2 codes should only hold latin +characters. 
+ +2) While processing a user regulatory request, between +reg_process_hint_user() and regulatory_hint_user() there happens to +be a call to queue_regulatory_request() which modifies letters in +request->alpha2[] with toupper(). This works fine for latin symbols, +less so for weird letter characters from the second part of _ctype[]. + +Syzbot triggers a warning in is_user_regdom_saved() by first sending +over an unexpected non-latin letter that gets malformed by toupper() +into a character that ends up failing isalpha() check. + +Prevent this by enhancing is_an_alpha2() to ensure that incoming +symbols are latin letters and nothing else. + +[1] Syzbot report: +------------[ cut here ]------------ +Unexpected user alpha2: A� +WARNING: CPU: 1 PID: 964 at net/wireless/reg.c:442 is_user_regdom_saved net/wireless/reg.c:440 [inline] +WARNING: CPU: 1 PID: 964 at net/wireless/reg.c:442 restore_alpha2 net/wireless/reg.c:3424 [inline] +WARNING: CPU: 1 PID: 964 at net/wireless/reg.c:442 restore_regulatory_settings+0x3c0/0x1e50 net/wireless/reg.c:3516 +Modules linked in: +CPU: 1 UID: 0 PID: 964 Comm: kworker/1:2 Not tainted 6.12.0-rc5-syzkaller-00044-gc1e939a21eb1 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 +Workqueue: events_power_efficient crda_timeout_work +RIP: 0010:is_user_regdom_saved net/wireless/reg.c:440 [inline] +RIP: 0010:restore_alpha2 net/wireless/reg.c:3424 [inline] +RIP: 0010:restore_regulatory_settings+0x3c0/0x1e50 net/wireless/reg.c:3516 +... 
+Call Trace: + + crda_timeout_work+0x27/0x50 net/wireless/reg.c:542 + process_one_work kernel/workqueue.c:3229 [inline] + process_scheduled_works+0xa65/0x1850 kernel/workqueue.c:3310 + worker_thread+0x870/0xd30 kernel/workqueue.c:3391 + kthread+0x2f2/0x390 kernel/kthread.c:389 + ret_from_fork+0x4d/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244 + + +Reported-by: syzbot+e10709ac3c44f3d4e800@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=e10709ac3c44f3d4e800 +Fixes: 09d989d179d0 ("cfg80211: add regulatory hint disconnect support") +Cc: stable@kernel.org +Signed-off-by: Nikita Zhandarovich +Link: https://patch.msgid.link/20250228134659.1577656-1-n.zhandarovich@fintech.ru +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + net/wireless/reg.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/wireless/reg.c ++++ b/net/wireless/reg.c +@@ -407,7 +407,8 @@ static bool is_an_alpha2(const char *alp + { + if (!alpha2) + return false; +- return isalpha(alpha2[0]) && isalpha(alpha2[1]); ++ return isascii(alpha2[0]) && isalpha(alpha2[0]) && ++ isascii(alpha2[1]) && isalpha(alpha2[1]); + } + + static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) diff --git a/queue-6.13/wifi-nl80211-reject-cooked-mode-if-it-is-set-along-with-other-flags.patch b/queue-6.13/wifi-nl80211-reject-cooked-mode-if-it-is-set-along-with-other-flags.patch new file mode 100644 index 0000000000..cca490d068 --- /dev/null +++ b/queue-6.13/wifi-nl80211-reject-cooked-mode-if-it-is-set-along-with-other-flags.patch @@ -0,0 +1,48 @@ +From 49f27f29446a5bfe633dd2cc0cfebd48a1a5e77f Mon Sep 17 00:00:00 2001 +From: Vitaliy Shevtsov +Date: Fri, 31 Jan 2025 20:26:55 +0500 +Subject: wifi: nl80211: reject cooked mode if it is set along with other flags + +From: Vitaliy Shevtsov + +commit 49f27f29446a5bfe633dd2cc0cfebd48a1a5e77f upstream. 
+ +It is possible to set both MONITOR_FLAG_COOK_FRAMES and MONITOR_FLAG_ACTIVE +flags simultaneously on the same monitor interface from the userspace. This +causes a sub-interface to be created with no IEEE80211_SDATA_IN_DRIVER bit +set because the monitor interface is in the cooked state and it takes +precedence over all other states. When the interface is then being deleted +the kernel calls WARN_ONCE() from check_sdata_in_driver() because of missing +that bit. + +Fix this by rejecting MONITOR_FLAG_COOK_FRAMES if it is set along with +other flags. + +Found by Linux Verification Center (linuxtesting.org) with Syzkaller. + +Fixes: 66f7ac50ed7c ("nl80211: Add monitor interface configuration flags") +Cc: stable@vger.kernel.org +Reported-by: syzbot+2e5c1e55b9e5c28a3da7@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=2e5c1e55b9e5c28a3da7 +Signed-off-by: Vitaliy Shevtsov +Link: https://patch.msgid.link/20250131152657.5606-1-v.shevtsov@mt-integration.ru +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + net/wireless/nl80211.c | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/net/wireless/nl80211.c ++++ b/net/wireless/nl80211.c +@@ -4221,6 +4221,11 @@ static int parse_monitor_flags(struct nl + if (flags[flag]) + *mntrflags |= (1<