From b55f8451d20d0b9079a542dd65907ed02f987d72 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 26 May 2025 14:18:13 +0200 Subject: [PATCH] 6.12-stable patches added patches: bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch highmem-add-folio_test_partial_kmap.patch kasan-avoid-sleepable-page-allocation-from-atomic-context.patch memcg-always-call-cond_resched-after-fn.patch mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch --- ...-function-enabled-before-doing-close.patch | 33 +++ ...e-if-sdio-card-removed-without-close.patch | 41 ++++ .../highmem-add-folio_test_partial_kmap.patch | 93 +++++++++ ...-page-allocation-from-atomic-context.patch | 194 ++++++++++++++++++ ...cg-always-call-cond_resched-after-fn.patch | 80 ++++++++ ...vm_nohugepage-only-if-thp-is-enabled.patch | 47 +++++ ...finite-retries-caused-by-cpuset-race.patch | 79 +++++++ ...lly-use-the-in-place-vrealloc-region.patch | 45 ++++ ...oc-only-zero-init-on-vrealloc-shrink.patch | 57 +++++ ...sed-by-lock-dependency-in-init_nilfs.patch | 67 ++++++ queue-6.12/series | 10 + 11 files changed, 746 insertions(+) create mode 100644 queue-6.12/bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch create mode 100644 queue-6.12/bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch create mode 100644 queue-6.12/highmem-add-folio_test_partial_kmap.patch create mode 100644 queue-6.12/kasan-avoid-sleepable-page-allocation-from-atomic-context.patch create mode 100644 queue-6.12/memcg-always-call-cond_resched-after-fn.patch create mode 100644 queue-6.12/mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch create 
mode 100644 queue-6.12/mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch create mode 100644 queue-6.12/mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch create mode 100644 queue-6.12/mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch create mode 100644 queue-6.12/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch diff --git a/queue-6.12/bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch b/queue-6.12/bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch new file mode 100644 index 0000000000..5b22e09dce --- /dev/null +++ b/queue-6.12/bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch @@ -0,0 +1,33 @@ +From 07e90048e356a29079fbc011cfc2e1fa1d1c5ac9 Mon Sep 17 00:00:00 2001 +From: Chris Lu +Date: Tue, 22 Apr 2025 09:21:55 +0800 +Subject: Bluetooth: btmtksdio: Check function enabled before doing close + +From: Chris Lu + +commit 07e90048e356a29079fbc011cfc2e1fa1d1c5ac9 upstream. + +Check BTMTKSDIO_FUNC_ENABLED flag before doing close to prevent +btmtksdio_close being called twice. 
+ +Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal") +Signed-off-by: Chris Lu +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/btmtksdio.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/bluetooth/btmtksdio.c ++++ b/drivers/bluetooth/btmtksdio.c +@@ -723,6 +723,10 @@ static int btmtksdio_close(struct hci_de + { + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); + ++ /* Skip btmtksdio_close if BTMTKSDIO_FUNC_ENABLED isn't set */ ++ if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) ++ return 0; ++ + sdio_claim_host(bdev->func); + + /* Disable interrupt */ diff --git a/queue-6.12/bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch b/queue-6.12/bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch new file mode 100644 index 0000000000..a9cae505ad --- /dev/null +++ b/queue-6.12/bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch @@ -0,0 +1,41 @@ +From 0b6d58bc6ea85e57de25c828444928e4a0aa79cb Mon Sep 17 00:00:00 2001 +From: Chris Lu +Date: Tue, 22 Apr 2025 09:21:56 +0800 +Subject: Bluetooth: btmtksdio: Do close if SDIO card removed without close + +From: Chris Lu + +commit 0b6d58bc6ea85e57de25c828444928e4a0aa79cb upstream. + +To cope with the Bluetooth SDIO card being physically removed suddenly, +the driver needs to ensure btmtksdio_close is called before +btmtksdio_remove to disable interrupts and txrx workqueue. 
+ +Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal") +Signed-off-by: Chris Lu +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/btmtksdio.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/bluetooth/btmtksdio.c ++++ b/drivers/bluetooth/btmtksdio.c +@@ -1434,11 +1434,15 @@ static void btmtksdio_remove(struct sdio + if (!bdev) + return; + ++ hdev = bdev->hdev; ++ ++ /* Make sure to call btmtksdio_close before removing sdio card */ ++ if (test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) ++ btmtksdio_close(hdev); ++ + /* Be consistent the state in btmtksdio_probe */ + pm_runtime_get_noresume(bdev->dev); + +- hdev = bdev->hdev; +- + sdio_set_drvdata(func, NULL); + hci_unregister_dev(hdev); + hci_free_dev(hdev); diff --git a/queue-6.12/highmem-add-folio_test_partial_kmap.patch b/queue-6.12/highmem-add-folio_test_partial_kmap.patch new file mode 100644 index 0000000000..9a7141d6e6 --- /dev/null +++ b/queue-6.12/highmem-add-folio_test_partial_kmap.patch @@ -0,0 +1,93 @@ +From 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Wed, 14 May 2025 18:06:02 +0100 +Subject: highmem: add folio_test_partial_kmap() + +From: Matthew Wilcox (Oracle) + +commit 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 upstream. + +In commit c749d9b7ebbc ("iov_iter: fix copy_page_from_iter_atomic() if +KMAP_LOCAL_FORCE_MAP"), Hugh correctly noted that if KMAP_LOCAL_FORCE_MAP +is enabled, we must limit ourselves to PAGE_SIZE bytes per call to +kmap_local(). The same problem exists in memcpy_from_folio(), +memcpy_to_folio(), folio_zero_tail(), folio_fill_tail() and +memcpy_from_file_folio(), so add folio_test_partial_kmap() to do this more +succinctly. 
+ +Link: https://lkml.kernel.org/r/20250514170607.3000994-2-willy@infradead.org +Fixes: 00cdf76012ab ("mm: add memcpy_from_file_folio()") +Signed-off-by: Matthew Wilcox (Oracle) +Cc: Al Viro +Cc: Hugh Dickins +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/highmem.h | 10 +++++----- + include/linux/page-flags.h | 7 +++++++ + 2 files changed, 12 insertions(+), 5 deletions(-) + +--- a/include/linux/highmem.h ++++ b/include/linux/highmem.h +@@ -461,7 +461,7 @@ static inline void memcpy_from_folio(cha + const char *from = kmap_local_folio(folio, offset); + size_t chunk = len; + +- if (folio_test_highmem(folio) && ++ if (folio_test_partial_kmap(folio) && + chunk > PAGE_SIZE - offset_in_page(offset)) + chunk = PAGE_SIZE - offset_in_page(offset); + memcpy(to, from, chunk); +@@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struc + char *to = kmap_local_folio(folio, offset); + size_t chunk = len; + +- if (folio_test_highmem(folio) && ++ if (folio_test_partial_kmap(folio) && + chunk > PAGE_SIZE - offset_in_page(offset)) + chunk = PAGE_SIZE - offset_in_page(offset); + memcpy(to, from, chunk); +@@ -522,7 +522,7 @@ static inline __must_check void *folio_z + { + size_t len = folio_size(folio) - offset; + +- if (folio_test_highmem(folio)) { ++ if (folio_test_partial_kmap(folio)) { + size_t max = PAGE_SIZE - offset_in_page(offset); + + while (len > max) { +@@ -560,7 +560,7 @@ static inline void folio_fill_tail(struc + + VM_BUG_ON(offset + len > folio_size(folio)); + +- if (folio_test_highmem(folio)) { ++ if (folio_test_partial_kmap(folio)) { + size_t max = PAGE_SIZE - offset_in_page(offset); + + while (len > max) { +@@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_fo + size_t offset = offset_in_folio(folio, pos); + char *from = kmap_local_folio(folio, offset); + +- if (folio_test_highmem(folio)) { ++ if (folio_test_partial_kmap(folio)) { + offset = offset_in_page(offset); + len = min_t(size_t, len, PAGE_SIZE - offset); + } 
else +--- a/include/linux/page-flags.h ++++ b/include/linux/page-flags.h +@@ -573,6 +573,13 @@ FOLIO_FLAG(readahead, FOLIO_HEAD_PAGE) + PAGEFLAG_FALSE(HighMem, highmem) + #endif + ++/* Does kmap_local_folio() only allow access to one page of the folio? */ ++#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP ++#define folio_test_partial_kmap(f) true ++#else ++#define folio_test_partial_kmap(f) folio_test_highmem(f) ++#endif ++ + #ifdef CONFIG_SWAP + static __always_inline bool folio_test_swapcache(const struct folio *folio) + { diff --git a/queue-6.12/kasan-avoid-sleepable-page-allocation-from-atomic-context.patch b/queue-6.12/kasan-avoid-sleepable-page-allocation-from-atomic-context.patch new file mode 100644 index 0000000000..18b6ab865e --- /dev/null +++ b/queue-6.12/kasan-avoid-sleepable-page-allocation-from-atomic-context.patch @@ -0,0 +1,194 @@ +From b6ea95a34cbd014ab6ade4248107b86b0aaf2d6c Mon Sep 17 00:00:00 2001 +From: Alexander Gordeev +Date: Thu, 15 May 2025 15:55:38 +0200 +Subject: kasan: avoid sleepable page allocation from atomic context + +From: Alexander Gordeev + +commit b6ea95a34cbd014ab6ade4248107b86b0aaf2d6c upstream. + +apply_to_pte_range() enters the lazy MMU mode and then invokes +kasan_populate_vmalloc_pte() callback on each page table walk iteration. +However, the callback can go into sleep when trying to allocate a single +page, e.g. if an architecture disables preemption on lazy MMU mode enter. + +On s390 if make arch_enter_lazy_mmu_mode() -> preempt_enable() and +arch_leave_lazy_mmu_mode() -> preempt_disable(), such crash occurs: + +[ 0.663336] BUG: sleeping function called from invalid context at ./include/linux/sched/mm.h:321 +[ 0.663348] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2, name: kthreadd +[ 0.663358] preempt_count: 1, expected: 0 +[ 0.663366] RCU nest depth: 0, expected: 0 +[ 0.663375] no locks held by kthreadd/2. 
+[ 0.663383] Preemption disabled at: +[ 0.663386] [<0002f3284cbb4eda>] apply_to_pte_range+0xfa/0x4a0 +[ 0.663405] CPU: 0 UID: 0 PID: 2 Comm: kthreadd Not tainted 6.15.0-rc5-gcc-kasan-00043-gd76bb1ebb558-dirty #162 PREEMPT +[ 0.663408] Hardware name: IBM 3931 A01 701 (KVM/Linux) +[ 0.663409] Call Trace: +[ 0.663410] [<0002f3284c385f58>] dump_stack_lvl+0xe8/0x140 +[ 0.663413] [<0002f3284c507b9e>] __might_resched+0x66e/0x700 +[ 0.663415] [<0002f3284cc4f6c0>] __alloc_frozen_pages_noprof+0x370/0x4b0 +[ 0.663419] [<0002f3284ccc73c0>] alloc_pages_mpol+0x1a0/0x4a0 +[ 0.663421] [<0002f3284ccc8518>] alloc_frozen_pages_noprof+0x88/0xc0 +[ 0.663424] [<0002f3284ccc8572>] alloc_pages_noprof+0x22/0x120 +[ 0.663427] [<0002f3284cc341ac>] get_free_pages_noprof+0x2c/0xc0 +[ 0.663429] [<0002f3284cceba70>] kasan_populate_vmalloc_pte+0x50/0x120 +[ 0.663433] [<0002f3284cbb4ef8>] apply_to_pte_range+0x118/0x4a0 +[ 0.663435] [<0002f3284cbc7c14>] apply_to_pmd_range+0x194/0x3e0 +[ 0.663437] [<0002f3284cbc99be>] __apply_to_page_range+0x2fe/0x7a0 +[ 0.663440] [<0002f3284cbc9e88>] apply_to_page_range+0x28/0x40 +[ 0.663442] [<0002f3284ccebf12>] kasan_populate_vmalloc+0x82/0xa0 +[ 0.663445] [<0002f3284cc1578c>] alloc_vmap_area+0x34c/0xc10 +[ 0.663448] [<0002f3284cc1c2a6>] __get_vm_area_node+0x186/0x2a0 +[ 0.663451] [<0002f3284cc1e696>] __vmalloc_node_range_noprof+0x116/0x310 +[ 0.663454] [<0002f3284cc1d950>] __vmalloc_node_noprof+0xd0/0x110 +[ 0.663457] [<0002f3284c454b88>] alloc_thread_stack_node+0xf8/0x330 +[ 0.663460] [<0002f3284c458d56>] dup_task_struct+0x66/0x4d0 +[ 0.663463] [<0002f3284c45be90>] copy_process+0x280/0x4b90 +[ 0.663465] [<0002f3284c460940>] kernel_clone+0xd0/0x4b0 +[ 0.663467] [<0002f3284c46115e>] kernel_thread+0xbe/0xe0 +[ 0.663469] [<0002f3284c4e440e>] kthreadd+0x50e/0x7f0 +[ 0.663472] [<0002f3284c38c04a>] __ret_from_fork+0x8a/0xf0 +[ 0.663475] [<0002f3284ed57ff2>] ret_from_fork+0xa/0x38 + +Instead of allocating single pages per-PTE, bulk-allocate the shadow +memory prior to 
applying kasan_populate_vmalloc_pte() callback on a page +range. + +Link: https://lkml.kernel.org/r/c61d3560297c93ed044f0b1af085610353a06a58.1747316918.git.agordeev@linux.ibm.com +Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory") +Signed-off-by: Alexander Gordeev +Suggested-by: Andrey Ryabinin +Reviewed-by: Harry Yoo +Cc: Daniel Axtens +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/kasan/shadow.c | 92 +++++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 78 insertions(+), 14 deletions(-) + +--- a/mm/kasan/shadow.c ++++ b/mm/kasan/shadow.c +@@ -292,33 +292,99 @@ void __init __weak kasan_populate_early_ + { + } + ++struct vmalloc_populate_data { ++ unsigned long start; ++ struct page **pages; ++}; ++ + static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, +- void *unused) ++ void *_data) + { +- unsigned long page; ++ struct vmalloc_populate_data *data = _data; ++ struct page *page; + pte_t pte; ++ int index; + + if (likely(!pte_none(ptep_get(ptep)))) + return 0; + +- page = __get_free_page(GFP_KERNEL); +- if (!page) +- return -ENOMEM; +- +- __memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE); +- pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL); ++ index = PFN_DOWN(addr - data->start); ++ page = data->pages[index]; ++ __memset(page_to_virt(page), KASAN_VMALLOC_INVALID, PAGE_SIZE); ++ pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL); + + spin_lock(&init_mm.page_table_lock); + if (likely(pte_none(ptep_get(ptep)))) { + set_pte_at(&init_mm, addr, ptep, pte); +- page = 0; ++ data->pages[index] = NULL; + } + spin_unlock(&init_mm.page_table_lock); +- if (page) +- free_page(page); ++ ++ return 0; ++} ++ ++static void ___free_pages_bulk(struct page **pages, int nr_pages) ++{ ++ int i; ++ ++ for (i = 0; i < nr_pages; i++) { ++ if (pages[i]) { ++ __free_pages(pages[i], 0); ++ pages[i] = NULL; ++ } ++ } ++} ++ ++static int ___alloc_pages_bulk(struct page **pages, int 
nr_pages) ++{ ++ unsigned long nr_populated, nr_total = nr_pages; ++ struct page **page_array = pages; ++ ++ while (nr_pages) { ++ nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); ++ if (!nr_populated) { ++ ___free_pages_bulk(page_array, nr_total - nr_pages); ++ return -ENOMEM; ++ } ++ pages += nr_populated; ++ nr_pages -= nr_populated; ++ } ++ + return 0; + } + ++static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) ++{ ++ unsigned long nr_pages, nr_total = PFN_UP(end - start); ++ struct vmalloc_populate_data data; ++ int ret = 0; ++ ++ data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO); ++ if (!data.pages) ++ return -ENOMEM; ++ ++ while (nr_total) { ++ nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0])); ++ ret = ___alloc_pages_bulk(data.pages, nr_pages); ++ if (ret) ++ break; ++ ++ data.start = start; ++ ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE, ++ kasan_populate_vmalloc_pte, &data); ++ ___free_pages_bulk(data.pages, nr_pages); ++ if (ret) ++ break; ++ ++ start += nr_pages * PAGE_SIZE; ++ nr_total -= nr_pages; ++ } ++ ++ free_page((unsigned long)data.pages); ++ ++ return ret; ++} ++ + int kasan_populate_vmalloc(unsigned long addr, unsigned long size) + { + unsigned long shadow_start, shadow_end; +@@ -348,9 +414,7 @@ int kasan_populate_vmalloc(unsigned long + shadow_start = PAGE_ALIGN_DOWN(shadow_start); + shadow_end = PAGE_ALIGN(shadow_end); + +- ret = apply_to_page_range(&init_mm, shadow_start, +- shadow_end - shadow_start, +- kasan_populate_vmalloc_pte, NULL); ++ ret = __kasan_populate_vmalloc(shadow_start, shadow_end); + if (ret) + return ret; + diff --git a/queue-6.12/memcg-always-call-cond_resched-after-fn.patch b/queue-6.12/memcg-always-call-cond_resched-after-fn.patch new file mode 100644 index 0000000000..ac8df60462 --- /dev/null +++ b/queue-6.12/memcg-always-call-cond_resched-after-fn.patch @@ -0,0 +1,80 @@ +From 06717a7b6c86514dbd6ab322e8083ffaa4db5712 Mon Sep 17 
00:00:00 2001 +From: Breno Leitao +Date: Fri, 23 May 2025 10:21:06 -0700 +Subject: memcg: always call cond_resched() after fn() + +From: Breno Leitao + +commit 06717a7b6c86514dbd6ab322e8083ffaa4db5712 upstream. + +I am seeing soft lockup on certain machine types when a cgroup OOMs. This +is happening because killing the process in certain machine might be very +slow, which causes the soft lockup and RCU stalls. This happens usually +when the cgroup has MANY processes and memory.oom.group is set. + +Example I am seeing in real production: + + [462012.244552] Memory cgroup out of memory: Killed process 3370438 (crosvm) .... + .... + [462037.318059] Memory cgroup out of memory: Killed process 4171372 (adb) .... + [462037.348314] watchdog: BUG: soft lockup - CPU#64 stuck for 26s! [stat_manager-ag:1618982] + .... + +Quick look at why this is so slow, it seems to be related to serial flush +for certain machine types. For all the crashes I saw, the target CPU was +at console_flush_all(). + +In the case above, there are thousands of processes in the cgroup, and it +is soft locking up before it reaches the 1024 limit in the code (which +would call the cond_resched()). So, cond_resched() in 1024 blocks is not +sufficient. + +Remove the counter-based conditional rescheduling logic and call +cond_resched() unconditionally after each task iteration, after fn() is +called. This avoids the lockup independently of how slow fn() is. 
+ +Link: https://lkml.kernel.org/r/20250523-memcg_fix-v1-1-ad3eafb60477@debian.org +Fixes: ade81479c7dd ("memcg: fix soft lockup in the OOM process") +Signed-off-by: Breno Leitao +Suggested-by: Rik van Riel +Acked-by: Shakeel Butt +Cc: Michael van der Westhuizen +Cc: Usama Arif +Cc: Pavel Begunkov +Cc: Chen Ridong +Cc: Greg Kroah-Hartman +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: Michal Hocko +Cc: Muchun Song +Cc: Roman Gushchin +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memcontrol.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -1139,7 +1139,6 @@ void mem_cgroup_scan_tasks(struct mem_cg + { + struct mem_cgroup *iter; + int ret = 0; +- int i = 0; + + BUG_ON(mem_cgroup_is_root(memcg)); + +@@ -1149,10 +1148,9 @@ void mem_cgroup_scan_tasks(struct mem_cg + + css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it); + while (!ret && (task = css_task_iter_next(&it))) { +- /* Avoid potential softlockup warning */ +- if ((++i & 1023) == 0) +- cond_resched(); + ret = fn(task, arg); ++ /* Avoid potential softlockup warning */ ++ cond_resched(); + } + css_task_iter_end(&it); + if (ret) { diff --git a/queue-6.12/mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch b/queue-6.12/mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch new file mode 100644 index 0000000000..d91436a478 --- /dev/null +++ b/queue-6.12/mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch @@ -0,0 +1,47 @@ +From 7190b3c8bd2b0cde483bd440cf91ba1c518b4261 Mon Sep 17 00:00:00 2001 +From: Ignacio Moreno Gonzalez +Date: Wed, 7 May 2025 15:28:06 +0200 +Subject: mm: mmap: map MAP_STACK to VM_NOHUGEPAGE only if THP is enabled + +From: Ignacio Moreno Gonzalez + +commit 7190b3c8bd2b0cde483bd440cf91ba1c518b4261 upstream. + +commit c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") maps the +mmap option MAP_STACK to VM_NOHUGEPAGE. 
This is also done if +CONFIG_TRANSPARENT_HUGEPAGE is not defined. But in that case, the +VM_NOHUGEPAGE does not make sense. + +I discovered this issue when trying to use the tool CRIU to checkpoint and +restore a container. Our running kernel is compiled without +CONFIG_TRANSPARENT_HUGEPAGE. CRIU parses the output of /proc//smaps +and saves the "nh" flag. When trying to restore the container, CRIU fails +to restore the "nh" mappings, since madvise() MADV_NOHUGEPAGE always +returns an error because CONFIG_TRANSPARENT_HUGEPAGE is not defined. + +Link: https://lkml.kernel.org/r/20250507-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled-v5-1-c6c38cfefd6e@kuka.com +Fixes: c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") +Signed-off-by: Ignacio Moreno Gonzalez +Acked-by: David Hildenbrand +Reviewed-by: Lorenzo Stoakes +Reviewed-by: Yang Shi +Reviewed-by: Liam R. Howlett +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mman.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/linux/mman.h ++++ b/include/linux/mman.h +@@ -157,7 +157,9 @@ calc_vm_flag_bits(struct file *file, uns + return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | + _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | + _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | ++#endif + arch_calc_vm_flag_bits(file, flags); + } + diff --git a/queue-6.12/mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch b/queue-6.12/mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch new file mode 100644 index 0000000000..2993b7d1c8 --- /dev/null +++ b/queue-6.12/mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch @@ -0,0 +1,79 @@ +From e05741fb10c38d70bbd7ec12b23c197b6355d519 Mon Sep 17 00:00:00 2001 +From: Tianyang Zhang +Date: Wed, 16 Apr 2025 16:24:05 +0800 +Subject: mm/page_alloc.c: avoid infinite retries caused by cpuset race + 
+From: Tianyang Zhang + +commit e05741fb10c38d70bbd7ec12b23c197b6355d519 upstream. + +__alloc_pages_slowpath has no change detection for ac->nodemask in the +part of retry path, while cpuset can modify it in parallel. For some +processes that set mempolicy as MPOL_BIND, this results ac->nodemask +changes, and then the should_reclaim_retry will judge based on the latest +nodemask and jump to retry, while the get_page_from_freelist only +traverses the zonelist from ac->preferred_zoneref, which selected by a +expired nodemask and may cause infinite retries in some cases + +cpu 64: +__alloc_pages_slowpath { + /* ..... */ +retry: + /* ac->nodemask = 0x1, ac->preferred->zone->nid = 1 */ + if (alloc_flags & ALLOC_KSWAPD) + wake_all_kswapds(order, gfp_mask, ac); + /* cpu 1: + cpuset_write_resmask + update_nodemask + update_nodemasks_hier + update_tasks_nodemask + mpol_rebind_task + mpol_rebind_policy + mpol_rebind_nodemask + // mempolicy->nodes has been modified, + // which ac->nodemask point to + + */ + /* ac->nodemask = 0x3, ac->preferred->zone->nid = 1 */ + if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, + did_some_progress > 0, &no_progress_loops)) + goto retry; +} + +Simultaneously starting multiple cpuset01 from LTP can quickly reproduce +this issue on a multi node server when the maximum memory pressure is +reached and the swap is enabled + +Link: https://lkml.kernel.org/r/20250416082405.20988-1-zhangtianyang@loongson.cn +Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") +Signed-off-by: Tianyang Zhang +Reviewed-by: Suren Baghdasaryan +Reviewed-by: Vlastimil Babka +Cc: Michal Hocko +Cc: Brendan Jackman +Cc: Johannes Weiner +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -4381,6 +4381,14 @@ restart: + } + + retry: ++ /* ++ * Deal with possible cpuset update races 
or zonelist updates to avoid ++ * infinite retries. ++ */ ++ if (check_retry_cpuset(cpuset_mems_cookie, ac) || ++ check_retry_zonelist(zonelist_iter_cookie)) ++ goto restart; ++ + /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */ + if (alloc_flags & ALLOC_KSWAPD) + wake_all_kswapds(order, gfp_mask, ac); diff --git a/queue-6.12/mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch b/queue-6.12/mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch new file mode 100644 index 0000000000..6246796f94 --- /dev/null +++ b/queue-6.12/mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch @@ -0,0 +1,45 @@ +From f7a35a3c36d1e36059c5654737d9bee3454f01a3 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 15 May 2025 14:42:15 -0700 +Subject: mm: vmalloc: actually use the in-place vrealloc region + +From: Kees Cook + +commit f7a35a3c36d1e36059c5654737d9bee3454f01a3 upstream. + +Patch series "mm: vmalloc: Actually use the in-place vrealloc region". + +This fixes a performance regression[1] with vrealloc()[1]. + + +The refactoring to not build a new vmalloc region only actually worked +when shrinking. Actually return the resized area when it grows. Ugh. + +Link: https://lkml.kernel.org/r/20250515214217.619685-1-kees@kernel.org +Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing") +Signed-off-by: Kees Cook +Reported-by: Shung-Hsi Yu +Closes: https://lore.kernel.org/all/20250515-bpf-verifier-slowdown-vwo2meju4cgp2su5ckj@6gi6ssxbnfqg [1] +Tested-by: Eduard Zingerman +Tested-by: Pawan Gupta +Tested-by: Shung-Hsi Yu +Reviewed-by: "Uladzislau Rezki (Sony)" +Reviewed-by: Danilo Krummrich +Cc: "Erhard F." 
+Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -4115,6 +4115,7 @@ void *vrealloc_noprof(const void *p, siz + if (want_init_on_alloc(flags)) + memset((void *)p + old_size, 0, size - old_size); + vm->requested_size = size; ++ return (void *)p; + } + + /* TODO: Grow the vm_area, i.e. allocate and map additional pages. */ diff --git a/queue-6.12/mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch b/queue-6.12/mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch new file mode 100644 index 0000000000..a70d172ebf --- /dev/null +++ b/queue-6.12/mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch @@ -0,0 +1,57 @@ +From 70d1eb031a68cbde4eed8099674be21778441c94 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 15 May 2025 14:42:16 -0700 +Subject: mm: vmalloc: only zero-init on vrealloc shrink + +From: Kees Cook + +commit 70d1eb031a68cbde4eed8099674be21778441c94 upstream. + +The common case is to grow reallocations, and since init_on_alloc will +have already zeroed the whole allocation, we only need to zero when +shrinking the allocation. + +Link: https://lkml.kernel.org/r/20250515214217.619685-2-kees@kernel.org +Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing") +Signed-off-by: Kees Cook +Tested-by: Pawan Gupta +Cc: Danilo Krummrich +Cc: Eduard Zingerman +Cc: "Erhard F." +Cc: Shung-Hsi Yu +Cc: "Uladzislau Rezki (Sony)" +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -4097,8 +4097,8 @@ void *vrealloc_noprof(const void *p, siz + * would be a good heuristic for when to shrink the vm_area? + */ + if (size <= old_size) { +- /* Zero out "freed" memory. */ +- if (want_init_on_free()) ++ /* Zero out "freed" memory, potentially for future realloc. 
*/ ++ if (want_init_on_free() || want_init_on_alloc(flags)) + memset((void *)p + size, 0, old_size - size); + vm->requested_size = size; + kasan_poison_vmalloc(p + size, old_size - size); +@@ -4111,9 +4111,11 @@ void *vrealloc_noprof(const void *p, siz + if (size <= alloced_size) { + kasan_unpoison_vmalloc(p + old_size, size - old_size, + KASAN_VMALLOC_PROT_NORMAL); +- /* Zero out "alloced" memory. */ +- if (want_init_on_alloc(flags)) +- memset((void *)p + old_size, 0, size - old_size); ++ /* ++ * No need to zero memory here, as unused memory will have ++ * already been zeroed at initial allocation time or during ++ * realloc shrink time. ++ */ + vm->requested_size = size; + return (void *)p; + } diff --git a/queue-6.12/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch b/queue-6.12/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch new file mode 100644 index 0000000000..bcceb9e31a --- /dev/null +++ b/queue-6.12/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch @@ -0,0 +1,67 @@ +From fb881cd7604536b17a1927fb0533f9a6982ffcc5 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Sat, 3 May 2025 14:33:14 +0900 +Subject: nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs() + +From: Ryusuke Konishi + +commit fb881cd7604536b17a1927fb0533f9a6982ffcc5 upstream. + +After commit c0e473a0d226 ("block: fix race between set_blocksize and read +paths") was merged, set_blocksize() called by sb_set_blocksize() now locks +the inode of the backing device file. As a result of this change, syzbot +started reporting deadlock warnings due to a circular dependency involving +the semaphore "ns_sem" of the nilfs object, the inode lock of the backing +device file, and the locks that this inode lock is transitively dependent +on. + +This is caused by a new lock dependency added by the above change, since +init_nilfs() calls sb_set_blocksize() in the lock section of "ns_sem". 
+However, these warnings are false positives because init_nilfs() is called +in the early stage of the mount operation and the filesystem has not yet +started. + +The reason why "ns_sem" is locked in init_nilfs() was to avoid a race +condition in nilfs_fill_super() caused by sharing a nilfs object among +multiple filesystem instances (super block structures) in the early +implementation. However, nilfs objects and super block structures have +long ago become one-to-one, and there is no longer any need to use the +semaphore there. + +So, fix this issue by removing the use of the semaphore "ns_sem" in +init_nilfs(). + +Link: https://lkml.kernel.org/r/20250503053327.12294-1-konishi.ryusuke@gmail.com +Fixes: c0e473a0d226 ("block: fix race between set_blocksize and read paths") +Signed-off-by: Ryusuke Konishi +Reported-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=00f7f5b884b117ee6773 +Tested-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com +Reported-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=f30591e72bfc24d4715b +Tested-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com> +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/the_nilfs.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/fs/nilfs2/the_nilfs.c ++++ b/fs/nilfs2/the_nilfs.c +@@ -693,8 +693,6 @@ int init_nilfs(struct the_nilfs *nilfs, + int blocksize; + int err; + +- down_write(&nilfs->ns_sem); +- + blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); + if (!blocksize) { + nilfs_err(sb, "unable to set blocksize"); +@@ -767,7 +765,6 @@ int init_nilfs(struct the_nilfs *nilfs, + set_nilfs_init(nilfs); + err = 0; + out: +- up_write(&nilfs->ns_sem); + return err; + + failed_sbh: diff --git a/queue-6.12/series b/queue-6.12/series index d374ade235..7816e3cd90 100644 --- a/queue-6.12/series +++ b/queue-6.12/series @@ -600,3 +600,13 @@ 
smb-client-reset-all-search-buffer-pointers-when-releasing-buffer.patch revert-drm-amd-keep-display-off-while-going-into-s4.patch input-xpad-add-more-controllers.patch input-synaptics-rmi-fix-crash-with-unsupported-versions-of-f34.patch +highmem-add-folio_test_partial_kmap.patch +kasan-avoid-sleepable-page-allocation-from-atomic-context.patch +memcg-always-call-cond_resched-after-fn.patch +mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch +mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch +mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch +mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch +nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch +bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch +bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch -- 2.47.3