--- /dev/null
+From 12ca42c237756182aad8ab04654c952765cb9061 Mon Sep 17 00:00:00 2001
+From: Suren Baghdasaryan <surenb@google.com>
+Date: Fri, 16 May 2025 17:07:39 -0700
+Subject: alloc_tag: allocate percpu counters for module tags dynamically
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+commit 12ca42c237756182aad8ab04654c952765cb9061 upstream.
+
+When a module gets unloaded it checks whether any of its tags are still in
+use and if so, we keep the memory containing module's allocation tags
+alive until all tags are unused. However percpu counters referenced by
+the tags are freed by free_module(). This will lead to UAF if the memory
+allocated by a module is accessed after module was unloaded.
+
+To fix this we allocate percpu counters for module allocation tags
+dynamically and keep them alive for tags which are still in use after
+module unloading. This also removes the requirement of a larger
+PERCPU_MODULE_RESERVE when memory allocation profiling is enabled because
+percpu memory for counters does not need to be reserved anymore.
+
+Link: https://lkml.kernel.org/r/20250517000739.5930-1-surenb@google.com
+Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory")
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Reported-by: David Wang <00107082@163.com>
+Closes: https://lore.kernel.org/all/20250516131246.6244-1-00107082@163.com/
+Tested-by: David Wang <00107082@163.com>
+Cc: Christoph Lameter (Ampere) <cl@gentwo.org>
+Cc: Dennis Zhou <dennis@kernel.org>
+Cc: Kent Overstreet <kent.overstreet@linux.dev>
+Cc: Pasha Tatashin <pasha.tatashin@soleen.com>
+Cc: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/alloc_tag.h | 12 ++++++
+ include/linux/codetag.h | 8 ++--
+ include/linux/percpu.h | 4 --
+ lib/alloc_tag.c | 89 ++++++++++++++++++++++++++++++++++++----------
+ lib/codetag.c | 5 +-
+ 5 files changed, 89 insertions(+), 29 deletions(-)
+
+--- a/include/linux/alloc_tag.h
++++ b/include/linux/alloc_tag.h
+@@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counter
+
+ #else /* ARCH_NEEDS_WEAK_PER_CPU */
+
++#ifdef MODULE
++
++#define DEFINE_ALLOC_TAG(_alloc_tag) \
++ static struct alloc_tag _alloc_tag __used __aligned(8) \
++ __section(ALLOC_TAG_SECTION_NAME) = { \
++ .ct = CODE_TAG_INIT, \
++ .counters = NULL };
++
++#else /* MODULE */
++
+ #define DEFINE_ALLOC_TAG(_alloc_tag) \
+ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \
+ static struct alloc_tag _alloc_tag __used __aligned(8) \
+@@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counter
+ .ct = CODE_TAG_INIT, \
+ .counters = &_alloc_tag_cntr };
+
++#endif /* MODULE */
++
+ #endif /* ARCH_NEEDS_WEAK_PER_CPU */
+
+ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT,
+--- a/include/linux/codetag.h
++++ b/include/linux/codetag.h
+@@ -36,10 +36,10 @@ union codetag_ref {
+ struct codetag_type_desc {
+ const char *section;
+ size_t tag_size;
+- void (*module_load)(struct codetag_type *cttype,
+- struct codetag_module *cmod);
+- void (*module_unload)(struct codetag_type *cttype,
+- struct codetag_module *cmod);
++ void (*module_load)(struct module *mod,
++ struct codetag *start, struct codetag *end);
++ void (*module_unload)(struct module *mod,
++ struct codetag *start, struct codetag *end);
+ #ifdef CONFIG_MODULES
+ void (*module_replaced)(struct module *mod, struct module *new_mod);
+ bool (*needs_section_mem)(struct module *mod, unsigned long size);
+--- a/include/linux/percpu.h
++++ b/include/linux/percpu.h
+@@ -15,11 +15,7 @@
+
+ /* enough to cover all DEFINE_PER_CPUs in modules */
+ #ifdef CONFIG_MODULES
+-#ifdef CONFIG_MEM_ALLOC_PROFILING
+-#define PERCPU_MODULE_RESERVE (8 << 13)
+-#else
+ #define PERCPU_MODULE_RESERVE (8 << 10)
+-#endif
+ #else
+ #define PERCPU_MODULE_RESERVE 0
+ #endif
+--- a/lib/alloc_tag.c
++++ b/lib/alloc_tag.c
+@@ -350,18 +350,28 @@ static bool needs_section_mem(struct mod
+ return size >= sizeof(struct alloc_tag);
+ }
+
+-static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to)
++static bool clean_unused_counters(struct alloc_tag *start_tag,
++ struct alloc_tag *end_tag)
+ {
+- while (from <= to) {
++ struct alloc_tag *tag;
++ bool ret = true;
++
++ for (tag = start_tag; tag <= end_tag; tag++) {
+ struct alloc_tag_counters counter;
+
+- counter = alloc_tag_read(from);
+- if (counter.bytes)
+- return from;
+- from++;
++ if (!tag->counters)
++ continue;
++
++ counter = alloc_tag_read(tag);
++ if (!counter.bytes) {
++ free_percpu(tag->counters);
++ tag->counters = NULL;
++ } else {
++ ret = false;
++ }
+ }
+
+- return NULL;
++ return ret;
+ }
+
+ /* Called with mod_area_mt locked */
+@@ -371,12 +381,16 @@ static void clean_unused_module_areas_lo
+ struct module *val;
+
+ mas_for_each(&mas, val, module_tags.size) {
++ struct alloc_tag *start_tag;
++ struct alloc_tag *end_tag;
++
+ if (val != &unloaded_mod)
+ continue;
+
+ /* Release area if all tags are unused */
+- if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index),
+- (struct alloc_tag *)(module_tags.start_addr + mas.last)))
++ start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
++ end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
++ if (clean_unused_counters(start_tag, end_tag))
+ mas_erase(&mas);
+ }
+ }
+@@ -561,7 +575,8 @@ unlock:
+ static void release_module_tags(struct module *mod, bool used)
+ {
+ MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size);
+- struct alloc_tag *tag;
++ struct alloc_tag *start_tag;
++ struct alloc_tag *end_tag;
+ struct module *val;
+
+ mas_lock(&mas);
+@@ -575,15 +590,22 @@ static void release_module_tags(struct m
+ if (!used)
+ goto release_area;
+
+- /* Find out if the area is used */
+- tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index),
+- (struct alloc_tag *)(module_tags.start_addr + mas.last));
+- if (tag) {
+- struct alloc_tag_counters counter = alloc_tag_read(tag);
+-
+- pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n",
+- tag->ct.filename, tag->ct.lineno, tag->ct.modname,
+- tag->ct.function, counter.bytes);
++ start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index);
++ end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last);
++ if (!clean_unused_counters(start_tag, end_tag)) {
++ struct alloc_tag *tag;
++
++ for (tag = start_tag; tag <= end_tag; tag++) {
++ struct alloc_tag_counters counter;
++
++ if (!tag->counters)
++ continue;
++
++ counter = alloc_tag_read(tag);
++ pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n",
++ tag->ct.filename, tag->ct.lineno, tag->ct.modname,
++ tag->ct.function, counter.bytes);
++ }
+ } else {
+ used = false;
+ }
+@@ -596,6 +618,34 @@ out:
+ mas_unlock(&mas);
+ }
+
++static void load_module(struct module *mod, struct codetag *start, struct codetag *stop)
++{
++ /* Allocate module alloc_tag percpu counters */
++ struct alloc_tag *start_tag;
++ struct alloc_tag *stop_tag;
++ struct alloc_tag *tag;
++
++ if (!mod)
++ return;
++
++ start_tag = ct_to_alloc_tag(start);
++ stop_tag = ct_to_alloc_tag(stop);
++ for (tag = start_tag; tag < stop_tag; tag++) {
++ WARN_ON(tag->counters);
++ tag->counters = alloc_percpu(struct alloc_tag_counters);
++ if (!tag->counters) {
++ while (--tag >= start_tag) {
++ free_percpu(tag->counters);
++ tag->counters = NULL;
++ }
++ shutdown_mem_profiling(true);
++ pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. Memory allocation profiling is disabled!\n",
++ mod->name);
++ break;
++ }
++ }
++}
++
+ static void replace_module(struct module *mod, struct module *new_mod)
+ {
+ MA_STATE(mas, &mod_area_mt, 0, module_tags.size);
+@@ -757,6 +807,7 @@ static int __init alloc_tag_init(void)
+ .needs_section_mem = needs_section_mem,
+ .alloc_section_mem = reserve_module_tags,
+ .free_section_mem = release_module_tags,
++ .module_load = load_module,
+ .module_replaced = replace_module,
+ #endif
+ };
+--- a/lib/codetag.c
++++ b/lib/codetag.c
+@@ -194,7 +194,7 @@ static int codetag_module_init(struct co
+ if (err >= 0) {
+ cttype->count += range_size(cttype, &range);
+ if (cttype->desc.module_load)
+- cttype->desc.module_load(cttype, cmod);
++ cttype->desc.module_load(mod, range.start, range.stop);
+ }
+ up_write(&cttype->mod_lock);
+
+@@ -333,7 +333,8 @@ void codetag_unload_module(struct module
+ }
+ if (found) {
+ if (cttype->desc.module_unload)
+- cttype->desc.module_unload(cttype, cmod);
++ cttype->desc.module_unload(cmod->mod,
++ cmod->range.start, cmod->range.stop);
+
+ cttype->count -= range_size(cttype, &cmod->range);
+ idr_remove(&cttype->mod_idr, mod_id);
--- /dev/null
+From 07e90048e356a29079fbc011cfc2e1fa1d1c5ac9 Mon Sep 17 00:00:00 2001
+From: Chris Lu <chris.lu@mediatek.com>
+Date: Tue, 22 Apr 2025 09:21:55 +0800
+Subject: Bluetooth: btmtksdio: Check function enabled before doing close
+
+From: Chris Lu <chris.lu@mediatek.com>
+
+commit 07e90048e356a29079fbc011cfc2e1fa1d1c5ac9 upstream.
+
+Check the BTMTKSDIO_FUNC_ENABLED flag before doing close to prevent
+btmtksdio_close from being called twice.
+
+Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal")
+Signed-off-by: Chris Lu <chris.lu@mediatek.com>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bluetooth/btmtksdio.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/bluetooth/btmtksdio.c
++++ b/drivers/bluetooth/btmtksdio.c
+@@ -723,6 +723,10 @@ static int btmtksdio_close(struct hci_de
+ {
+ struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);
+
++ /* Skip btmtksdio_close if BTMTKSDIO_FUNC_ENABLED isn't set */
++ if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state))
++ return 0;
++
+ sdio_claim_host(bdev->func);
+
+ /* Disable interrupt */
--- /dev/null
+From 0b6d58bc6ea85e57de25c828444928e4a0aa79cb Mon Sep 17 00:00:00 2001
+From: Chris Lu <chris.lu@mediatek.com>
+Date: Tue, 22 Apr 2025 09:21:56 +0800
+Subject: Bluetooth: btmtksdio: Do close if SDIO card removed without close
+
+From: Chris Lu <chris.lu@mediatek.com>
+
+commit 0b6d58bc6ea85e57de25c828444928e4a0aa79cb upstream.
+
+In case the Bluetooth SDIO card is physically removed suddenly, the
+driver needs to ensure btmtksdio_close is called before
+btmtksdio_remove to disable interrupts and the txrx workqueue.
+
+Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal")
+Signed-off-by: Chris Lu <chris.lu@mediatek.com>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/bluetooth/btmtksdio.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/bluetooth/btmtksdio.c
++++ b/drivers/bluetooth/btmtksdio.c
+@@ -1447,11 +1447,15 @@ static void btmtksdio_remove(struct sdio
+ if (!bdev)
+ return;
+
++ hdev = bdev->hdev;
++
++ /* Make sure to call btmtksdio_close before removing sdio card */
++ if (test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state))
++ btmtksdio_close(hdev);
++
+ /* Be consistent the state in btmtksdio_probe */
+ pm_runtime_get_noresume(bdev->dev);
+
+- hdev = bdev->hdev;
+-
+ sdio_set_drvdata(func, NULL);
+ hci_unregister_dev(hdev);
+ hci_free_dev(hdev);
--- /dev/null
+From 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 Mon Sep 17 00:00:00 2001
+From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
+Date: Wed, 14 May 2025 18:06:02 +0100
+Subject: highmem: add folio_test_partial_kmap()
+
+From: Matthew Wilcox (Oracle) <willy@infradead.org>
+
+commit 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 upstream.
+
+In commit c749d9b7ebbc ("iov_iter: fix copy_page_from_iter_atomic() if
+KMAP_LOCAL_FORCE_MAP"), Hugh correctly noted that if KMAP_LOCAL_FORCE_MAP
+is enabled, we must limit ourselves to PAGE_SIZE bytes per call to
+kmap_local(). The same problem exists in memcpy_from_folio(),
+memcpy_to_folio(), folio_zero_tail(), folio_fill_tail() and
+memcpy_from_file_folio(), so add folio_test_partial_kmap() to do this more
+succinctly.
+
+Link: https://lkml.kernel.org/r/20250514170607.3000994-2-willy@infradead.org
+Fixes: 00cdf76012ab ("mm: add memcpy_from_file_folio()")
+Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/highmem.h | 10 +++++-----
+ include/linux/page-flags.h | 7 +++++++
+ 2 files changed, 12 insertions(+), 5 deletions(-)
+
+--- a/include/linux/highmem.h
++++ b/include/linux/highmem.h
+@@ -461,7 +461,7 @@ static inline void memcpy_from_folio(cha
+ const char *from = kmap_local_folio(folio, offset);
+ size_t chunk = len;
+
+- if (folio_test_highmem(folio) &&
++ if (folio_test_partial_kmap(folio) &&
+ chunk > PAGE_SIZE - offset_in_page(offset))
+ chunk = PAGE_SIZE - offset_in_page(offset);
+ memcpy(to, from, chunk);
+@@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struc
+ char *to = kmap_local_folio(folio, offset);
+ size_t chunk = len;
+
+- if (folio_test_highmem(folio) &&
++ if (folio_test_partial_kmap(folio) &&
+ chunk > PAGE_SIZE - offset_in_page(offset))
+ chunk = PAGE_SIZE - offset_in_page(offset);
+ memcpy(to, from, chunk);
+@@ -522,7 +522,7 @@ static inline __must_check void *folio_z
+ {
+ size_t len = folio_size(folio) - offset;
+
+- if (folio_test_highmem(folio)) {
++ if (folio_test_partial_kmap(folio)) {
+ size_t max = PAGE_SIZE - offset_in_page(offset);
+
+ while (len > max) {
+@@ -560,7 +560,7 @@ static inline void folio_fill_tail(struc
+
+ VM_BUG_ON(offset + len > folio_size(folio));
+
+- if (folio_test_highmem(folio)) {
++ if (folio_test_partial_kmap(folio)) {
+ size_t max = PAGE_SIZE - offset_in_page(offset);
+
+ while (len > max) {
+@@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_fo
+ size_t offset = offset_in_folio(folio, pos);
+ char *from = kmap_local_folio(folio, offset);
+
+- if (folio_test_highmem(folio)) {
++ if (folio_test_partial_kmap(folio)) {
+ offset = offset_in_page(offset);
+ len = min_t(size_t, len, PAGE_SIZE - offset);
+ } else
+--- a/include/linux/page-flags.h
++++ b/include/linux/page-flags.h
+@@ -578,6 +578,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE)
+ PAGEFLAG_FALSE(HighMem, highmem)
+ #endif
+
++/* Does kmap_local_folio() only allow access to one page of the folio? */
++#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP
++#define folio_test_partial_kmap(f) true
++#else
++#define folio_test_partial_kmap(f) folio_test_highmem(f)
++#endif
++
+ #ifdef CONFIG_SWAP
+ static __always_inline bool folio_test_swapcache(const struct folio *folio)
+ {
--- /dev/null
+From b6ea95a34cbd014ab6ade4248107b86b0aaf2d6c Mon Sep 17 00:00:00 2001
+From: Alexander Gordeev <agordeev@linux.ibm.com>
+Date: Thu, 15 May 2025 15:55:38 +0200
+Subject: kasan: avoid sleepable page allocation from atomic context
+
+From: Alexander Gordeev <agordeev@linux.ibm.com>
+
+commit b6ea95a34cbd014ab6ade4248107b86b0aaf2d6c upstream.
+
+apply_to_pte_range() enters the lazy MMU mode and then invokes
+kasan_populate_vmalloc_pte() callback on each page table walk iteration.
+However, the callback can go into sleep when trying to allocate a single
+page, e.g. if an architecture disables preemption on lazy MMU mode enter.
+
+On s390, making arch_enter_lazy_mmu_mode() -> preempt_disable() and
+arch_leave_lazy_mmu_mode() -> preempt_enable() causes this crash:
+
+[ 0.663336] BUG: sleeping function called from invalid context at ./include/linux/sched/mm.h:321
+[ 0.663348] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2, name: kthreadd
+[ 0.663358] preempt_count: 1, expected: 0
+[ 0.663366] RCU nest depth: 0, expected: 0
+[ 0.663375] no locks held by kthreadd/2.
+[ 0.663383] Preemption disabled at:
+[ 0.663386] [<0002f3284cbb4eda>] apply_to_pte_range+0xfa/0x4a0
+[ 0.663405] CPU: 0 UID: 0 PID: 2 Comm: kthreadd Not tainted 6.15.0-rc5-gcc-kasan-00043-gd76bb1ebb558-dirty #162 PREEMPT
+[ 0.663408] Hardware name: IBM 3931 A01 701 (KVM/Linux)
+[ 0.663409] Call Trace:
+[ 0.663410] [<0002f3284c385f58>] dump_stack_lvl+0xe8/0x140
+[ 0.663413] [<0002f3284c507b9e>] __might_resched+0x66e/0x700
+[ 0.663415] [<0002f3284cc4f6c0>] __alloc_frozen_pages_noprof+0x370/0x4b0
+[ 0.663419] [<0002f3284ccc73c0>] alloc_pages_mpol+0x1a0/0x4a0
+[ 0.663421] [<0002f3284ccc8518>] alloc_frozen_pages_noprof+0x88/0xc0
+[ 0.663424] [<0002f3284ccc8572>] alloc_pages_noprof+0x22/0x120
+[ 0.663427] [<0002f3284cc341ac>] get_free_pages_noprof+0x2c/0xc0
+[ 0.663429] [<0002f3284cceba70>] kasan_populate_vmalloc_pte+0x50/0x120
+[ 0.663433] [<0002f3284cbb4ef8>] apply_to_pte_range+0x118/0x4a0
+[ 0.663435] [<0002f3284cbc7c14>] apply_to_pmd_range+0x194/0x3e0
+[ 0.663437] [<0002f3284cbc99be>] __apply_to_page_range+0x2fe/0x7a0
+[ 0.663440] [<0002f3284cbc9e88>] apply_to_page_range+0x28/0x40
+[ 0.663442] [<0002f3284ccebf12>] kasan_populate_vmalloc+0x82/0xa0
+[ 0.663445] [<0002f3284cc1578c>] alloc_vmap_area+0x34c/0xc10
+[ 0.663448] [<0002f3284cc1c2a6>] __get_vm_area_node+0x186/0x2a0
+[ 0.663451] [<0002f3284cc1e696>] __vmalloc_node_range_noprof+0x116/0x310
+[ 0.663454] [<0002f3284cc1d950>] __vmalloc_node_noprof+0xd0/0x110
+[ 0.663457] [<0002f3284c454b88>] alloc_thread_stack_node+0xf8/0x330
+[ 0.663460] [<0002f3284c458d56>] dup_task_struct+0x66/0x4d0
+[ 0.663463] [<0002f3284c45be90>] copy_process+0x280/0x4b90
+[ 0.663465] [<0002f3284c460940>] kernel_clone+0xd0/0x4b0
+[ 0.663467] [<0002f3284c46115e>] kernel_thread+0xbe/0xe0
+[ 0.663469] [<0002f3284c4e440e>] kthreadd+0x50e/0x7f0
+[ 0.663472] [<0002f3284c38c04a>] __ret_from_fork+0x8a/0xf0
+[ 0.663475] [<0002f3284ed57ff2>] ret_from_fork+0xa/0x38
+
+Instead of allocating single pages per-PTE, bulk-allocate the shadow
+memory prior to applying kasan_populate_vmalloc_pte() callback on a page
+range.
+
+Link: https://lkml.kernel.org/r/c61d3560297c93ed044f0b1af085610353a06a58.1747316918.git.agordeev@linux.ibm.com
+Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory")
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Suggested-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Reviewed-by: Harry Yoo <harry.yoo@oracle.com>
+Cc: Daniel Axtens <dja@axtens.net>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/kasan/shadow.c | 92 +++++++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 78 insertions(+), 14 deletions(-)
+
+--- a/mm/kasan/shadow.c
++++ b/mm/kasan/shadow.c
+@@ -292,33 +292,99 @@ void __init __weak kasan_populate_early_
+ {
+ }
+
++struct vmalloc_populate_data {
++ unsigned long start;
++ struct page **pages;
++};
++
+ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
+- void *unused)
++ void *_data)
+ {
+- unsigned long page;
++ struct vmalloc_populate_data *data = _data;
++ struct page *page;
+ pte_t pte;
++ int index;
+
+ if (likely(!pte_none(ptep_get(ptep))))
+ return 0;
+
+- page = __get_free_page(GFP_KERNEL);
+- if (!page)
+- return -ENOMEM;
+-
+- __memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE);
+- pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL);
++ index = PFN_DOWN(addr - data->start);
++ page = data->pages[index];
++ __memset(page_to_virt(page), KASAN_VMALLOC_INVALID, PAGE_SIZE);
++ pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL);
+
+ spin_lock(&init_mm.page_table_lock);
+ if (likely(pte_none(ptep_get(ptep)))) {
+ set_pte_at(&init_mm, addr, ptep, pte);
+- page = 0;
++ data->pages[index] = NULL;
+ }
+ spin_unlock(&init_mm.page_table_lock);
+- if (page)
+- free_page(page);
++
++ return 0;
++}
++
++static void ___free_pages_bulk(struct page **pages, int nr_pages)
++{
++ int i;
++
++ for (i = 0; i < nr_pages; i++) {
++ if (pages[i]) {
++ __free_pages(pages[i], 0);
++ pages[i] = NULL;
++ }
++ }
++}
++
++static int ___alloc_pages_bulk(struct page **pages, int nr_pages)
++{
++ unsigned long nr_populated, nr_total = nr_pages;
++ struct page **page_array = pages;
++
++ while (nr_pages) {
++ nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages);
++ if (!nr_populated) {
++ ___free_pages_bulk(page_array, nr_total - nr_pages);
++ return -ENOMEM;
++ }
++ pages += nr_populated;
++ nr_pages -= nr_populated;
++ }
++
+ return 0;
+ }
+
++static int __kasan_populate_vmalloc(unsigned long start, unsigned long end)
++{
++ unsigned long nr_pages, nr_total = PFN_UP(end - start);
++ struct vmalloc_populate_data data;
++ int ret = 0;
++
++ data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO);
++ if (!data.pages)
++ return -ENOMEM;
++
++ while (nr_total) {
++ nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0]));
++ ret = ___alloc_pages_bulk(data.pages, nr_pages);
++ if (ret)
++ break;
++
++ data.start = start;
++ ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE,
++ kasan_populate_vmalloc_pte, &data);
++ ___free_pages_bulk(data.pages, nr_pages);
++ if (ret)
++ break;
++
++ start += nr_pages * PAGE_SIZE;
++ nr_total -= nr_pages;
++ }
++
++ free_page((unsigned long)data.pages);
++
++ return ret;
++}
++
+ int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
+ {
+ unsigned long shadow_start, shadow_end;
+@@ -348,9 +414,7 @@ int kasan_populate_vmalloc(unsigned long
+ shadow_start = PAGE_ALIGN_DOWN(shadow_start);
+ shadow_end = PAGE_ALIGN(shadow_end);
+
+- ret = apply_to_page_range(&init_mm, shadow_start,
+- shadow_end - shadow_start,
+- kasan_populate_vmalloc_pte, NULL);
++ ret = __kasan_populate_vmalloc(shadow_start, shadow_end);
+ if (ret)
+ return ret;
+
--- /dev/null
+From 06717a7b6c86514dbd6ab322e8083ffaa4db5712 Mon Sep 17 00:00:00 2001
+From: Breno Leitao <leitao@debian.org>
+Date: Fri, 23 May 2025 10:21:06 -0700
+Subject: memcg: always call cond_resched() after fn()
+
+From: Breno Leitao <leitao@debian.org>
+
+commit 06717a7b6c86514dbd6ab322e8083ffaa4db5712 upstream.
+
+I am seeing soft lockups on certain machine types when a cgroup OOMs. This
+is happening because killing a process on certain machines might be very
+slow, which causes the soft lockup and RCU stalls. This usually happens
+when the cgroup has MANY processes and memory.oom.group is set.
+
+Example I am seeing in real production:
+
+ [462012.244552] Memory cgroup out of memory: Killed process 3370438 (crosvm) ....
+ ....
+ [462037.318059] Memory cgroup out of memory: Killed process 4171372 (adb) ....
+ [462037.348314] watchdog: BUG: soft lockup - CPU#64 stuck for 26s! [stat_manager-ag:1618982]
+ ....
+
+A quick look at why this is so slow suggests it is related to serial flush
+on certain machine types. For all the crashes I saw, the target CPU was
+at console_flush_all().
+
+In the case above, there are thousands of processes in the cgroup, and it
+is soft locking up before it reaches the 1024 limit in the code (which
+would call the cond_resched()). So, cond_resched() in 1024 blocks is not
+sufficient.
+
+Remove the counter-based conditional rescheduling logic and call
+cond_resched() unconditionally after each task iteration, after fn() is
+called. This avoids the lockup independently of how slow fn() is.
+
+Link: https://lkml.kernel.org/r/20250523-memcg_fix-v1-1-ad3eafb60477@debian.org
+Fixes: ade81479c7dd ("memcg: fix soft lockup in the OOM process")
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Suggested-by: Rik van Riel <riel@surriel.com>
+Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
+Cc: Michael van der Westhuizen <rmikey@meta.com>
+Cc: Usama Arif <usamaarif642@gmail.com>
+Cc: Pavel Begunkov <asml.silence@gmail.com>
+Cc: Chen Ridong <chenridong@huawei.com>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Muchun Song <muchun.song@linux.dev>
+Cc: Roman Gushchin <roman.gushchin@linux.dev>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memcontrol.c | 6 ++----
+ 1 file changed, 2 insertions(+), 4 deletions(-)
+
+--- a/mm/memcontrol.c
++++ b/mm/memcontrol.c
+@@ -1161,7 +1161,6 @@ void mem_cgroup_scan_tasks(struct mem_cg
+ {
+ struct mem_cgroup *iter;
+ int ret = 0;
+- int i = 0;
+
+ BUG_ON(mem_cgroup_is_root(memcg));
+
+@@ -1171,10 +1170,9 @@ void mem_cgroup_scan_tasks(struct mem_cg
+
+ css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it);
+ while (!ret && (task = css_task_iter_next(&it))) {
+- /* Avoid potential softlockup warning */
+- if ((++i & 1023) == 0)
+- cond_resched();
+ ret = fn(task, arg);
++ /* Avoid potential softlockup warning */
++ cond_resched();
+ }
+ css_task_iter_end(&it);
+ if (ret) {
--- /dev/null
+From 0f518255bde881d2a2605bbc080b438b532b6ab2 Mon Sep 17 00:00:00 2001
+From: Florent Revest <revest@chromium.org>
+Date: Wed, 7 May 2025 15:09:57 +0200
+Subject: mm: fix VM_UFFD_MINOR == VM_SHADOW_STACK on USERFAULTFD=y && ARM64_GCS=y
+
+From: Florent Revest <revest@chromium.org>
+
+commit 0f518255bde881d2a2605bbc080b438b532b6ab2 upstream.
+
+On configs with CONFIG_ARM64_GCS=y, VM_SHADOW_STACK is bit 38. On configs
+with CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y (selected by CONFIG_ARM64 when
+CONFIG_USERFAULTFD=y), VM_UFFD_MINOR is _also_ bit 38.
+
+This bit being shared by two different VMA flags could lead to all sorts
+of unintended behaviors. Presumably, a process could maybe call into
+userfaultfd in a way that disables the shadow stack vma flag. I can't
+think of any attack where this would help (presumably, if an attacker
+tries to disable shadow stacks, they are trying to hijack control flow so
+can't arbitrarily call into userfaultfd yet anyway) but this still feels
+somewhat scary.
+
+Link: https://lkml.kernel.org/r/20250507131000.1204175-2-revest@chromium.org
+Fixes: ae80e1629aea ("mm: Define VM_SHADOW_STACK for arm64 when we support GCS")
+Signed-off-by: Florent Revest <revest@chromium.org>
+Reviewed-by: Mark Brown <broonie@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Brendan Jackman <jackmanb@google.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Florent Revest <revest@chromium.org>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Thiago Jung Bauermann <thiago.bauermann@linaro.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Will Deacon <will@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -411,7 +411,7 @@ extern unsigned int kobjsize(const void
+ #endif
+
+ #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+-# define VM_UFFD_MINOR_BIT 38
++# define VM_UFFD_MINOR_BIT 41
+ # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */
+ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+ # define VM_UFFD_MINOR VM_NONE
--- /dev/null
+From 113ed54ad276c352ee5ce109bdcf0df118a43bda Mon Sep 17 00:00:00 2001
+From: Ge Yang <yangge1116@126.com>
+Date: Thu, 22 May 2025 11:22:17 +0800
+Subject: mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios
+
+From: Ge Yang <yangge1116@126.com>
+
+commit 113ed54ad276c352ee5ce109bdcf0df118a43bda upstream.
+
+A kernel crash was observed when replacing free hugetlb folios:
+
+BUG: kernel NULL pointer dereference, address: 0000000000000028
+PGD 0 P4D 0
+Oops: Oops: 0000 [#1] SMP NOPTI
+CPU: 28 UID: 0 PID: 29639 Comm: test_cma.sh Tainted 6.15.0-rc6-zp #41 PREEMPT(voluntary)
+RIP: 0010:alloc_and_dissolve_hugetlb_folio+0x1d/0x1f0
+RSP: 0018:ffffc9000b30fa90 EFLAGS: 00010286
+RAX: 0000000000000000 RBX: 0000000000342cca RCX: ffffea0043000000
+RDX: ffffc9000b30fb08 RSI: ffffea0043000000 RDI: 0000000000000000
+RBP: ffffc9000b30fb20 R08: 0000000000001000 R09: 0000000000000000
+R10: ffff88886f92eb00 R11: 0000000000000000 R12: ffffea0043000000
+R13: 0000000000000000 R14: 00000000010c0200 R15: 0000000000000004
+FS: 00007fcda5f14740(0000) GS:ffff8888ec1d8000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000000000028 CR3: 0000000391402000 CR4: 0000000000350ef0
+Call Trace:
+<TASK>
+ replace_free_hugepage_folios+0xb6/0x100
+ alloc_contig_range_noprof+0x18a/0x590
+ ? srso_return_thunk+0x5/0x5f
+ ? down_read+0x12/0xa0
+ ? srso_return_thunk+0x5/0x5f
+ cma_range_alloc.constprop.0+0x131/0x290
+ __cma_alloc+0xcf/0x2c0
+ cma_alloc_write+0x43/0xb0
+ simple_attr_write_xsigned.constprop.0.isra.0+0xb2/0x110
+ debugfs_attr_write+0x46/0x70
+ full_proxy_write+0x62/0xa0
+ vfs_write+0xf8/0x420
+ ? srso_return_thunk+0x5/0x5f
+ ? filp_flush+0x86/0xa0
+ ? srso_return_thunk+0x5/0x5f
+ ? filp_close+0x1f/0x30
+ ? srso_return_thunk+0x5/0x5f
+ ? do_dup2+0xaf/0x160
+ ? srso_return_thunk+0x5/0x5f
+ ksys_write+0x65/0xe0
+ do_syscall_64+0x64/0x170
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+
+There is a potential race between __update_and_free_hugetlb_folio() and
+replace_free_hugepage_folios():
+
+CPU1 CPU2
+__update_and_free_hugetlb_folio replace_free_hugepage_folios
+ folio_test_hugetlb(folio)
+ -- It's still hugetlb folio.
+
+ __folio_clear_hugetlb(folio)
+ hugetlb_free_folio(folio)
+ h = folio_hstate(folio)
+ -- Here, h is NULL pointer
+
+When the above race condition occurs, folio_hstate(folio) returns NULL,
+and subsequent access to this NULL pointer will cause the system to crash.
+To resolve this issue, execute folio_hstate(folio) under the protection
+of the hugetlb_lock lock, ensuring that folio_hstate(folio) does not
+return NULL.
+
+Link: https://lkml.kernel.org/r/1747884137-26685-1-git-send-email-yangge1116@126.com
+Fixes: 04f13d241b8b ("mm: replace free hugepage folios after migration")
+Signed-off-by: Ge Yang <yangge1116@126.com>
+Reviewed-by: Muchun Song <muchun.song@linux.dev>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Barry Song <21cnbao@gmail.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -2926,12 +2926,20 @@ int replace_free_hugepage_folios(unsigne
+
+ while (start_pfn < end_pfn) {
+ folio = pfn_folio(start_pfn);
++
++ /*
++ * The folio might have been dissolved from under our feet, so make sure
++ * to carefully check the state under the lock.
++ */
++ spin_lock_irq(&hugetlb_lock);
+ if (folio_test_hugetlb(folio)) {
+ h = folio_hstate(folio);
+ } else {
++ spin_unlock_irq(&hugetlb_lock);
+ start_pfn++;
+ continue;
+ }
++ spin_unlock_irq(&hugetlb_lock);
+
+ if (!folio_ref_count(folio)) {
+ ret = alloc_and_dissolve_hugetlb_folio(h, folio,
--- /dev/null
+From 7190b3c8bd2b0cde483bd440cf91ba1c518b4261 Mon Sep 17 00:00:00 2001
+From: Ignacio Moreno Gonzalez <Ignacio.MorenoGonzalez@kuka.com>
+Date: Wed, 7 May 2025 15:28:06 +0200
+Subject: mm: mmap: map MAP_STACK to VM_NOHUGEPAGE only if THP is enabled
+
+From: Ignacio Moreno Gonzalez <Ignacio.MorenoGonzalez@kuka.com>
+
+commit 7190b3c8bd2b0cde483bd440cf91ba1c518b4261 upstream.
+
+commit c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") maps the
+mmap option MAP_STACK to VM_NOHUGEPAGE. This is also done if
+CONFIG_TRANSPARENT_HUGEPAGE is not defined. But in that case, the
+VM_NOHUGEPAGE does not make sense.
+
+I discovered this issue when trying to use the tool CRIU to checkpoint and
+restore a container. Our running kernel is compiled without
+CONFIG_TRANSPARENT_HUGEPAGE. CRIU parses the output of /proc/<pid>/smaps
+and saves the "nh" flag. When trying to restore the container, CRIU fails
+to restore the "nh" mappings, since madvise() MADV_NOHUGEPAGE always
+returns an error because CONFIG_TRANSPARENT_HUGEPAGE is not defined.
+
+Link: https://lkml.kernel.org/r/20250507-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled-v5-1-c6c38cfefd6e@kuka.com
+Fixes: c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE")
+Signed-off-by: Ignacio Moreno Gonzalez <Ignacio.MorenoGonzalez@kuka.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reviewed-by: Yang Shi <yang@os.amperecomputing.com>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mman.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/include/linux/mman.h
++++ b/include/linux/mman.h
+@@ -157,7 +157,9 @@ calc_vm_flag_bits(struct file *file, uns
+ return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
+ _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) |
+ _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) |
++#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) |
++#endif
+ arch_calc_vm_flag_bits(file, flags);
+ }
+
--- /dev/null
+From e05741fb10c38d70bbd7ec12b23c197b6355d519 Mon Sep 17 00:00:00 2001
+From: Tianyang Zhang <zhangtianyang@loongson.cn>
+Date: Wed, 16 Apr 2025 16:24:05 +0800
+Subject: mm/page_alloc.c: avoid infinite retries caused by cpuset race
+
+From: Tianyang Zhang <zhangtianyang@loongson.cn>
+
+commit e05741fb10c38d70bbd7ec12b23c197b6355d519 upstream.
+
+__alloc_pages_slowpath has no change detection for ac->nodemask in part
+of the retry path, while cpuset can modify it in parallel. For some
+processes that set mempolicy to MPOL_BIND, this results in ac->nodemask
+changing; should_reclaim_retry then judges based on the latest nodemask
+and jumps to retry, while get_page_from_freelist only traverses the
+zonelist from ac->preferred_zoneref, which was selected by an expired
+nodemask. This may cause infinite retries in some cases.
+
+cpu 64:
+__alloc_pages_slowpath {
+ /* ..... */
+retry:
+ /* ac->nodemask = 0x1, ac->preferred->zone->nid = 1 */
+ if (alloc_flags & ALLOC_KSWAPD)
+ wake_all_kswapds(order, gfp_mask, ac);
+ /* cpu 1:
+ cpuset_write_resmask
+ update_nodemask
+ update_nodemasks_hier
+ update_tasks_nodemask
+ mpol_rebind_task
+ mpol_rebind_policy
+ mpol_rebind_nodemask
+ // mempolicy->nodes has been modified,
+ // which ac->nodemask point to
+
+ */
+ /* ac->nodemask = 0x3, ac->preferred->zone->nid = 1 */
+ if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
+ did_some_progress > 0, &no_progress_loops))
+ goto retry;
+}
+
+Simultaneously starting multiple instances of cpuset01 from LTP can quickly
+reproduce this issue on a multi-node server when the maximum memory
+pressure is reached and swap is enabled.
+
+Link: https://lkml.kernel.org/r/20250416082405.20988-1-zhangtianyang@loongson.cn
+Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice")
+Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn>
+Reviewed-by: Suren Baghdasaryan <surenb@google.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Brendan Jackman <jackmanb@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/page_alloc.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -4381,6 +4381,14 @@ restart:
+ }
+
+ retry:
++ /*
++ * Deal with possible cpuset update races or zonelist updates to avoid
++ * infinite retries.
++ */
++ if (check_retry_cpuset(cpuset_mems_cookie, ac) ||
++ check_retry_zonelist(zonelist_iter_cookie))
++ goto restart;
++
+ /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */
+ if (alloc_flags & ALLOC_KSWAPD)
+ wake_all_kswapds(order, gfp_mask, ac);
--- /dev/null
+From f7a35a3c36d1e36059c5654737d9bee3454f01a3 Mon Sep 17 00:00:00 2001
+From: Kees Cook <kees@kernel.org>
+Date: Thu, 15 May 2025 14:42:15 -0700
+Subject: mm: vmalloc: actually use the in-place vrealloc region
+
+From: Kees Cook <kees@kernel.org>
+
+commit f7a35a3c36d1e36059c5654737d9bee3454f01a3 upstream.
+
+Patch series "mm: vmalloc: Actually use the in-place vrealloc region".
+
+This fixes a performance regression[1] with vrealloc().
+
+
+The refactoring to not build a new vmalloc region only actually worked
+when shrinking. Actually return the resized area when it grows. Ugh.
+
+Link: https://lkml.kernel.org/r/20250515214217.619685-1-kees@kernel.org
+Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing")
+Signed-off-by: Kees Cook <kees@kernel.org>
+Reported-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
+Closes: https://lore.kernel.org/all/20250515-bpf-verifier-slowdown-vwo2meju4cgp2su5ckj@6gi6ssxbnfqg [1]
+Tested-by: Eduard Zingerman <eddyz87@gmail.com>
+Tested-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Tested-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
+Reviewed-by: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
+Reviewed-by: Danilo Krummrich <dakr@kernel.org>
+Cc: "Erhard F." <erhard_f@mailbox.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -4115,6 +4115,7 @@ void *vrealloc_noprof(const void *p, siz
+ if (want_init_on_alloc(flags))
+ memset((void *)p + old_size, 0, size - old_size);
+ vm->requested_size = size;
++ return (void *)p;
+ }
+
+ /* TODO: Grow the vm_area, i.e. allocate and map additional pages. */
--- /dev/null
+From 70d1eb031a68cbde4eed8099674be21778441c94 Mon Sep 17 00:00:00 2001
+From: Kees Cook <kees@kernel.org>
+Date: Thu, 15 May 2025 14:42:16 -0700
+Subject: mm: vmalloc: only zero-init on vrealloc shrink
+
+From: Kees Cook <kees@kernel.org>
+
+commit 70d1eb031a68cbde4eed8099674be21778441c94 upstream.
+
+The common case is to grow reallocations, and since init_on_alloc will
+have already zeroed the whole allocation, we only need to zero when
+shrinking the allocation.
+
+Link: https://lkml.kernel.org/r/20250515214217.619685-2-kees@kernel.org
+Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing")
+Signed-off-by: Kees Cook <kees@kernel.org>
+Tested-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
+Cc: Danilo Krummrich <dakr@kernel.org>
+Cc: Eduard Zingerman <eddyz87@gmail.com>
+Cc: "Erhard F." <erhard_f@mailbox.org>
+Cc: Shung-Hsi Yu <shung-hsi.yu@suse.com>
+Cc: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/vmalloc.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/mm/vmalloc.c
++++ b/mm/vmalloc.c
+@@ -4097,8 +4097,8 @@ void *vrealloc_noprof(const void *p, siz
+ * would be a good heuristic for when to shrink the vm_area?
+ */
+ if (size <= old_size) {
+- /* Zero out "freed" memory. */
+- if (want_init_on_free())
++ /* Zero out "freed" memory, potentially for future realloc. */
++ if (want_init_on_free() || want_init_on_alloc(flags))
+ memset((void *)p + size, 0, old_size - size);
+ vm->requested_size = size;
+ kasan_poison_vmalloc(p + size, old_size - size);
+@@ -4111,9 +4111,11 @@ void *vrealloc_noprof(const void *p, siz
+ if (size <= alloced_size) {
+ kasan_unpoison_vmalloc(p + old_size, size - old_size,
+ KASAN_VMALLOC_PROT_NORMAL);
+- /* Zero out "alloced" memory. */
+- if (want_init_on_alloc(flags))
+- memset((void *)p + old_size, 0, size - old_size);
++ /*
++ * No need to zero memory here, as unused memory will have
++ * already been zeroed at initial allocation time or during
++ * realloc shrink time.
++ */
+ vm->requested_size = size;
+ return (void *)p;
+ }
--- /dev/null
+From 221fcbf77578826fad8f4bfa0530b5b55bf9676a Mon Sep 17 00:00:00 2001
+From: David Wang <00107082@163.com>
+Date: Tue, 20 May 2025 00:38:23 +0800
+Subject: module: release codetag section when module load fails
+
+From: David Wang <00107082@163.com>
+
+commit 221fcbf77578826fad8f4bfa0530b5b55bf9676a upstream.
+
+When a module load fails after memory for the codetag section is ready, the
+codetag section memory is not properly released. This causes a memory leak,
+and if the next module load happens to get the same module address, codetag
+may pick the uninitialized section when manipulating tags during module
+unload, leading to an "unable to handle page fault" BUG.
+
+Link: https://lkml.kernel.org/r/20250519163823.7540-1-00107082@163.com
+Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory")
+Closes: https://lore.kernel.org/all/20250516131246.6244-1-00107082@163.com/
+Signed-off-by: David Wang <00107082@163.com>
+Acked-by: Suren Baghdasaryan <surenb@google.com>
+Cc: Petr Pavlu <petr.pavlu@suse.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/module/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/kernel/module/main.c
++++ b/kernel/module/main.c
+@@ -2852,6 +2852,7 @@ static void module_deallocate(struct mod
+ {
+ percpu_modfree(mod);
+ module_arch_freeing_init(mod);
++ codetag_free_module_sections(mod);
+
+ free_mod_mem(mod);
+ }
--- /dev/null
+From fb881cd7604536b17a1927fb0533f9a6982ffcc5 Mon Sep 17 00:00:00 2001
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Date: Sat, 3 May 2025 14:33:14 +0900
+Subject: nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs()
+
+From: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+
+commit fb881cd7604536b17a1927fb0533f9a6982ffcc5 upstream.
+
+After commit c0e473a0d226 ("block: fix race between set_blocksize and read
+paths") was merged, set_blocksize() called by sb_set_blocksize() now locks
+the inode of the backing device file. As a result of this change, syzbot
+started reporting deadlock warnings due to a circular dependency involving
+the semaphore "ns_sem" of the nilfs object, the inode lock of the backing
+device file, and the locks that this inode lock is transitively dependent
+on.
+
+This is caused by a new lock dependency added by the above change, since
+init_nilfs() calls sb_set_blocksize() in the lock section of "ns_sem".
+However, these warnings are false positives because init_nilfs() is called
+in the early stage of the mount operation and the filesystem has not yet
+started.
+
+The reason why "ns_sem" is locked in init_nilfs() was to avoid a race
+condition in nilfs_fill_super() caused by sharing a nilfs object among
+multiple filesystem instances (super block structures) in the early
+implementation. However, nilfs objects and super block structures have
+long ago become one-to-one, and there is no longer any need to use the
+semaphore there.
+
+So, fix this issue by removing the use of the semaphore "ns_sem" in
+init_nilfs().
+
+Link: https://lkml.kernel.org/r/20250503053327.12294-1-konishi.ryusuke@gmail.com
+Fixes: c0e473a0d226 ("block: fix race between set_blocksize and read paths")
+Signed-off-by: Ryusuke Konishi <konishi.ryusuke@gmail.com>
+Reported-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=00f7f5b884b117ee6773
+Tested-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com
+Reported-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=f30591e72bfc24d4715b
+Tested-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/nilfs2/the_nilfs.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/fs/nilfs2/the_nilfs.c
++++ b/fs/nilfs2/the_nilfs.c
+@@ -705,8 +705,6 @@ int init_nilfs(struct the_nilfs *nilfs,
+ int blocksize;
+ int err;
+
+- down_write(&nilfs->ns_sem);
+-
+ blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE);
+ if (!blocksize) {
+ nilfs_err(sb, "unable to set blocksize");
+@@ -779,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs,
+ set_nilfs_init(nilfs);
+ err = 0;
+ out:
+- up_write(&nilfs->ns_sem);
+ return err;
+
+ failed_sbh:
--- /dev/null
+From ca57d1c56f4015d83fe7840b41d74783ee900b28 Mon Sep 17 00:00:00 2001
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 19 Feb 2025 17:21:14 +0100
+Subject: octeontx2: hide unused label
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit ca57d1c56f4015d83fe7840b41d74783ee900b28 upstream.
+
+A previous patch introduces a build-time warning when CONFIG_DCB
+is disabled:
+
+drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c: In function 'otx2_probe':
+drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c:3217:1: error: label 'err_free_zc_bmap' defined but not used [-Werror=unused-label]
+drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c: In function 'otx2vf_probe':
+drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c:740:1: error: label 'err_free_zc_bmap' defined but not used [-Werror=unused-label]
+
+Add the same #ifdef check around it.
+
+Fixes: efabce290151 ("octeontx2-pf: AF_XDP zero copy receive support")
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Suman Ghosh <sumang@marvell.com>
+Link: https://patch.msgid.link/20250219162239.1376865-1-arnd@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 ++
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 ++
+ 2 files changed, 4 insertions(+)
+
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -3214,8 +3214,10 @@ static int otx2_probe(struct pci_dev *pd
+
+ return 0;
+
++#ifdef CONFIG_DCB
+ err_free_zc_bmap:
+ bitmap_free(pf->af_xdp_zc_qidx);
++#endif
+ err_sriov_cleannup:
+ otx2_sriov_vfcfg_cleanup(pf);
+ err_pf_sriov_init:
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c
+@@ -740,8 +740,10 @@ static int otx2vf_probe(struct pci_dev *
+
+ return 0;
+
++#ifdef CONFIG_DCB
+ err_free_zc_bmap:
+ bitmap_free(vf->af_xdp_zc_qidx);
++#endif
+ err_unreg_devlink:
+ otx2_unregister_dl(vf);
+ err_shutdown_tc:
revert-drm-amd-keep-display-off-while-going-into-s4.patch
input-xpad-add-more-controllers.patch
input-synaptics-rmi-fix-crash-with-unsupported-versions-of-f34.patch
+alloc_tag-allocate-percpu-counters-for-module-tags-dynamically.patch
+highmem-add-folio_test_partial_kmap.patch
+kasan-avoid-sleepable-page-allocation-from-atomic-context.patch
+memcg-always-call-cond_resched-after-fn.patch
+mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch
+mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch
+module-release-codetag-section-when-module-load-fails.patch
+taskstats-fix-struct-taskstats-breaks-backward-compatibility-since-version-15.patch
+mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch
+mm-fix-vm_uffd_minor-vm_shadow_stack-on-userfaultfd-y-arm64_gcs-y.patch
+mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch
+mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch
+octeontx2-hide-unused-label.patch
+wifi-mac80211-restore-monitor-for-outgoing-frames.patch
+nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch
+bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch
+bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch
--- /dev/null
+From 0bf2d838de1ffb6d0bb6f8d18a6ccc59b7d9a705 Mon Sep 17 00:00:00 2001
+From: Wang Yaxin <wang.yaxin@zte.com.cn>
+Date: Sat, 10 May 2025 15:54:13 +0800
+Subject: taskstats: fix struct taskstats breaks backward compatibility since version 15
+
+From: Wang Yaxin <wang.yaxin@zte.com.cn>
+
+commit 0bf2d838de1ffb6d0bb6f8d18a6ccc59b7d9a705 upstream.
+
+Problem
+========
+commit 658eb5ab916d ("delayacct: add delay max to record delay peak")
+ - adding more fields
+commit f65c64f311ee ("delayacct: add delay min to record delay peak")
+ - adding more fields
+commit b016d0873777 ("taskstats: modify taskstats version")
+ - version bump to 15
+
+Since version 15 (TASKSTATS_VERSION=15) the new layout of the structure
+adds fields in the middle of the structure, rendering all old software
+incompatible with newer kernels and software compiled against the new
+kernel headers incompatible with older kernels.
+
+Solution
+=========
+Move delay max and delay min to the end of struct taskstats, and bump
+the version to 16 after the change.
+
+[wang.yaxin@zte.com.cn: adjust indentation]
+ Link: https://lkml.kernel.org/r/202505192131489882NSciXV4EGd8zzjLuwoOK@zte.com.cn
+Link: https://lkml.kernel.org/r/20250510155413259V4JNRXxukdDgzsaL0Fo6a@zte.com.cn
+Fixes: f65c64f311ee ("delayacct: add delay min to record delay peak")
+Signed-off-by: Wang Yaxin <wang.yaxin@zte.com.cn>
+Signed-off-by: xu xin <xu.xin16@zte.com.cn>
+Signed-off-by: Kun Jiang <jiang.kun2@zte.com.cn>
+Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
+Cc: Balbir Singh <bsingharora@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/taskstats.h | 47 +++++++++++++++++++++-------------
+ 1 file changed, 29 insertions(+), 18 deletions(-)
+
+diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h
+index 95762232e018..5929030d4e8b 100644
+--- a/include/uapi/linux/taskstats.h
++++ b/include/uapi/linux/taskstats.h
+@@ -34,7 +34,7 @@
+ */
+
+
+-#define TASKSTATS_VERSION 15
++#define TASKSTATS_VERSION 16
+ #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN
+ * in linux/sched.h */
+
+@@ -72,8 +72,6 @@ struct taskstats {
+ */
+ __u64 cpu_count __attribute__((aligned(8)));
+ __u64 cpu_delay_total;
+- __u64 cpu_delay_max;
+- __u64 cpu_delay_min;
+
+ /* Following four fields atomically updated using task->delays->lock */
+
+@@ -82,14 +80,10 @@ struct taskstats {
+ */
+ __u64 blkio_count;
+ __u64 blkio_delay_total;
+- __u64 blkio_delay_max;
+- __u64 blkio_delay_min;
+
+ /* Delay waiting for page fault I/O (swap in only) */
+ __u64 swapin_count;
+ __u64 swapin_delay_total;
+- __u64 swapin_delay_max;
+- __u64 swapin_delay_min;
+
+ /* cpu "wall-clock" running time
+ * On some architectures, value will adjust for cpu time stolen
+@@ -172,14 +166,11 @@ struct taskstats {
+ /* Delay waiting for memory reclaim */
+ __u64 freepages_count;
+ __u64 freepages_delay_total;
+- __u64 freepages_delay_max;
+- __u64 freepages_delay_min;
++
+
+ /* Delay waiting for thrashing page */
+ __u64 thrashing_count;
+ __u64 thrashing_delay_total;
+- __u64 thrashing_delay_max;
+- __u64 thrashing_delay_min;
+
+ /* v10: 64-bit btime to avoid overflow */
+ __u64 ac_btime64; /* 64-bit begin time */
+@@ -187,8 +178,6 @@ struct taskstats {
+ /* v11: Delay waiting for memory compact */
+ __u64 compact_count;
+ __u64 compact_delay_total;
+- __u64 compact_delay_max;
+- __u64 compact_delay_min;
+
+ /* v12 begin */
+ __u32 ac_tgid; /* thread group ID */
+@@ -210,15 +199,37 @@ struct taskstats {
+ /* v13: Delay waiting for write-protect copy */
+ __u64 wpcopy_count;
+ __u64 wpcopy_delay_total;
+- __u64 wpcopy_delay_max;
+- __u64 wpcopy_delay_min;
+
+ /* v14: Delay waiting for IRQ/SOFTIRQ */
+ __u64 irq_count;
+ __u64 irq_delay_total;
+- __u64 irq_delay_max;
+- __u64 irq_delay_min;
+- /* v15: add Delay max */
++
++ /* v15: add Delay max and Delay min */
++
++ /* v16: move Delay max and Delay min to the end of taskstat */
++ __u64 cpu_delay_max;
++ __u64 cpu_delay_min;
++
++ __u64 blkio_delay_max;
++ __u64 blkio_delay_min;
++
++ __u64 swapin_delay_max;
++ __u64 swapin_delay_min;
++
++ __u64 freepages_delay_max;
++ __u64 freepages_delay_min;
++
++ __u64 thrashing_delay_max;
++ __u64 thrashing_delay_min;
++
++ __u64 compact_delay_max;
++ __u64 compact_delay_min;
++
++ __u64 wpcopy_delay_max;
++ __u64 wpcopy_delay_min;
++
++ __u64 irq_delay_max;
++ __u64 irq_delay_min;
+ };
+
+
+--
+2.49.0
+
--- /dev/null
+From abf078c0a322159f5ebe2adaa0cd69dc45b1e710 Mon Sep 17 00:00:00 2001
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Tue, 22 Apr 2025 21:32:51 +0200
+Subject: wifi: mac80211: restore monitor for outgoing frames
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+commit abf078c0a322159f5ebe2adaa0cd69dc45b1e710 upstream.
+
+This code was accidentally dropped during the cooked
+monitor removal, but really should've been simplified
+instead. Add the simple version back.
+
+Fixes: 286e69677065 ("wifi: mac80211: Drop cooked monitor support")
+Link: https://patch.msgid.link/20250422213251.b3d65fd0f323.Id2a6901583f7af86bbe94deb355968b238f350c6@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/mac80211/status.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/net/mac80211/status.c
++++ b/net/mac80211/status.c
+@@ -1085,7 +1085,13 @@ static void __ieee80211_tx_status(struct
+
+ ieee80211_report_used_skb(local, skb, false, status->ack_hwtstamp);
+
+- if (status->free_list)
++ /*
++ * This is a bit racy but we can avoid a lot of work
++ * with this test...
++ */
++ if (local->tx_mntrs)
++ ieee80211_tx_monitor(local, skb, retry_count, status);
++ else if (status->free_list)
+ list_add_tail(&skb->list, status->free_list);
+ else
+ dev_kfree_skb(skb);