From dee9426369374cfd16ca6b66a9e0e97b7d334389 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 26 May 2025 14:18:38 +0200 Subject: [PATCH] 6.14-stable patches added patches: alloc_tag-allocate-percpu-counters-for-module-tags-dynamically.patch bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch highmem-add-folio_test_partial_kmap.patch kasan-avoid-sleepable-page-allocation-from-atomic-context.patch memcg-always-call-cond_resched-after-fn.patch mm-fix-vm_uffd_minor-vm_shadow_stack-on-userfaultfd-y-arm64_gcs-y.patch mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch module-release-codetag-section-when-module-load-fails.patch nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch octeontx2-hide-unused-label.patch taskstats-fix-struct-taskstats-breaks-backward-compatibility-since-version-15.patch wifi-mac80211-restore-monitor-for-outgoing-frames.patch --- ...counters-for-module-tags-dynamically.patch | 265 ++++++++++++++++++ ...-function-enabled-before-doing-close.patch | 33 +++ ...e-if-sdio-card-removed-without-close.patch | 41 +++ .../highmem-add-folio_test_partial_kmap.patch | 93 ++++++ ...-page-allocation-from-atomic-context.patch | 194 +++++++++++++ ...cg-always-call-cond_resched-after-fn.patch | 80 ++++++ ...w_stack-on-userfaultfd-y-arm64_gcs-y.patch | 52 ++++ ...e-when-replacing-free-hugetlb-folios.patch | 107 +++++++ ...vm_nohugepage-only-if-thp-is-enabled.patch | 47 ++++ ...finite-retries-caused-by-cpuset-race.patch | 79 ++++++ ...lly-use-the-in-place-vrealloc-region.patch | 45 +++ ...oc-only-zero-init-on-vrealloc-shrink.patch | 57 ++++ ...detag-section-when-module-load-fails.patch | 38 +++ ...sed-by-lock-dependency-in-init_nilfs.patch | 67 +++++ queue-6.14/octeontx2-hide-unused-label.patch | 56 ++++ queue-6.14/series | 17 ++ ...kward-compatibility-since-version-15.patch | 152 ++++++++++ ...-restore-monitor-for-outgoing-frames.patch | 38 +++ 18 files changed, 1461 insertions(+) create mode 100644 queue-6.14/alloc_tag-allocate-percpu-counters-for-module-tags-dynamically.patch create mode 100644 queue-6.14/bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch create mode 100644 queue-6.14/bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch create mode 100644 queue-6.14/highmem-add-folio_test_partial_kmap.patch create mode 100644 queue-6.14/kasan-avoid-sleepable-page-allocation-from-atomic-context.patch create mode 100644 queue-6.14/memcg-always-call-cond_resched-after-fn.patch create mode 100644 queue-6.14/mm-fix-vm_uffd_minor-vm_shadow_stack-on-userfaultfd-y-arm64_gcs-y.patch create mode 100644 queue-6.14/mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch create mode 100644 queue-6.14/mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch create mode 100644 queue-6.14/mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch create mode 100644 queue-6.14/mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch create mode 100644 queue-6.14/mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch create mode 100644 queue-6.14/module-release-codetag-section-when-module-load-fails.patch create mode 100644 
queue-6.14/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch create mode 100644 queue-6.14/octeontx2-hide-unused-label.patch create mode 100644 queue-6.14/taskstats-fix-struct-taskstats-breaks-backward-compatibility-since-version-15.patch create mode 100644 queue-6.14/wifi-mac80211-restore-monitor-for-outgoing-frames.patch diff --git a/queue-6.14/alloc_tag-allocate-percpu-counters-for-module-tags-dynamically.patch b/queue-6.14/alloc_tag-allocate-percpu-counters-for-module-tags-dynamically.patch new file mode 100644 index 0000000000..829475a6b8 --- /dev/null +++ b/queue-6.14/alloc_tag-allocate-percpu-counters-for-module-tags-dynamically.patch @@ -0,0 +1,265 @@ +From 12ca42c237756182aad8ab04654c952765cb9061 Mon Sep 17 00:00:00 2001 +From: Suren Baghdasaryan +Date: Fri, 16 May 2025 17:07:39 -0700 +Subject: alloc_tag: allocate percpu counters for module tags dynamically + +From: Suren Baghdasaryan + +commit 12ca42c237756182aad8ab04654c952765cb9061 upstream. + +When a module gets unloaded it checks whether any of its tags are still in +use and if so, we keep the memory containing module's allocation tags +alive until all tags are unused. However percpu counters referenced by +the tags are freed by free_module(). This will lead to UAF if the memory +allocated by a module is accessed after module was unloaded. + +To fix this we allocate percpu counters for module allocation tags +dynamically and we keep it alive for tags which are still in use after +module unloading. This also removes the requirement of a larger +PERCPU_MODULE_RESERVE when memory allocation profiling is enabled because +percpu memory for counters does not need to be reserved anymore. + +Link: https://lkml.kernel.org/r/20250517000739.5930-1-surenb@google.com +Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory") +Signed-off-by: Suren Baghdasaryan +Reported-by: David Wang <00107082@163.com> +Closes: https://lore.kernel.org/all/20250516131246.6244-1-00107082@163.com/ +Tested-by: David Wang <00107082@163.com> +Cc: Christoph Lameter (Ampere) +Cc: Dennis Zhou +Cc: Kent Overstreet +Cc: Pasha Tatashin +Cc: Tejun Heo +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/alloc_tag.h | 12 ++++++ + include/linux/codetag.h | 8 ++-- + include/linux/percpu.h | 4 -- + lib/alloc_tag.c | 89 ++++++++++++++++++++++++++++++++++++---------- + lib/codetag.c | 5 +- + 5 files changed, 89 insertions(+), 29 deletions(-) + +--- a/include/linux/alloc_tag.h ++++ b/include/linux/alloc_tag.h +@@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counter + + #else /* ARCH_NEEDS_WEAK_PER_CPU */ + ++#ifdef MODULE ++ ++#define DEFINE_ALLOC_TAG(_alloc_tag) \ ++ static struct alloc_tag _alloc_tag __used __aligned(8) \ ++ __section(ALLOC_TAG_SECTION_NAME) = { \ ++ .ct = CODE_TAG_INIT, \ ++ .counters = NULL }; ++ ++#else /* MODULE */ ++ + #define DEFINE_ALLOC_TAG(_alloc_tag) \ + static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ +@@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counter + .ct = CODE_TAG_INIT, \ + .counters = &_alloc_tag_cntr }; + ++#endif /* MODULE */ ++ + #endif /* ARCH_NEEDS_WEAK_PER_CPU */ + + DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, +--- a/include/linux/codetag.h ++++ b/include/linux/codetag.h +@@ -36,10 +36,10 @@ union codetag_ref { + struct codetag_type_desc { + const char *section; + size_t tag_size; +- void (*module_load)(struct codetag_type 
*cttype, +- struct codetag_module *cmod); +- void (*module_unload)(struct codetag_type *cttype, +- struct codetag_module *cmod); ++ void (*module_load)(struct module *mod, ++ struct codetag *start, struct codetag *end); ++ void (*module_unload)(struct module *mod, ++ struct codetag *start, struct codetag *end); + #ifdef CONFIG_MODULES + void (*module_replaced)(struct module *mod, struct module *new_mod); + bool (*needs_section_mem)(struct module *mod, unsigned long size); +--- a/include/linux/percpu.h ++++ b/include/linux/percpu.h +@@ -15,11 +15,7 @@ + + /* enough to cover all DEFINE_PER_CPUs in modules */ + #ifdef CONFIG_MODULES +-#ifdef CONFIG_MEM_ALLOC_PROFILING +-#define PERCPU_MODULE_RESERVE (8 << 13) +-#else + #define PERCPU_MODULE_RESERVE (8 << 10) +-#endif + #else + #define PERCPU_MODULE_RESERVE 0 + #endif +--- a/lib/alloc_tag.c ++++ b/lib/alloc_tag.c +@@ -350,18 +350,28 @@ static bool needs_section_mem(struct mod + return size >= sizeof(struct alloc_tag); + } + +-static struct alloc_tag *find_used_tag(struct alloc_tag *from, struct alloc_tag *to) ++static bool clean_unused_counters(struct alloc_tag *start_tag, ++ struct alloc_tag *end_tag) + { +- while (from <= to) { ++ struct alloc_tag *tag; ++ bool ret = true; ++ ++ for (tag = start_tag; tag <= end_tag; tag++) { + struct alloc_tag_counters counter; + +- counter = alloc_tag_read(from); +- if (counter.bytes) +- return from; +- from++; ++ if (!tag->counters) ++ continue; ++ ++ counter = alloc_tag_read(tag); ++ if (!counter.bytes) { ++ free_percpu(tag->counters); ++ tag->counters = NULL; ++ } else { ++ ret = false; ++ } + } + +- return NULL; ++ return ret; + } + + /* Called with mod_area_mt locked */ +@@ -371,12 +381,16 @@ static void clean_unused_module_areas_lo + struct module *val; + + mas_for_each(&mas, val, module_tags.size) { ++ struct alloc_tag *start_tag; ++ struct alloc_tag *end_tag; ++ + if (val != &unloaded_mod) + continue; + + /* Release area if all tags are unused */ +- if (!find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), +- (struct alloc_tag *)(module_tags.start_addr + mas.last))) ++ start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index); ++ end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last); ++ if (clean_unused_counters(start_tag, end_tag)) + mas_erase(&mas); + } + } +@@ -561,7 +575,8 @@ unlock: + static void release_module_tags(struct module *mod, bool used) + { + MA_STATE(mas, &mod_area_mt, module_tags.size, module_tags.size); +- struct alloc_tag *tag; ++ struct alloc_tag *start_tag; ++ struct alloc_tag *end_tag; + struct module *val; + + mas_lock(&mas); +@@ -575,15 +590,22 @@ static void release_module_tags(struct m + if (!used) + goto release_area; + +- /* Find out if the area is used */ +- tag = find_used_tag((struct alloc_tag *)(module_tags.start_addr + mas.index), +- (struct alloc_tag *)(module_tags.start_addr + mas.last)); +- if (tag) { +- struct alloc_tag_counters counter = alloc_tag_read(tag); +- +- pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", +- tag->ct.filename, tag->ct.lineno, tag->ct.modname, +- tag->ct.function, counter.bytes); ++ start_tag = (struct alloc_tag *)(module_tags.start_addr + mas.index); ++ end_tag = (struct alloc_tag *)(module_tags.start_addr + mas.last); ++ if (!clean_unused_counters(start_tag, end_tag)) { ++ struct alloc_tag *tag; ++ ++ for (tag = start_tag; tag <= end_tag; tag++) { ++ struct alloc_tag_counters counter; ++ ++ if (!tag->counters) ++ continue; ++ ++ counter = alloc_tag_read(tag); ++ 
pr_info("%s:%u module %s func:%s has %llu allocated at module unload\n", ++ tag->ct.filename, tag->ct.lineno, tag->ct.modname, ++ tag->ct.function, counter.bytes); ++ } + } else { + used = false; + } +@@ -596,6 +618,34 @@ out: + mas_unlock(&mas); + } + ++static void load_module(struct module *mod, struct codetag *start, struct codetag *stop) ++{ ++ /* Allocate module alloc_tag percpu counters */ ++ struct alloc_tag *start_tag; ++ struct alloc_tag *stop_tag; ++ struct alloc_tag *tag; ++ ++ if (!mod) ++ return; ++ ++ start_tag = ct_to_alloc_tag(start); ++ stop_tag = ct_to_alloc_tag(stop); ++ for (tag = start_tag; tag < stop_tag; tag++) { ++ WARN_ON(tag->counters); ++ tag->counters = alloc_percpu(struct alloc_tag_counters); ++ if (!tag->counters) { ++ while (--tag >= start_tag) { ++ free_percpu(tag->counters); ++ tag->counters = NULL; ++ } ++ shutdown_mem_profiling(true); ++ pr_err("Failed to allocate memory for allocation tag percpu counters in the module %s. Memory allocation profiling is disabled!\n", ++ mod->name); ++ break; ++ } ++ } ++} ++ + static void replace_module(struct module *mod, struct module *new_mod) + { + MA_STATE(mas, &mod_area_mt, 0, module_tags.size); +@@ -757,6 +807,7 @@ static int __init alloc_tag_init(void) + .needs_section_mem = needs_section_mem, + .alloc_section_mem = reserve_module_tags, + .free_section_mem = release_module_tags, ++ .module_load = load_module, + .module_replaced = replace_module, + #endif + }; +--- a/lib/codetag.c ++++ b/lib/codetag.c +@@ -194,7 +194,7 @@ static int codetag_module_init(struct co + if (err >= 0) { + cttype->count += range_size(cttype, &range); + if (cttype->desc.module_load) +- cttype->desc.module_load(cttype, cmod); ++ cttype->desc.module_load(mod, range.start, range.stop); + } + up_write(&cttype->mod_lock); + +@@ -333,7 +333,8 @@ void codetag_unload_module(struct module + } + if (found) { + if (cttype->desc.module_unload) +- cttype->desc.module_unload(cttype, cmod); ++ cttype->desc.module_unload(cmod->mod, ++ cmod->range.start, cmod->range.stop); + + cttype->count -= range_size(cttype, &cmod->range); + idr_remove(&cttype->mod_idr, mod_id); diff --git a/queue-6.14/bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch b/queue-6.14/bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch new file mode 100644 index 0000000000..5b22e09dce --- /dev/null +++ b/queue-6.14/bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch @@ -0,0 +1,33 @@ +From 07e90048e356a29079fbc011cfc2e1fa1d1c5ac9 Mon Sep 17 00:00:00 2001 +From: Chris Lu +Date: Tue, 22 Apr 2025 09:21:55 +0800 +Subject: Bluetooth: btmtksdio: Check function enabled before doing close + +From: Chris Lu + +commit 07e90048e356a29079fbc011cfc2e1fa1d1c5ac9 upstream. + +Check BTMTKSDIO_FUNC_ENABLED flag before doing close to prevent +btmtksdio_close been called twice. 
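In effect the guard makes the close path idempotent, so a later call (for instance from the remove path addressed in the next patch) simply returns. A minimal sketch of the pattern follows; the _sketch name and the elided teardown steps are illustrative, only the flag test mirrors the actual change:

    static int btmtksdio_close_sketch(struct hci_dev *hdev)
    {
            struct btmtksdio_dev *bdev = hci_get_drvdata(hdev);

            /* Nothing to tear down if the function was never enabled or
             * if close already ran once.
             */
            if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state))
                    return 0;

            /* ... disable interrupts, flush tx/rx work, power the function down ... */

            clear_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state);
            return 0;
    }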
+ +Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal") +Signed-off-by: Chris Lu +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/btmtksdio.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/bluetooth/btmtksdio.c ++++ b/drivers/bluetooth/btmtksdio.c +@@ -723,6 +723,10 @@ static int btmtksdio_close(struct hci_de + { + struct btmtksdio_dev *bdev = hci_get_drvdata(hdev); + ++ /* Skip btmtksdio_close if BTMTKSDIO_FUNC_ENABLED isn't set */ ++ if (!test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) ++ return 0; ++ + sdio_claim_host(bdev->func); + + /* Disable interrupt */ diff --git a/queue-6.14/bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch b/queue-6.14/bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch new file mode 100644 index 0000000000..82d20c2722 --- /dev/null +++ b/queue-6.14/bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch @@ -0,0 +1,41 @@ +From 0b6d58bc6ea85e57de25c828444928e4a0aa79cb Mon Sep 17 00:00:00 2001 +From: Chris Lu +Date: Tue, 22 Apr 2025 09:21:56 +0800 +Subject: Bluetooth: btmtksdio: Do close if SDIO card removed without close + +From: Chris Lu + +commit 0b6d58bc6ea85e57de25c828444928e4a0aa79cb upstream. + +To prevent Bluetooth SDIO card from be physically removed suddenly, +driver needs to ensure btmtksdio_close is called before +btmtksdio_remove to disable interrupts and txrx workqueue. + +Fixes: 6ac4233afb9a ("Bluetooth: btmtksdio: Prevent enabling interrupts after IRQ handler removal") +Signed-off-by: Chris Lu +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/btmtksdio.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/drivers/bluetooth/btmtksdio.c ++++ b/drivers/bluetooth/btmtksdio.c +@@ -1447,11 +1447,15 @@ static void btmtksdio_remove(struct sdio + if (!bdev) + return; + ++ hdev = bdev->hdev; ++ ++ /* Make sure to call btmtksdio_close before removing sdio card */ ++ if (test_bit(BTMTKSDIO_FUNC_ENABLED, &bdev->tx_state)) ++ btmtksdio_close(hdev); ++ + /* Be consistent the state in btmtksdio_probe */ + pm_runtime_get_noresume(bdev->dev); + +- hdev = bdev->hdev; +- + sdio_set_drvdata(func, NULL); + hci_unregister_dev(hdev); + hci_free_dev(hdev); diff --git a/queue-6.14/highmem-add-folio_test_partial_kmap.patch b/queue-6.14/highmem-add-folio_test_partial_kmap.patch new file mode 100644 index 0000000000..e8418274f3 --- /dev/null +++ b/queue-6.14/highmem-add-folio_test_partial_kmap.patch @@ -0,0 +1,93 @@ +From 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 Mon Sep 17 00:00:00 2001 +From: "Matthew Wilcox (Oracle)" +Date: Wed, 14 May 2025 18:06:02 +0100 +Subject: highmem: add folio_test_partial_kmap() + +From: Matthew Wilcox (Oracle) + +commit 97dfbbd135cb5e4426f37ca53a8fa87eaaa4e376 upstream. + +In commit c749d9b7ebbc ("iov_iter: fix copy_page_from_iter_atomic() if +KMAP_LOCAL_FORCE_MAP"), Hugh correctly noted that if KMAP_LOCAL_FORCE_MAP +is enabled, we must limit ourselves to PAGE_SIZE bytes per call to +kmap_local(). The same problem exists in memcpy_from_folio(), +memcpy_to_folio(), folio_zero_tail(), folio_fill_tail() and +memcpy_from_file_folio(), so add folio_test_partial_kmap() to do this more +succinctly. 
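The rule these helpers now share: when the kmap is partial, only one page of the folio is mapped at a time, so each copy must stop at the current page boundary and map the next page separately. Roughly (a condensed form of memcpy_from_folio(); the _sketch name is illustrative and error handling is omitted):

    static void copy_from_folio_sketch(char *to, struct folio *folio,
                                       size_t offset, size_t len)
    {
            do {
                    const char *from = kmap_local_folio(folio, offset);
                    size_t chunk = len;

                    /* Only one page may be mapped, so cap the copy at its end. */
                    if (folio_test_partial_kmap(folio) &&
                        chunk > PAGE_SIZE - offset_in_page(offset))
                            chunk = PAGE_SIZE - offset_in_page(offset);

                    memcpy(to, from, chunk);
                    kunmap_local(from);

                    to += chunk;
                    offset += chunk;
                    len -= chunk;
            } while (len > 0);
    }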
+ +Link: https://lkml.kernel.org/r/20250514170607.3000994-2-willy@infradead.org +Fixes: 00cdf76012ab ("mm: add memcpy_from_file_folio()") +Signed-off-by: Matthew Wilcox (Oracle) +Cc: Al Viro +Cc: Hugh Dickins +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/highmem.h | 10 +++++----- + include/linux/page-flags.h | 7 +++++++ + 2 files changed, 12 insertions(+), 5 deletions(-) + +--- a/include/linux/highmem.h ++++ b/include/linux/highmem.h +@@ -461,7 +461,7 @@ static inline void memcpy_from_folio(cha + const char *from = kmap_local_folio(folio, offset); + size_t chunk = len; + +- if (folio_test_highmem(folio) && ++ if (folio_test_partial_kmap(folio) && + chunk > PAGE_SIZE - offset_in_page(offset)) + chunk = PAGE_SIZE - offset_in_page(offset); + memcpy(to, from, chunk); +@@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struc + char *to = kmap_local_folio(folio, offset); + size_t chunk = len; + +- if (folio_test_highmem(folio) && ++ if (folio_test_partial_kmap(folio) && + chunk > PAGE_SIZE - offset_in_page(offset)) + chunk = PAGE_SIZE - offset_in_page(offset); + memcpy(to, from, chunk); +@@ -522,7 +522,7 @@ static inline __must_check void *folio_z + { + size_t len = folio_size(folio) - offset; + +- if (folio_test_highmem(folio)) { ++ if (folio_test_partial_kmap(folio)) { + size_t max = PAGE_SIZE - offset_in_page(offset); + + while (len > max) { +@@ -560,7 +560,7 @@ static inline void folio_fill_tail(struc + + VM_BUG_ON(offset + len > folio_size(folio)); + +- if (folio_test_highmem(folio)) { ++ if (folio_test_partial_kmap(folio)) { + size_t max = PAGE_SIZE - offset_in_page(offset); + + while (len > max) { +@@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_fo + size_t offset = offset_in_folio(folio, pos); + char *from = kmap_local_folio(folio, offset); + +- if (folio_test_highmem(folio)) { ++ if (folio_test_partial_kmap(folio)) { + offset = offset_in_page(offset); + len = min_t(size_t, len, PAGE_SIZE - offset); + } else +--- a/include/linux/page-flags.h ++++ b/include/linux/page-flags.h +@@ -578,6 +578,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) + PAGEFLAG_FALSE(HighMem, highmem) + #endif + ++/* Does kmap_local_folio() only allow access to one page of the folio? */ ++#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP ++#define folio_test_partial_kmap(f) true ++#else ++#define folio_test_partial_kmap(f) folio_test_highmem(f) ++#endif ++ + #ifdef CONFIG_SWAP + static __always_inline bool folio_test_swapcache(const struct folio *folio) + { diff --git a/queue-6.14/kasan-avoid-sleepable-page-allocation-from-atomic-context.patch b/queue-6.14/kasan-avoid-sleepable-page-allocation-from-atomic-context.patch new file mode 100644 index 0000000000..18b6ab865e --- /dev/null +++ b/queue-6.14/kasan-avoid-sleepable-page-allocation-from-atomic-context.patch @@ -0,0 +1,194 @@ +From b6ea95a34cbd014ab6ade4248107b86b0aaf2d6c Mon Sep 17 00:00:00 2001 +From: Alexander Gordeev +Date: Thu, 15 May 2025 15:55:38 +0200 +Subject: kasan: avoid sleepable page allocation from atomic context + +From: Alexander Gordeev + +commit b6ea95a34cbd014ab6ade4248107b86b0aaf2d6c upstream. + +apply_to_pte_range() enters the lazy MMU mode and then invokes +kasan_populate_vmalloc_pte() callback on each page table walk iteration. +However, the callback can go into sleep when trying to allocate a single +page, e.g. if an architecutre disables preemption on lazy MMU mode enter. 
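Reduced to its essence, the offending pattern is a sleepable allocation issued while the lazy MMU hooks may have disabled preemption (an illustrative fragment, not the literal call chain):

    arch_enter_lazy_mmu_mode();            /* may imply preempt_disable() on some architectures */
    page = __get_free_page(GFP_KERNEL);    /* may sleep -> "sleeping function called from invalid context" */
    arch_leave_lazy_mmu_mode();            /* may imply preempt_enable() */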
+ +On s390 if make arch_enter_lazy_mmu_mode() -> preempt_enable() and +arch_leave_lazy_mmu_mode() -> preempt_disable(), such crash occurs: + +[ 0.663336] BUG: sleeping function called from invalid context at ./include/linux/sched/mm.h:321 +[ 0.663348] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 2, name: kthreadd +[ 0.663358] preempt_count: 1, expected: 0 +[ 0.663366] RCU nest depth: 0, expected: 0 +[ 0.663375] no locks held by kthreadd/2. +[ 0.663383] Preemption disabled at: +[ 0.663386] [<0002f3284cbb4eda>] apply_to_pte_range+0xfa/0x4a0 +[ 0.663405] CPU: 0 UID: 0 PID: 2 Comm: kthreadd Not tainted 6.15.0-rc5-gcc-kasan-00043-gd76bb1ebb558-dirty #162 PREEMPT +[ 0.663408] Hardware name: IBM 3931 A01 701 (KVM/Linux) +[ 0.663409] Call Trace: +[ 0.663410] [<0002f3284c385f58>] dump_stack_lvl+0xe8/0x140 +[ 0.663413] [<0002f3284c507b9e>] __might_resched+0x66e/0x700 +[ 0.663415] [<0002f3284cc4f6c0>] __alloc_frozen_pages_noprof+0x370/0x4b0 +[ 0.663419] [<0002f3284ccc73c0>] alloc_pages_mpol+0x1a0/0x4a0 +[ 0.663421] [<0002f3284ccc8518>] alloc_frozen_pages_noprof+0x88/0xc0 +[ 0.663424] [<0002f3284ccc8572>] alloc_pages_noprof+0x22/0x120 +[ 0.663427] [<0002f3284cc341ac>] get_free_pages_noprof+0x2c/0xc0 +[ 0.663429] [<0002f3284cceba70>] kasan_populate_vmalloc_pte+0x50/0x120 +[ 0.663433] [<0002f3284cbb4ef8>] apply_to_pte_range+0x118/0x4a0 +[ 0.663435] [<0002f3284cbc7c14>] apply_to_pmd_range+0x194/0x3e0 +[ 0.663437] [<0002f3284cbc99be>] __apply_to_page_range+0x2fe/0x7a0 +[ 0.663440] [<0002f3284cbc9e88>] apply_to_page_range+0x28/0x40 +[ 0.663442] [<0002f3284ccebf12>] kasan_populate_vmalloc+0x82/0xa0 +[ 0.663445] [<0002f3284cc1578c>] alloc_vmap_area+0x34c/0xc10 +[ 0.663448] [<0002f3284cc1c2a6>] __get_vm_area_node+0x186/0x2a0 +[ 0.663451] [<0002f3284cc1e696>] __vmalloc_node_range_noprof+0x116/0x310 +[ 0.663454] [<0002f3284cc1d950>] __vmalloc_node_noprof+0xd0/0x110 +[ 0.663457] [<0002f3284c454b88>] alloc_thread_stack_node+0xf8/0x330 +[ 0.663460] [<0002f3284c458d56>] dup_task_struct+0x66/0x4d0 +[ 0.663463] [<0002f3284c45be90>] copy_process+0x280/0x4b90 +[ 0.663465] [<0002f3284c460940>] kernel_clone+0xd0/0x4b0 +[ 0.663467] [<0002f3284c46115e>] kernel_thread+0xbe/0xe0 +[ 0.663469] [<0002f3284c4e440e>] kthreadd+0x50e/0x7f0 +[ 0.663472] [<0002f3284c38c04a>] __ret_from_fork+0x8a/0xf0 +[ 0.663475] [<0002f3284ed57ff2>] ret_from_fork+0xa/0x38 + +Instead of allocating single pages per-PTE, bulk-allocate the shadow +memory prior to applying kasan_populate_vmalloc_pte() callback on a page +range. 
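A condensed sketch of that two-phase approach (the populate_shadow_sketch name is illustrative; the real __kasan_populate_vmalloc() added below additionally processes the range in batches sized to one page of page pointers):

    static int populate_shadow_sketch(unsigned long start, unsigned long nr_pages)
    {
            struct vmalloc_populate_data data;
            unsigned long i, got = 0;
            int ret;

            data.start = start;
            data.pages = kcalloc(nr_pages, sizeof(*data.pages), GFP_KERNEL);
            if (!data.pages)
                    return -ENOMEM;

            /* Phase 1: sleepable bulk allocation, done before any page-table walk. */
            while (got < nr_pages) {
                    unsigned long n = alloc_pages_bulk(GFP_KERNEL, nr_pages - got,
                                                       data.pages + got);
                    if (!n) {
                            ret = -ENOMEM;
                            goto free;
                    }
                    got += n;
            }

            /* Phase 2: the walk may run with preemption disabled, but it only
             * installs the pre-allocated pages; installed entries are NULLed
             * out by the callback so they are not freed below.
             */
            ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE,
                                      kasan_populate_vmalloc_pte, &data);
    free:
            for (i = 0; i < got; i++)
                    if (data.pages[i])
                            __free_pages(data.pages[i], 0);
            kfree(data.pages);
            return ret;
    }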
+ +Link: https://lkml.kernel.org/r/c61d3560297c93ed044f0b1af085610353a06a58.1747316918.git.agordeev@linux.ibm.com +Fixes: 3c5c3cfb9ef4 ("kasan: support backing vmalloc space with real shadow memory") +Signed-off-by: Alexander Gordeev +Suggested-by: Andrey Ryabinin +Reviewed-by: Harry Yoo +Cc: Daniel Axtens +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/kasan/shadow.c | 92 +++++++++++++++++++++++++++++++++++++++++++++--------- + 1 file changed, 78 insertions(+), 14 deletions(-) + +--- a/mm/kasan/shadow.c ++++ b/mm/kasan/shadow.c +@@ -292,33 +292,99 @@ void __init __weak kasan_populate_early_ + { + } + ++struct vmalloc_populate_data { ++ unsigned long start; ++ struct page **pages; ++}; ++ + static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr, +- void *unused) ++ void *_data) + { +- unsigned long page; ++ struct vmalloc_populate_data *data = _data; ++ struct page *page; + pte_t pte; ++ int index; + + if (likely(!pte_none(ptep_get(ptep)))) + return 0; + +- page = __get_free_page(GFP_KERNEL); +- if (!page) +- return -ENOMEM; +- +- __memset((void *)page, KASAN_VMALLOC_INVALID, PAGE_SIZE); +- pte = pfn_pte(PFN_DOWN(__pa(page)), PAGE_KERNEL); ++ index = PFN_DOWN(addr - data->start); ++ page = data->pages[index]; ++ __memset(page_to_virt(page), KASAN_VMALLOC_INVALID, PAGE_SIZE); ++ pte = pfn_pte(page_to_pfn(page), PAGE_KERNEL); + + spin_lock(&init_mm.page_table_lock); + if (likely(pte_none(ptep_get(ptep)))) { + set_pte_at(&init_mm, addr, ptep, pte); +- page = 0; ++ data->pages[index] = NULL; + } + spin_unlock(&init_mm.page_table_lock); +- if (page) +- free_page(page); ++ ++ return 0; ++} ++ ++static void ___free_pages_bulk(struct page **pages, int nr_pages) ++{ ++ int i; ++ ++ for (i = 0; i < nr_pages; i++) { ++ if (pages[i]) { ++ __free_pages(pages[i], 0); ++ pages[i] = NULL; ++ } ++ } ++} ++ ++static int ___alloc_pages_bulk(struct page **pages, int nr_pages) ++{ ++ unsigned long nr_populated, nr_total = nr_pages; ++ struct page **page_array = pages; ++ ++ while (nr_pages) { ++ nr_populated = alloc_pages_bulk(GFP_KERNEL, nr_pages, pages); ++ if (!nr_populated) { ++ ___free_pages_bulk(page_array, nr_total - nr_pages); ++ return -ENOMEM; ++ } ++ pages += nr_populated; ++ nr_pages -= nr_populated; ++ } ++ + return 0; + } + ++static int __kasan_populate_vmalloc(unsigned long start, unsigned long end) ++{ ++ unsigned long nr_pages, nr_total = PFN_UP(end - start); ++ struct vmalloc_populate_data data; ++ int ret = 0; ++ ++ data.pages = (struct page **)__get_free_page(GFP_KERNEL | __GFP_ZERO); ++ if (!data.pages) ++ return -ENOMEM; ++ ++ while (nr_total) { ++ nr_pages = min(nr_total, PAGE_SIZE / sizeof(data.pages[0])); ++ ret = ___alloc_pages_bulk(data.pages, nr_pages); ++ if (ret) ++ break; ++ ++ data.start = start; ++ ret = apply_to_page_range(&init_mm, start, nr_pages * PAGE_SIZE, ++ kasan_populate_vmalloc_pte, &data); ++ ___free_pages_bulk(data.pages, nr_pages); ++ if (ret) ++ break; ++ ++ start += nr_pages * PAGE_SIZE; ++ nr_total -= nr_pages; ++ } ++ ++ free_page((unsigned long)data.pages); ++ ++ return ret; ++} ++ + int kasan_populate_vmalloc(unsigned long addr, unsigned long size) + { + unsigned long shadow_start, shadow_end; +@@ -348,9 +414,7 @@ int kasan_populate_vmalloc(unsigned long + shadow_start = PAGE_ALIGN_DOWN(shadow_start); + shadow_end = PAGE_ALIGN(shadow_end); + +- ret = apply_to_page_range(&init_mm, shadow_start, +- shadow_end - shadow_start, +- kasan_populate_vmalloc_pte, NULL); ++ ret = __kasan_populate_vmalloc(shadow_start, 
shadow_end); + if (ret) + return ret; + diff --git a/queue-6.14/memcg-always-call-cond_resched-after-fn.patch b/queue-6.14/memcg-always-call-cond_resched-after-fn.patch new file mode 100644 index 0000000000..a74c5166b8 --- /dev/null +++ b/queue-6.14/memcg-always-call-cond_resched-after-fn.patch @@ -0,0 +1,80 @@ +From 06717a7b6c86514dbd6ab322e8083ffaa4db5712 Mon Sep 17 00:00:00 2001 +From: Breno Leitao +Date: Fri, 23 May 2025 10:21:06 -0700 +Subject: memcg: always call cond_resched() after fn() + +From: Breno Leitao + +commit 06717a7b6c86514dbd6ab322e8083ffaa4db5712 upstream. + +I am seeing soft lockup on certain machine types when a cgroup OOMs. This +is happening because killing the process in certain machine might be very +slow, which causes the soft lockup and RCU stalls. This happens usually +when the cgroup has MANY processes and memory.oom.group is set. + +Example I am seeing in real production: + + [462012.244552] Memory cgroup out of memory: Killed process 3370438 (crosvm) .... + .... + [462037.318059] Memory cgroup out of memory: Killed process 4171372 (adb) .... + [462037.348314] watchdog: BUG: soft lockup - CPU#64 stuck for 26s! [stat_manager-ag:1618982] + .... + +Quick look at why this is so slow, it seems to be related to serial flush +for certain machine types. For all the crashes I saw, the target CPU was +at console_flush_all(). + +In the case above, there are thousands of processes in the cgroup, and it +is soft locking up before it reaches the 1024 limit in the code (which +would call the cond_resched()). So, cond_resched() in 1024 blocks is not +sufficient. + +Remove the counter-based conditional rescheduling logic and call +cond_resched() unconditionally after each task iteration, after fn() is +called. This avoids the lockup independently of how slow fn() is. 
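The resulting loop is deliberately simple; condensed from the mem_cgroup_scan_tasks() hunk below, with the iterator setup unchanged:

    while (!ret && (task = css_task_iter_next(&it))) {
            ret = fn(task, arg);
            /* fn() may be arbitrarily slow (e.g. an OOM kill flushed over a
             * serial console), so yield after every task rather than after
             * every 1024 iterations.
             */
            cond_resched();
    }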
+ +Link: https://lkml.kernel.org/r/20250523-memcg_fix-v1-1-ad3eafb60477@debian.org +Fixes: ade81479c7dd ("memcg: fix soft lockup in the OOM process") +Signed-off-by: Breno Leitao +Suggested-by: Rik van Riel +Acked-by: Shakeel Butt +Cc: Michael van der Westhuizen +Cc: Usama Arif +Cc: Pavel Begunkov +Cc: Chen Ridong +Cc: Greg Kroah-Hartman +Cc: Johannes Weiner +Cc: Michal Hocko +Cc: Michal Hocko +Cc: Muchun Song +Cc: Roman Gushchin +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memcontrol.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -1161,7 +1161,6 @@ void mem_cgroup_scan_tasks(struct mem_cg + { + struct mem_cgroup *iter; + int ret = 0; +- int i = 0; + + BUG_ON(mem_cgroup_is_root(memcg)); + +@@ -1171,10 +1170,9 @@ void mem_cgroup_scan_tasks(struct mem_cg + + css_task_iter_start(&iter->css, CSS_TASK_ITER_PROCS, &it); + while (!ret && (task = css_task_iter_next(&it))) { +- /* Avoid potential softlockup warning */ +- if ((++i & 1023) == 0) +- cond_resched(); + ret = fn(task, arg); ++ /* Avoid potential softlockup warning */ ++ cond_resched(); + } + css_task_iter_end(&it); + if (ret) { diff --git a/queue-6.14/mm-fix-vm_uffd_minor-vm_shadow_stack-on-userfaultfd-y-arm64_gcs-y.patch b/queue-6.14/mm-fix-vm_uffd_minor-vm_shadow_stack-on-userfaultfd-y-arm64_gcs-y.patch new file mode 100644 index 0000000000..3f24acbcf0 --- /dev/null +++ b/queue-6.14/mm-fix-vm_uffd_minor-vm_shadow_stack-on-userfaultfd-y-arm64_gcs-y.patch @@ -0,0 +1,52 @@ +From 0f518255bde881d2a2605bbc080b438b532b6ab2 Mon Sep 17 00:00:00 2001 +From: Florent Revest +Date: Wed, 7 May 2025 15:09:57 +0200 +Subject: mm: fix VM_UFFD_MINOR == VM_SHADOW_STACK on USERFAULTFD=y && ARM64_GCS=y + +From: Florent Revest + +commit 0f518255bde881d2a2605bbc080b438b532b6ab2 upstream. + +On configs with CONFIG_ARM64_GCS=y, VM_SHADOW_STACK is bit 38. On configs +with CONFIG_HAVE_ARCH_USERFAULTFD_MINOR=y (selected by CONFIG_ARM64 when +CONFIG_USERFAULTFD=y), VM_UFFD_MINOR is _also_ bit 38. + +This bit being shared by two different VMA flags could lead to all sorts +of unintended behaviors. Presumably, a process could maybe call into +userfaultfd in a way that disables the shadow stack vma flag. I can't +think of any attack where this would help (presumably, if an attacker +tries to disable shadow stacks, they are trying to hijack control flow so +can't arbitrarily call into userfaultfd yet anyway) but this still feels +somewhat scary. + +Link: https://lkml.kernel.org/r/20250507131000.1204175-2-revest@chromium.org +Fixes: ae80e1629aea ("mm: Define VM_SHADOW_STACK for arm64 when we support GCS") +Signed-off-by: Florent Revest +Reviewed-by: Mark Brown +Cc: Borislav Betkov +Cc: Brendan Jackman +Cc: Catalin Marinas +Cc: Florent Revest +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: Thiago Jung Bauermann +Cc: Thomas Gleinxer +Cc: Will Deacon +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -411,7 +411,7 @@ extern unsigned int kobjsize(const void + #endif + + #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR +-# define VM_UFFD_MINOR_BIT 38 ++# define VM_UFFD_MINOR_BIT 41 + # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ + #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ + # define VM_UFFD_MINOR VM_NONE diff --git a/queue-6.14/mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch b/queue-6.14/mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch new file mode 100644 index 0000000000..678420e97b --- /dev/null +++ b/queue-6.14/mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch @@ -0,0 +1,107 @@ +From 113ed54ad276c352ee5ce109bdcf0df118a43bda Mon Sep 17 00:00:00 2001 +From: Ge Yang +Date: Thu, 22 May 2025 11:22:17 +0800 +Subject: mm/hugetlb: fix kernel NULL pointer dereference when replacing free hugetlb folios + +From: Ge Yang + +commit 113ed54ad276c352ee5ce109bdcf0df118a43bda upstream. + +A kernel crash was observed when replacing free hugetlb folios: + +BUG: kernel NULL pointer dereference, address: 0000000000000028 +PGD 0 P4D 0 +Oops: Oops: 0000 [#1] SMP NOPTI +CPU: 28 UID: 0 PID: 29639 Comm: test_cma.sh Tainted 6.15.0-rc6-zp #41 PREEMPT(voluntary) +RIP: 0010:alloc_and_dissolve_hugetlb_folio+0x1d/0x1f0 +RSP: 0018:ffffc9000b30fa90 EFLAGS: 00010286 +RAX: 0000000000000000 RBX: 0000000000342cca RCX: ffffea0043000000 +RDX: ffffc9000b30fb08 RSI: ffffea0043000000 RDI: 0000000000000000 +RBP: ffffc9000b30fb20 R08: 0000000000001000 R09: 0000000000000000 +R10: ffff88886f92eb00 R11: 0000000000000000 R12: ffffea0043000000 +R13: 0000000000000000 R14: 00000000010c0200 R15: 0000000000000004 +FS: 00007fcda5f14740(0000) GS:ffff8888ec1d8000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000000000028 CR3: 0000000391402000 CR4: 0000000000350ef0 +Call Trace: + + replace_free_hugepage_folios+0xb6/0x100 + alloc_contig_range_noprof+0x18a/0x590 + ? srso_return_thunk+0x5/0x5f + ? down_read+0x12/0xa0 + ? srso_return_thunk+0x5/0x5f + cma_range_alloc.constprop.0+0x131/0x290 + __cma_alloc+0xcf/0x2c0 + cma_alloc_write+0x43/0xb0 + simple_attr_write_xsigned.constprop.0.isra.0+0xb2/0x110 + debugfs_attr_write+0x46/0x70 + full_proxy_write+0x62/0xa0 + vfs_write+0xf8/0x420 + ? srso_return_thunk+0x5/0x5f + ? filp_flush+0x86/0xa0 + ? srso_return_thunk+0x5/0x5f + ? filp_close+0x1f/0x30 + ? srso_return_thunk+0x5/0x5f + ? do_dup2+0xaf/0x160 + ? srso_return_thunk+0x5/0x5f + ksys_write+0x65/0xe0 + do_syscall_64+0x64/0x170 + entry_SYSCALL_64_after_hwframe+0x76/0x7e + +There is a potential race between __update_and_free_hugetlb_folio() and +replace_free_hugepage_folios(): + +CPU1 CPU2 +__update_and_free_hugetlb_folio replace_free_hugepage_folios + folio_test_hugetlb(folio) + -- It's still hugetlb folio. + + __folio_clear_hugetlb(folio) + hugetlb_free_folio(folio) + h = folio_hstate(folio) + -- Here, h is NULL pointer + +When the above race condition occurs, folio_hstate(folio) returns NULL, +and subsequent access to this NULL pointer will cause the system to crash. 
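In code, the window is simply the gap between two unlocked steps (condensed from replace_free_hugepage_folios() before this fix):

    folio = pfn_folio(start_pfn);
    if (folio_test_hugetlb(folio)) {        /* CPU2: still looks like a hugetlb folio ... */
            h = folio_hstate(folio);        /* ... but CPU1 may have cleared the flag, so h is NULL */
    }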
+To resolve this issue, execute folio_hstate(folio) under the protection +of the hugetlb_lock lock, ensuring that folio_hstate(folio) does not +return NULL. + +Link: https://lkml.kernel.org/r/1747884137-26685-1-git-send-email-yangge1116@126.com +Fixes: 04f13d241b8b ("mm: replace free hugepage folios after migration") +Signed-off-by: Ge Yang +Reviewed-by: Muchun Song +Reviewed-by: Oscar Salvador +Cc: Baolin Wang +Cc: Barry Song <21cnbao@gmail.com> +Cc: David Hildenbrand +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -2926,12 +2926,20 @@ int replace_free_hugepage_folios(unsigne + + while (start_pfn < end_pfn) { + folio = pfn_folio(start_pfn); ++ ++ /* ++ * The folio might have been dissolved from under our feet, so make sure ++ * to carefully check the state under the lock. ++ */ ++ spin_lock_irq(&hugetlb_lock); + if (folio_test_hugetlb(folio)) { + h = folio_hstate(folio); + } else { ++ spin_unlock_irq(&hugetlb_lock); + start_pfn++; + continue; + } ++ spin_unlock_irq(&hugetlb_lock); + + if (!folio_ref_count(folio)) { + ret = alloc_and_dissolve_hugetlb_folio(h, folio, diff --git a/queue-6.14/mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch b/queue-6.14/mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch new file mode 100644 index 0000000000..d91436a478 --- /dev/null +++ b/queue-6.14/mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch @@ -0,0 +1,47 @@ +From 7190b3c8bd2b0cde483bd440cf91ba1c518b4261 Mon Sep 17 00:00:00 2001 +From: Ignacio Moreno Gonzalez +Date: Wed, 7 May 2025 15:28:06 +0200 +Subject: mm: mmap: map MAP_STACK to VM_NOHUGEPAGE only if THP is enabled + +From: Ignacio Moreno Gonzalez + +commit 7190b3c8bd2b0cde483bd440cf91ba1c518b4261 upstream. + +commit c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") maps the +mmap option MAP_STACK to VM_NOHUGEPAGE. This is also done if +CONFIG_TRANSPARENT_HUGEPAGE is not defined. But in that case, the +VM_NOHUGEPAGE does not make sense. + +I discovered this issue when trying to use the tool CRIU to checkpoint and +restore a container. Our running kernel is compiled without +CONFIG_TRANSPARENT_HUGEPAGE. CRIU parses the output of /proc//smaps +and saves the "nh" flag. When trying to restore the container, CRIU fails +to restore the "nh" mappings, since madvise() MADV_NOHUGEPAGE always +returns an error because CONFIG_TRANSPARENT_HUGEPAGE is not defined. + +Link: https://lkml.kernel.org/r/20250507-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled-v5-1-c6c38cfefd6e@kuka.com +Fixes: c4608d1bf7c6 ("mm: mmap: map MAP_STACK to VM_NOHUGEPAGE") +Signed-off-by: Ignacio Moreno Gonzalez +Acked-by: David Hildenbrand +Reviewed-by: Lorenzo Stoakes +Reviewed-by: Yang Shi +Reviewed-by: Liam R. 
Howlett +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mman.h | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/include/linux/mman.h ++++ b/include/linux/mman.h +@@ -157,7 +157,9 @@ calc_vm_flag_bits(struct file *file, uns + return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | + _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | + _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | ++#ifdef CONFIG_TRANSPARENT_HUGEPAGE + _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | ++#endif + arch_calc_vm_flag_bits(file, flags); + } + diff --git a/queue-6.14/mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch b/queue-6.14/mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch new file mode 100644 index 0000000000..2993b7d1c8 --- /dev/null +++ b/queue-6.14/mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch @@ -0,0 +1,79 @@ +From e05741fb10c38d70bbd7ec12b23c197b6355d519 Mon Sep 17 00:00:00 2001 +From: Tianyang Zhang +Date: Wed, 16 Apr 2025 16:24:05 +0800 +Subject: mm/page_alloc.c: avoid infinite retries caused by cpuset race + +From: Tianyang Zhang + +commit e05741fb10c38d70bbd7ec12b23c197b6355d519 upstream. + +__alloc_pages_slowpath has no change detection for ac->nodemask in the +part of retry path, while cpuset can modify it in parallel. For some +processes that set mempolicy as MPOL_BIND, this results ac->nodemask +changes, and then the should_reclaim_retry will judge based on the latest +nodemask and jump to retry, while the get_page_from_freelist only +traverses the zonelist from ac->preferred_zoneref, which selected by a +expired nodemask and may cause infinite retries in some cases + +cpu 64: +__alloc_pages_slowpath { + /* ..... */ +retry: + /* ac->nodemask = 0x1, ac->preferred->zone->nid = 1 */ + if (alloc_flags & ALLOC_KSWAPD) + wake_all_kswapds(order, gfp_mask, ac); + /* cpu 1: + cpuset_write_resmask + update_nodemask + update_nodemasks_hier + update_tasks_nodemask + mpol_rebind_task + mpol_rebind_policy + mpol_rebind_nodemask + // mempolicy->nodes has been modified, + // which ac->nodemask point to + + */ + /* ac->nodemask = 0x3, ac->preferred->zone->nid = 1 */ + if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, + did_some_progress > 0, &no_progress_loops)) + goto retry; +} + +Simultaneously starting multiple cpuset01 from LTP can quickly reproduce +this issue on a multi node server when the maximum memory pressure is +reached and the swap is enabled + +Link: https://lkml.kernel.org/r/20250416082405.20988-1-zhangtianyang@loongson.cn +Fixes: c33d6c06f60f ("mm, page_alloc: avoid looking up the first zone in a zonelist twice") +Signed-off-by: Tianyang Zhang +Reviewed-by: Suren Baghdasaryan +Reviewed-by: Vlastimil Babka +Cc: Michal Hocko +Cc: Brendan Jackman +Cc: Johannes Weiner +Cc: Zi Yan +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -4381,6 +4381,14 @@ restart: + } + + retry: ++ /* ++ * Deal with possible cpuset update races or zonelist updates to avoid ++ * infinite retries. 
++ */ ++ if (check_retry_cpuset(cpuset_mems_cookie, ac) || ++ check_retry_zonelist(zonelist_iter_cookie)) ++ goto restart; ++ + /* Ensure kswapd doesn't accidentally go to sleep as long as we loop */ + if (alloc_flags & ALLOC_KSWAPD) + wake_all_kswapds(order, gfp_mask, ac); diff --git a/queue-6.14/mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch b/queue-6.14/mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch new file mode 100644 index 0000000000..6246796f94 --- /dev/null +++ b/queue-6.14/mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch @@ -0,0 +1,45 @@ +From f7a35a3c36d1e36059c5654737d9bee3454f01a3 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 15 May 2025 14:42:15 -0700 +Subject: mm: vmalloc: actually use the in-place vrealloc region + +From: Kees Cook + +commit f7a35a3c36d1e36059c5654737d9bee3454f01a3 upstream. + +Patch series "mm: vmalloc: Actually use the in-place vrealloc region". + +This fixes a performance regression[1] with vrealloc()[1]. + + +The refactoring to not build a new vmalloc region only actually worked +when shrinking. Actually return the resized area when it grows. Ugh. + +Link: https://lkml.kernel.org/r/20250515214217.619685-1-kees@kernel.org +Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing") +Signed-off-by: Kees Cook +Reported-by: Shung-Hsi Yu +Closes: https://lore.kernel.org/all/20250515-bpf-verifier-slowdown-vwo2meju4cgp2su5ckj@6gi6ssxbnfqg [1] +Tested-by: Eduard Zingerman +Tested-by: Pawan Gupta +Tested-by: Shung-Hsi Yu +Reviewed-by: "Uladzislau Rezki (Sony)" +Reviewed-by: Danilo Krummrich +Cc: "Erhard F." +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -4115,6 +4115,7 @@ void *vrealloc_noprof(const void *p, siz + if (want_init_on_alloc(flags)) + memset((void *)p + old_size, 0, size - old_size); + vm->requested_size = size; ++ return (void *)p; + } + + /* TODO: Grow the vm_area, i.e. allocate and map additional pages. */ diff --git a/queue-6.14/mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch b/queue-6.14/mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch new file mode 100644 index 0000000000..a70d172ebf --- /dev/null +++ b/queue-6.14/mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch @@ -0,0 +1,57 @@ +From 70d1eb031a68cbde4eed8099674be21778441c94 Mon Sep 17 00:00:00 2001 +From: Kees Cook +Date: Thu, 15 May 2025 14:42:16 -0700 +Subject: mm: vmalloc: only zero-init on vrealloc shrink + +From: Kees Cook + +commit 70d1eb031a68cbde4eed8099674be21778441c94 upstream. + +The common case is to grow reallocations, and since init_on_alloc will +have already zeroed the whole allocation, we only need to zero when +shrinking the allocation. + +Link: https://lkml.kernel.org/r/20250515214217.619685-2-kees@kernel.org +Fixes: a0309faf1cb0 ("mm: vmalloc: support more granular vrealloc() sizing") +Signed-off-by: Kees Cook +Tested-by: Pawan Gupta +Cc: Danilo Krummrich +Cc: Eduard Zingerman +Cc: "Erhard F." +Cc: Shung-Hsi Yu +Cc: "Uladzislau Rezki (Sony)" +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/vmalloc.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/mm/vmalloc.c ++++ b/mm/vmalloc.c +@@ -4097,8 +4097,8 @@ void *vrealloc_noprof(const void *p, siz + * would be a good heuristic for when to shrink the vm_area? + */ + if (size <= old_size) { +- /* Zero out "freed" memory. 
*/ +- if (want_init_on_free()) ++ /* Zero out "freed" memory, potentially for future realloc. */ ++ if (want_init_on_free() || want_init_on_alloc(flags)) + memset((void *)p + size, 0, old_size - size); + vm->requested_size = size; + kasan_poison_vmalloc(p + size, old_size - size); +@@ -4111,9 +4111,11 @@ void *vrealloc_noprof(const void *p, siz + if (size <= alloced_size) { + kasan_unpoison_vmalloc(p + old_size, size - old_size, + KASAN_VMALLOC_PROT_NORMAL); +- /* Zero out "alloced" memory. */ +- if (want_init_on_alloc(flags)) +- memset((void *)p + old_size, 0, size - old_size); ++ /* ++ * No need to zero memory here, as unused memory will have ++ * already been zeroed at initial allocation time or during ++ * realloc shrink time. ++ */ + vm->requested_size = size; + return (void *)p; + } diff --git a/queue-6.14/module-release-codetag-section-when-module-load-fails.patch b/queue-6.14/module-release-codetag-section-when-module-load-fails.patch new file mode 100644 index 0000000000..e895effcc4 --- /dev/null +++ b/queue-6.14/module-release-codetag-section-when-module-load-fails.patch @@ -0,0 +1,38 @@ +From 221fcbf77578826fad8f4bfa0530b5b55bf9676a Mon Sep 17 00:00:00 2001 +From: David Wang <00107082@163.com> +Date: Tue, 20 May 2025 00:38:23 +0800 +Subject: module: release codetag section when module load fails + +From: David Wang <00107082@163.com> + +commit 221fcbf77578826fad8f4bfa0530b5b55bf9676a upstream. + +When module load fails after memory for codetag section is ready, codetag +section memory will not be properly released. This causes memory leak, +and if next module load happens to get the same module address, codetag +may pick the uninitialized section when manipulating tags during module +unload, and leads to "unable to handle page fault" BUG. + +Link: https://lkml.kernel.org/r/20250519163823.7540-1-00107082@163.com +Fixes: 0db6f8d7820a ("alloc_tag: load module tags into separate contiguous memory") +Closes: https://lore.kernel.org/all/20250516131246.6244-1-00107082@163.com/ +Signed-off-by: David Wang <00107082@163.com> +Acked-by: Suren Baghdasaryan +Cc: Petr Pavlu +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + kernel/module/main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/kernel/module/main.c ++++ b/kernel/module/main.c +@@ -2852,6 +2852,7 @@ static void module_deallocate(struct mod + { + percpu_modfree(mod); + module_arch_freeing_init(mod); ++ codetag_free_module_sections(mod); + + free_mod_mem(mod); + } diff --git a/queue-6.14/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch b/queue-6.14/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch new file mode 100644 index 0000000000..3cbe069194 --- /dev/null +++ b/queue-6.14/nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch @@ -0,0 +1,67 @@ +From fb881cd7604536b17a1927fb0533f9a6982ffcc5 Mon Sep 17 00:00:00 2001 +From: Ryusuke Konishi +Date: Sat, 3 May 2025 14:33:14 +0900 +Subject: nilfs2: fix deadlock warnings caused by lock dependency in init_nilfs() + +From: Ryusuke Konishi + +commit fb881cd7604536b17a1927fb0533f9a6982ffcc5 upstream. + +After commit c0e473a0d226 ("block: fix race between set_blocksize and read +paths") was merged, set_blocksize() called by sb_set_blocksize() now locks +the inode of the backing device file. 
As a result of this change, syzbot +started reporting deadlock warnings due to a circular dependency involving +the semaphore "ns_sem" of the nilfs object, the inode lock of the backing +device file, and the locks that this inode lock is transitively dependent +on. + +This is caused by a new lock dependency added by the above change, since +init_nilfs() calls sb_set_blocksize() in the lock section of "ns_sem". +However, these warnings are false positives because init_nilfs() is called +in the early stage of the mount operation and the filesystem has not yet +started. + +The reason why "ns_sem" is locked in init_nilfs() was to avoid a race +condition in nilfs_fill_super() caused by sharing a nilfs object among +multiple filesystem instances (super block structures) in the early +implementation. However, nilfs objects and super block structures have +long ago become one-to-one, and there is no longer any need to use the +semaphore there. + +So, fix this issue by removing the use of the semaphore "ns_sem" in +init_nilfs(). + +Link: https://lkml.kernel.org/r/20250503053327.12294-1-konishi.ryusuke@gmail.com +Fixes: c0e473a0d226 ("block: fix race between set_blocksize and read paths") +Signed-off-by: Ryusuke Konishi +Reported-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=00f7f5b884b117ee6773 +Tested-by: syzbot+00f7f5b884b117ee6773@syzkaller.appspotmail.com +Reported-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=f30591e72bfc24d4715b +Tested-by: syzbot+f30591e72bfc24d4715b@syzkaller.appspotmail.com> +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/nilfs2/the_nilfs.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/fs/nilfs2/the_nilfs.c ++++ b/fs/nilfs2/the_nilfs.c +@@ -705,8 +705,6 @@ int init_nilfs(struct the_nilfs *nilfs, + int blocksize; + int err; + +- down_write(&nilfs->ns_sem); +- + blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); + if (!blocksize) { + nilfs_err(sb, "unable to set blocksize"); +@@ -779,7 +777,6 @@ int init_nilfs(struct the_nilfs *nilfs, + set_nilfs_init(nilfs); + err = 0; + out: +- up_write(&nilfs->ns_sem); + return err; + + failed_sbh: diff --git a/queue-6.14/octeontx2-hide-unused-label.patch b/queue-6.14/octeontx2-hide-unused-label.patch new file mode 100644 index 0000000000..52877445e7 --- /dev/null +++ b/queue-6.14/octeontx2-hide-unused-label.patch @@ -0,0 +1,56 @@ +From ca57d1c56f4015d83fe7840b41d74783ee900b28 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Wed, 19 Feb 2025 17:21:14 +0100 +Subject: octeontx2: hide unused label + +From: Arnd Bergmann + +commit ca57d1c56f4015d83fe7840b41d74783ee900b28 upstream. + +A previous patch introduces a build-time warning when CONFIG_DCB +is disabled: + +drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c: In function 'otx2_probe': +drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c:3217:1: error: label 'err_free_zc_bmap' defined but not used [-Werror=unused-label] +drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c: In function 'otx2vf_probe': +drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c:740:1: error: label 'err_free_zc_bmap' defined but not used [-Werror=unused-label] + +Add the same #ifdef check around it. 
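The general shape of the warning and of the fix is below; probe_sketch(), setup_dcb() and free_zc_bitmap() are illustrative stand-ins, not the driver's exact probe code. A goto that only exists under an #ifdef needs its target label under the same #ifdef, otherwise builds without that config see a defined-but-unused label:

    static int probe_sketch(void)
    {
    #ifdef CONFIG_DCB
            int err = setup_dcb();          /* hypothetical setup step, DCB-only */

            if (err)
                    goto err_free_zc_bmap;
    #endif
            return 0;

    #ifdef CONFIG_DCB                       /* same guard as the only goto targeting the label */
    err_free_zc_bmap:
            free_zc_bitmap();               /* hypothetical cleanup */
            return err;
    #endif
    }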
+ +Fixes: efabce290151 ("octeontx2-pf: AF_XDP zero copy receive support") +Signed-off-by: Arnd Bergmann +Reviewed-by: Suman Ghosh +Link: https://patch.msgid.link/20250219162239.1376865-1-arnd@kernel.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 2 ++ + drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c | 2 ++ + 2 files changed, 4 insertions(+) + +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +@@ -3214,8 +3214,10 @@ static int otx2_probe(struct pci_dev *pd + + return 0; + ++#ifdef CONFIG_DCB + err_free_zc_bmap: + bitmap_free(pf->af_xdp_zc_qidx); ++#endif + err_sriov_cleannup: + otx2_sriov_vfcfg_cleanup(pf); + err_pf_sriov_init: +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +@@ -740,8 +740,10 @@ static int otx2vf_probe(struct pci_dev * + + return 0; + ++#ifdef CONFIG_DCB + err_free_zc_bmap: + bitmap_free(vf->af_xdp_zc_qidx); ++#endif + err_unreg_devlink: + otx2_unregister_dl(vf); + err_shutdown_tc: diff --git a/queue-6.14/series b/queue-6.14/series index d42d37220a..6880e7c5b5 100644 --- a/queue-6.14/series +++ b/queue-6.14/series @@ -754,3 +754,20 @@ smb-client-reset-all-search-buffer-pointers-when-releasing-buffer.patch revert-drm-amd-keep-display-off-while-going-into-s4.patch input-xpad-add-more-controllers.patch input-synaptics-rmi-fix-crash-with-unsupported-versions-of-f34.patch +alloc_tag-allocate-percpu-counters-for-module-tags-dynamically.patch +highmem-add-folio_test_partial_kmap.patch +kasan-avoid-sleepable-page-allocation-from-atomic-context.patch +memcg-always-call-cond_resched-after-fn.patch +mm-hugetlb-fix-kernel-null-pointer-dereference-when-replacing-free-hugetlb-folios.patch +mm-page_alloc.c-avoid-infinite-retries-caused-by-cpuset-race.patch +module-release-codetag-section-when-module-load-fails.patch +taskstats-fix-struct-taskstats-breaks-backward-compatibility-since-version-15.patch +mm-mmap-map-map_stack-to-vm_nohugepage-only-if-thp-is-enabled.patch +mm-fix-vm_uffd_minor-vm_shadow_stack-on-userfaultfd-y-arm64_gcs-y.patch +mm-vmalloc-actually-use-the-in-place-vrealloc-region.patch +mm-vmalloc-only-zero-init-on-vrealloc-shrink.patch +octeontx2-hide-unused-label.patch +wifi-mac80211-restore-monitor-for-outgoing-frames.patch +nilfs2-fix-deadlock-warnings-caused-by-lock-dependency-in-init_nilfs.patch +bluetooth-btmtksdio-check-function-enabled-before-doing-close.patch +bluetooth-btmtksdio-do-close-if-sdio-card-removed-without-close.patch diff --git a/queue-6.14/taskstats-fix-struct-taskstats-breaks-backward-compatibility-since-version-15.patch b/queue-6.14/taskstats-fix-struct-taskstats-breaks-backward-compatibility-since-version-15.patch new file mode 100644 index 0000000000..595cc6ce2e --- /dev/null +++ b/queue-6.14/taskstats-fix-struct-taskstats-breaks-backward-compatibility-since-version-15.patch @@ -0,0 +1,152 @@ +From 0bf2d838de1ffb6d0bb6f8d18a6ccc59b7d9a705 Mon Sep 17 00:00:00 2001 +From: Wang Yaxin +Date: Sat, 10 May 2025 15:54:13 +0800 +Subject: taskstats: fix struct taskstats breaks backward compatibility since version 15 + +From: Wang Yaxin + +commit 0bf2d838de1ffb6d0bb6f8d18a6ccc59b7d9a705 upstream. 
+ +Problem +======== +commit 658eb5ab916d ("delayacct: add delay max to record delay peak") + - adding more fields +commit f65c64f311ee ("delayacct: add delay min to record delay peak") + - adding more fields +commit b016d0873777 ("taskstats: modify taskstats version") + - version bump to 15 + +Since version 15 (TASKSTATS_VERSION=15) the new layout of the structure +adds fields in the middle of the structure, rendering all old software +incompatible with newer kernels and software compiled against the new +kernel headers incompatible with older kernels. + +Solution +========= +move delay max and delay min to the end of taskstat, and bump +the version to 16 after the change + +[wang.yaxin@zte.com.cn: adjust indentation] + Link: https://lkml.kernel.org/r/202505192131489882NSciXV4EGd8zzjLuwoOK@zte.com.cn +Link: https://lkml.kernel.org/r/20250510155413259V4JNRXxukdDgzsaL0Fo6a@zte.com.cn +Fixes: f65c64f311ee ("delayacct: add delay min to record delay peak") +Signed-off-by: Wang Yaxin +Signed-off-by: xu xin +Signed-off-by: Kun Jiang +Reviewed-by: Yang Yang +Cc: Balbir Singh +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/taskstats.h | 47 +++++++++++++++++++++------------- + 1 file changed, 29 insertions(+), 18 deletions(-) + +diff --git a/include/uapi/linux/taskstats.h b/include/uapi/linux/taskstats.h +index 95762232e018..5929030d4e8b 100644 +--- a/include/uapi/linux/taskstats.h ++++ b/include/uapi/linux/taskstats.h +@@ -34,7 +34,7 @@ + */ + + +-#define TASKSTATS_VERSION 15 ++#define TASKSTATS_VERSION 16 + #define TS_COMM_LEN 32 /* should be >= TASK_COMM_LEN + * in linux/sched.h */ + +@@ -72,8 +72,6 @@ struct taskstats { + */ + __u64 cpu_count __attribute__((aligned(8))); + __u64 cpu_delay_total; +- __u64 cpu_delay_max; +- __u64 cpu_delay_min; + + /* Following four fields atomically updated using task->delays->lock */ + +@@ -82,14 +80,10 @@ struct taskstats { + */ + __u64 blkio_count; + __u64 blkio_delay_total; +- __u64 blkio_delay_max; +- __u64 blkio_delay_min; + + /* Delay waiting for page fault I/O (swap in only) */ + __u64 swapin_count; + __u64 swapin_delay_total; +- __u64 swapin_delay_max; +- __u64 swapin_delay_min; + + /* cpu "wall-clock" running time + * On some architectures, value will adjust for cpu time stolen +@@ -172,14 +166,11 @@ struct taskstats { + /* Delay waiting for memory reclaim */ + __u64 freepages_count; + __u64 freepages_delay_total; +- __u64 freepages_delay_max; +- __u64 freepages_delay_min; ++ + + /* Delay waiting for thrashing page */ + __u64 thrashing_count; + __u64 thrashing_delay_total; +- __u64 thrashing_delay_max; +- __u64 thrashing_delay_min; + + /* v10: 64-bit btime to avoid overflow */ + __u64 ac_btime64; /* 64-bit begin time */ +@@ -187,8 +178,6 @@ struct taskstats { + /* v11: Delay waiting for memory compact */ + __u64 compact_count; + __u64 compact_delay_total; +- __u64 compact_delay_max; +- __u64 compact_delay_min; + + /* v12 begin */ + __u32 ac_tgid; /* thread group ID */ +@@ -210,15 +199,37 @@ struct taskstats { + /* v13: Delay waiting for write-protect copy */ + __u64 wpcopy_count; + __u64 wpcopy_delay_total; +- __u64 wpcopy_delay_max; +- __u64 wpcopy_delay_min; + + /* v14: Delay waiting for IRQ/SOFTIRQ */ + __u64 irq_count; + __u64 irq_delay_total; +- __u64 irq_delay_max; +- __u64 irq_delay_min; +- /* v15: add Delay max */ ++ ++ /* v15: add Delay max and Delay min */ ++ ++ /* v16: move Delay max and Delay min to the end of taskstat */ ++ __u64 cpu_delay_max; ++ __u64 cpu_delay_min; ++ ++ __u64 
blkio_delay_max; ++ __u64 blkio_delay_min; ++ ++ __u64 swapin_delay_max; ++ __u64 swapin_delay_min; ++ ++ __u64 freepages_delay_max; ++ __u64 freepages_delay_min; ++ ++ __u64 thrashing_delay_max; ++ __u64 thrashing_delay_min; ++ ++ __u64 compact_delay_max; ++ __u64 compact_delay_min; ++ ++ __u64 wpcopy_delay_max; ++ __u64 wpcopy_delay_min; ++ ++ __u64 irq_delay_max; ++ __u64 irq_delay_min; + }; + + +-- +2.49.0 + diff --git a/queue-6.14/wifi-mac80211-restore-monitor-for-outgoing-frames.patch b/queue-6.14/wifi-mac80211-restore-monitor-for-outgoing-frames.patch new file mode 100644 index 0000000000..5273b9415d --- /dev/null +++ b/queue-6.14/wifi-mac80211-restore-monitor-for-outgoing-frames.patch @@ -0,0 +1,38 @@ +From abf078c0a322159f5ebe2adaa0cd69dc45b1e710 Mon Sep 17 00:00:00 2001 +From: Johannes Berg +Date: Tue, 22 Apr 2025 21:32:51 +0200 +Subject: wifi: mac80211: restore monitor for outgoing frames + +From: Johannes Berg + +commit abf078c0a322159f5ebe2adaa0cd69dc45b1e710 upstream. + +This code was accidentally dropped during the cooked +monitor removal, but really should've been simplified +instead. Add the simple version back. + +Fixes: 286e69677065 ("wifi: mac80211: Drop cooked monitor support") +Link: https://patch.msgid.link/20250422213251.b3d65fd0f323.Id2a6901583f7af86bbe94deb355968b238f350c6@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Greg Kroah-Hartman +--- + net/mac80211/status.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +--- a/net/mac80211/status.c ++++ b/net/mac80211/status.c +@@ -1085,7 +1085,13 @@ static void __ieee80211_tx_status(struct + + ieee80211_report_used_skb(local, skb, false, status->ack_hwtstamp); + +- if (status->free_list) ++ /* ++ * This is a bit racy but we can avoid a lot of work ++ * with this test... ++ */ ++ if (local->tx_mntrs) ++ ieee80211_tx_monitor(local, skb, retry_count, status); ++ else if (status->free_list) + list_add_tail(&skb->list, status->free_list); + else + dev_kfree_skb(skb); -- 2.47.2