6.6-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 20 May 2025 10:57:11 +0000 (12:57 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 20 May 2025 10:57:11 +0000 (12:57 +0200)
added patches:
bpf-arm64-fix-address-emission-with-tag-based-kasan-enabled.patch
bpf-arm64-fix-trampoline-for-bpf_tramp_f_call_orig.patch
btrfs-don-t-bug_on-when-0-reference-count-at-btrfs_lookup_extent_info.patch
hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch
loongarch-explicitly-specify-code-model-in-makefile.patch
memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch
mm-migrate-correct-nr_failed-in-migrate_pages_sync.patch
sctp-add-mutual-exclusion-in-proc_sctp_do_udp_port.patch
selftests-mm-compaction_test-support-platform-with-huge-mount-of-memory.patch

queue-6.6/bpf-arm64-fix-address-emission-with-tag-based-kasan-enabled.patch [new file with mode: 0644]
queue-6.6/bpf-arm64-fix-trampoline-for-bpf_tramp_f_call_orig.patch [new file with mode: 0644]
queue-6.6/btrfs-don-t-bug_on-when-0-reference-count-at-btrfs_lookup_extent_info.patch [new file with mode: 0644]
queue-6.6/hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch [new file with mode: 0644]
queue-6.6/loongarch-explicitly-specify-code-model-in-makefile.patch [new file with mode: 0644]
queue-6.6/memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch [new file with mode: 0644]
queue-6.6/mm-migrate-correct-nr_failed-in-migrate_pages_sync.patch [new file with mode: 0644]
queue-6.6/sctp-add-mutual-exclusion-in-proc_sctp_do_udp_port.patch [new file with mode: 0644]
queue-6.6/selftests-mm-compaction_test-support-platform-with-huge-mount-of-memory.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/bpf-arm64-fix-address-emission-with-tag-based-kasan-enabled.patch b/queue-6.6/bpf-arm64-fix-address-emission-with-tag-based-kasan-enabled.patch
new file mode 100644 (file)
index 0000000..b7399a4
--- /dev/null
@@ -0,0 +1,62 @@
+From a552e2ef5fd1a6c78267cd4ec5a9b49aa11bbb1c Mon Sep 17 00:00:00 2001
+From: Peter Collingbourne <pcc@google.com>
+Date: Fri, 18 Oct 2024 15:16:43 -0700
+Subject: bpf, arm64: Fix address emission with tag-based KASAN enabled
+
+From: Peter Collingbourne <pcc@google.com>
+
+commit a552e2ef5fd1a6c78267cd4ec5a9b49aa11bbb1c upstream.
+
+When BPF_TRAMP_F_CALL_ORIG is enabled, the address of a bpf_tramp_image
+struct on the stack is passed during the size calculation pass and
+an address on the heap is passed during code generation. This may
+cause a heap buffer overflow if the heap address is tagged because
+emit_a64_mov_i64() will emit longer code than it did during the size
+calculation pass. The same problem could occur without tag-based
+KASAN if one of the 16-bit words of the stack address happened to
+be all-ones during the size calculation pass. Fix the problem by
+assuming the worst case (4 instructions) when calculating the size
+of the bpf_tramp_image address emission.
+
+Fixes: 19d3c179a377 ("bpf, arm64: Fix trampoline for BPF_TRAMP_F_CALL_ORIG")
+Signed-off-by: Peter Collingbourne <pcc@google.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Xu Kuohai <xukuohai@huawei.com>
+Link: https://linux-review.googlesource.com/id/I1496f2bc24fba7a1d492e16e2b94cf43714f2d3c
+Link: https://lore.kernel.org/bpf/20241018221644.3240898-1-pcc@google.com
+[Minor context change fixed.]
+Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/net/bpf_jit_comp.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -2001,7 +2001,11 @@ static int prepare_trampoline(struct jit
+       emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
+       if (flags & BPF_TRAMP_F_CALL_ORIG) {
+-              emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
++              /* for the first pass, assume the worst case */
++              if (!ctx->image)
++                      ctx->idx += 4;
++              else
++                      emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
+               emit_call((const u64)__bpf_tramp_enter, ctx);
+       }
+@@ -2045,7 +2049,11 @@ static int prepare_trampoline(struct jit
+       if (flags & BPF_TRAMP_F_CALL_ORIG) {
+               im->ip_epilogue = ctx->image + ctx->idx;
+-              emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
++              /* for the first pass, assume the worst case */
++              if (!ctx->image)
++                      ctx->idx += 4;
++              else
++                      emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
+               emit_call((const u64)__bpf_tramp_exit, ctx);
+       }
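
To make the two-pass hazard concrete, here is a minimal user-space sketch
of the sizing rule described above. The chunk-counting model (a MOVZ/MOVK
per non-zero 16-bit chunk, or an inverted MOVN-based form that skips
all-ones chunks) only approximates emit_a64_mov_i64(), and both addresses
are invented for illustration.

  #include <stdint.h>
  #include <stdio.h>

  /* Rough model of emit_a64_mov_i64(): the JIT can build a 64-bit
   * immediate either from MOVZ/MOVK (skipping all-zero 16-bit chunks)
   * or from a MOVN-based inverted form (skipping all-ones chunks), so
   * the instruction count depends on the value being loaded. */
  static int insns_for_imm64(uint64_t imm)
  {
          int movz_form = 0, movn_form = 0;

          for (int shift = 0; shift < 64; shift += 16) {
                  uint16_t chunk = (imm >> shift) & 0xffff;

                  if (chunk != 0x0000)
                          movz_form++;
                  if (chunk != 0xffff)
                          movn_form++;
          }
          if (movz_form == 0 || movn_form == 0)
                  return 1; /* all-zero or all-ones needs one insn */
          return movz_form < movn_form ? movz_form : movn_form;
  }

  int main(void)
  {
          /* Hypothetical values: a stack address seen during the size
           * calculation pass (one all-ones chunk) and a tag-based KASAN
           * heap address seen during emission (tag in bits 56-63). */
          uint64_t stack_im  = 0xffffffc01234abcdULL;
          uint64_t tagged_im = 0xf2ffc08012345678ULL;

          printf("sizing pass:   %d insns\n", insns_for_imm64(stack_im));
          printf("emission pass: %d insns\n", insns_for_imm64(tagged_im));
          return 0;
  }

With these inputs the sizing pass would reserve three instructions while
emission needs four; always reserving four, as the patch does, removes
the mismatch.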
diff --git a/queue-6.6/bpf-arm64-fix-trampoline-for-bpf_tramp_f_call_orig.patch b/queue-6.6/bpf-arm64-fix-trampoline-for-bpf_tramp_f_call_orig.patch
new file mode 100644 (file)
index 0000000..a1d7c2f
--- /dev/null
@@ -0,0 +1,57 @@
+From 19d3c179a37730caf600a97fed3794feac2b197b Mon Sep 17 00:00:00 2001
+From: Puranjay Mohan <puranjay@kernel.org>
+Date: Thu, 11 Jul 2024 15:18:38 +0000
+Subject: bpf, arm64: Fix trampoline for BPF_TRAMP_F_CALL_ORIG
+
+From: Puranjay Mohan <puranjay@kernel.org>
+
+commit 19d3c179a37730caf600a97fed3794feac2b197b upstream.
+
+When BPF_TRAMP_F_CALL_ORIG is set, the trampoline calls the
+__bpf_tramp_enter() and __bpf_tramp_exit() functions, passing them
+the struct bpf_tramp_image *im pointer as an argument in R0.
+
+The trampoline generation code uses emit_addr_mov_i64() to emit
+instructions for moving the bpf_tramp_image address into R0, but
+emit_addr_mov_i64() assumes the address to be in the vmalloc() space
+and uses only 48 bits. Because bpf_tramp_image is allocated using
+kzalloc(), its address can use more than 48 bits; in this case the
+trampoline will pass an invalid address to __bpf_tramp_enter/exit(),
+causing a kernel crash.
+
+Fix this by using emit_a64_mov_i64() in place of emit_addr_mov_i64(),
+as it can work with addresses wider than 48 bits.
+
+Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64")
+Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Closes: https://lore.kernel.org/all/SJ0PR15MB461564D3F7E7A763498CA6A8CBDB2@SJ0PR15MB4615.namprd15.prod.outlook.com/
+Link: https://lore.kernel.org/bpf/20240711151838.43469-1-puranjay@kernel.org
+[Minor context change fixed.]
+Signed-off-by: Bin Lan <bin.lan.cn@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/net/bpf_jit_comp.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -2001,7 +2001,7 @@ static int prepare_trampoline(struct jit
+       emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
+       if (flags & BPF_TRAMP_F_CALL_ORIG) {
+-              emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
++              emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
+               emit_call((const u64)__bpf_tramp_enter, ctx);
+       }
+@@ -2045,7 +2045,7 @@ static int prepare_trampoline(struct jit
+       if (flags & BPF_TRAMP_F_CALL_ORIG) {
+               im->ip_epilogue = ctx->image + ctx->idx;
+-              emit_addr_mov_i64(A64_R(0), (const u64)im, ctx);
++              emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
+               emit_call((const u64)__bpf_tramp_exit, ctx);
+       }
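
The 48-bit assumption can be made concrete with a small sketch:
emit_addr_mov_i64() emits a MOVN plus two MOVKs, so only bits 0-47 come
from the value while bits 48-63 end up all-ones. The helper below models
that behaviour; both addresses are made up for illustration.

  #include <stdint.h>
  #include <stdio.h>

  /* Approximation of the three-instruction MOVN/MOVK sequence emitted
   * by emit_addr_mov_i64(): bits 0..47 come from the value, bits 48..63
   * are forced to all-ones, which is only correct for canonical
   * vmalloc()-range addresses. */
  static uint64_t load_addr48(uint64_t addr)
  {
          return (addr & 0x0000ffffffffffffULL) | 0xffff000000000000ULL;
  }

  int main(void)
  {
          uint64_t vmalloc_addr = 0xffff800012345678ULL; /* hypothetical */
          uint64_t kzalloc_addr = 0xff10001234567000ULL; /* hypothetical */

          /* The vmalloc-range address survives the round trip... */
          printf("%#llx -> %#llx\n", (unsigned long long)vmalloc_addr,
                 (unsigned long long)load_addr48(vmalloc_addr));
          /* ...the kzalloc()-style address comes back corrupted, so R0
           * would hold a bogus pointer for __bpf_tramp_enter/exit(). */
          printf("%#llx -> %#llx\n", (unsigned long long)kzalloc_addr,
                 (unsigned long long)load_addr48(kzalloc_addr));
          return 0;
  }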
diff --git a/queue-6.6/btrfs-don-t-bug_on-when-0-reference-count-at-btrfs_lookup_extent_info.patch b/queue-6.6/btrfs-don-t-bug_on-when-0-reference-count-at-btrfs_lookup_extent_info.patch
new file mode 100644 (file)
index 0000000..7e663df
--- /dev/null
@@ -0,0 +1,72 @@
+From 28cb13f29faf6290597b24b728dc3100c019356f Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 18 Jun 2024 12:15:01 +0100
+Subject: btrfs: don't BUG_ON() when 0 reference count at btrfs_lookup_extent_info()
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 28cb13f29faf6290597b24b728dc3100c019356f upstream.
+
+Instead of doing a BUG_ON(), handle the error by returning -EUCLEAN,
+aborting the transaction and logging an error message.
+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+[Minor conflict resolved due to code context change.]
+Signed-off-by: Jianqi Ren <jianqi.ren.cn@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/extent-tree.c |   25 ++++++++++++++++++++-----
+ 1 file changed, 20 insertions(+), 5 deletions(-)
+
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -164,6 +164,14 @@ search_again:
+                       ei = btrfs_item_ptr(leaf, path->slots[0],
+                                           struct btrfs_extent_item);
+                       num_refs = btrfs_extent_refs(leaf, ei);
++                      if (unlikely(num_refs == 0)) {
++                              ret = -EUCLEAN;
++                              btrfs_err(fs_info,
++                      "unexpected zero reference count for extent item (%llu %u %llu)",
++                                        key.objectid, key.type, key.offset);
++                              btrfs_abort_transaction(trans, ret);
++                              goto out_free;
++                      }
+                       extent_flags = btrfs_extent_flags(leaf, ei);
+               } else {
+                       ret = -EUCLEAN;
+@@ -177,8 +185,6 @@ search_again:
+                       goto out_free;
+               }
+-
+-              BUG_ON(num_refs == 0);
+       } else {
+               num_refs = 0;
+               extent_flags = 0;
+@@ -208,10 +214,19 @@ search_again:
+                       goto search_again;
+               }
+               spin_lock(&head->lock);
+-              if (head->extent_op && head->extent_op->update_flags)
++              if (head->extent_op && head->extent_op->update_flags) {
+                       extent_flags |= head->extent_op->flags_to_set;
+-              else
+-                      BUG_ON(num_refs == 0);
++              } else if (unlikely(num_refs == 0)) {
++                      spin_unlock(&head->lock);
++                      mutex_unlock(&head->mutex);
++                      spin_unlock(&delayed_refs->lock);
++                      ret = -EUCLEAN;
++                      btrfs_err(fs_info,
++                        "unexpected zero reference count for extent %llu (%s)",
++                                bytenr, metadata ? "metadata" : "data");
++                      btrfs_abort_transaction(trans, ret);
++                      goto out_free;
++              }
+               num_refs += head->ref_mod;
+               spin_unlock(&head->lock);
diff --git a/queue-6.6/hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch b/queue-6.6/hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch
new file mode 100644 (file)
index 0000000..2c9df9b
--- /dev/null
@@ -0,0 +1,88 @@
+From af288a426c3e3552b62595c6138ec6371a17dbba Mon Sep 17 00:00:00 2001
+From: Ma Wupeng <mawupeng1@huawei.com>
+Date: Mon, 17 Feb 2025 09:43:29 +0800
+Subject: hwpoison, memory_hotplug: lock folio before unmap hwpoisoned folio
+
+From: Ma Wupeng <mawupeng1@huawei.com>
+
+commit af288a426c3e3552b62595c6138ec6371a17dbba upstream.
+
+Commit b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to
+be offlined") added page poison checks in do_migrate_range() in order to
+make offlining hwpoisoned pages possible, by introducing isolate_lru_page()
+and try_to_unmap() for hwpoisoned pages.  However, the folio lock must be
+held before calling try_to_unmap().  Add it to fix this problem.
+
+A warning will be produced if the folio is not locked during unmap:
+
+  ------------[ cut here ]------------
+  kernel BUG at ./include/linux/swapops.h:400!
+  Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP
+  Modules linked in:
+  CPU: 4 UID: 0 PID: 411 Comm: bash Tainted: G        W          6.13.0-rc1-00016-g3c434c7ee82a-dirty #41
+  Tainted: [W]=WARN
+  Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
+  pstate: 40400005 (nZcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+  pc : try_to_unmap_one+0xb08/0xd3c
+  lr : try_to_unmap_one+0x3dc/0xd3c
+  Call trace:
+   try_to_unmap_one+0xb08/0xd3c (P)
+   try_to_unmap_one+0x3dc/0xd3c (L)
+   rmap_walk_anon+0xdc/0x1f8
+   rmap_walk+0x3c/0x58
+   try_to_unmap+0x88/0x90
+   unmap_poisoned_folio+0x30/0xa8
+   do_migrate_range+0x4a0/0x568
+   offline_pages+0x5a4/0x670
+   memory_block_action+0x17c/0x374
+   memory_subsys_offline+0x3c/0x78
+   device_offline+0xa4/0xd0
+   state_store+0x8c/0xf0
+   dev_attr_store+0x18/0x2c
+   sysfs_kf_write+0x44/0x54
+   kernfs_fop_write_iter+0x118/0x1a8
+   vfs_write+0x3a8/0x4bc
+   ksys_write+0x6c/0xf8
+   __arm64_sys_write+0x1c/0x28
+   invoke_syscall+0x44/0x100
+   el0_svc_common.constprop.0+0x40/0xe0
+   do_el0_svc+0x1c/0x28
+   el0_svc+0x30/0xd0
+   el0t_64_sync_handler+0xc8/0xcc
+   el0t_64_sync+0x198/0x19c
+  Code: f9407be0 b5fff320 d4210000 17ffff97 (d4210000)
+  ---[ end trace 0000000000000000 ]---
+
+Link: https://lkml.kernel.org/r/20250217014329.3610326-4-mawupeng1@huawei.com
+Fixes: b15c87263a69 ("hwpoison, memory_hotplug: allow hwpoisoned pages to be offlined")
+Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Acked-by: Miaohe Lin <linmiaohe@huawei.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Xiangyu Chen <xiangyu.chen@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory_hotplug.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -1735,8 +1735,12 @@ static void do_migrate_range(unsigned lo
+               if (PageHWPoison(page)) {
+                       if (WARN_ON(folio_test_lru(folio)))
+                               folio_isolate_lru(folio);
+-                      if (folio_mapped(folio))
++                      if (folio_mapped(folio)) {
++                              folio_lock(folio);
+                               try_to_unmap(folio, TTU_IGNORE_MLOCK);
++                              folio_unlock(folio);
++                      }
++
+                       continue;
+               }
diff --git a/queue-6.6/loongarch-explicitly-specify-code-model-in-makefile.patch b/queue-6.6/loongarch-explicitly-specify-code-model-in-makefile.patch
new file mode 100644 (file)
index 0000000..e9d2ead
--- /dev/null
@@ -0,0 +1,33 @@
+From e67e0eb6a98b261caf45048f9eb95fd7609289c0 Mon Sep 17 00:00:00 2001
+From: Huacai Chen <chenhuacai@loongson.cn>
+Date: Fri, 22 Nov 2024 15:47:47 +0800
+Subject: LoongArch: Explicitly specify code model in Makefile
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+commit e67e0eb6a98b261caf45048f9eb95fd7609289c0 upstream.
+
+LoongArch's toolchain may change the default code model from normal to
+medium. This is unnecessary for the kernel, and it generates some
+relocations which cannot be handled by the module loader. So explicitly
+set the code model to normal in the Makefile (for Rust, 'normal' is
+'small').
+
+Cc: stable@vger.kernel.org
+Tested-by: Haiyong Sun <sunhaiyong@loongson.cn>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/loongarch/Makefile |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/loongarch/Makefile
++++ b/arch/loongarch/Makefile
+@@ -43,7 +43,7 @@ endif
+ ifdef CONFIG_64BIT
+ ld-emul                       = $(64bit-emul)
+-cflags-y              += -mabi=lp64s
++cflags-y              += -mabi=lp64s -mcmodel=normal
+ endif
+ cflags-y                      += -pipe -msoft-float
diff --git a/queue-6.6/memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch b/queue-6.6/memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch
new file mode 100644 (file)
index 0000000..6bfa1a5
--- /dev/null
@@ -0,0 +1,72 @@
+From da8bf5daa5e55a6af2b285ecda460d6454712ff4 Mon Sep 17 00:00:00 2001
+From: Tom Lendacky <thomas.lendacky@amd.com>
+Date: Thu, 8 May 2025 12:24:10 -0500
+Subject: memblock: Accept allocated memory before use in memblock_double_array()
+
+From: Tom Lendacky <thomas.lendacky@amd.com>
+
+commit da8bf5daa5e55a6af2b285ecda460d6454712ff4 upstream.
+
+When increasing the array size in memblock_double_array() and the slab
+is not yet available, a call to memblock_find_in_range() is used to
+reserve/allocate memory. However, the range returned may not have been
+accepted, which can result in a crash when booting an SNP guest:
+
+  RIP: 0010:memcpy_orig+0x68/0x130
+  Code: ...
+  RSP: 0000:ffffffff9cc03ce8 EFLAGS: 00010006
+  RAX: ff11001ff83e5000 RBX: 0000000000000000 RCX: fffffffffffff000
+  RDX: 0000000000000bc0 RSI: ffffffff9dba8860 RDI: ff11001ff83e5c00
+  RBP: 0000000000002000 R08: 0000000000000000 R09: 0000000000002000
+  R10: 000000207fffe000 R11: 0000040000000000 R12: ffffffff9d06ef78
+  R13: ff11001ff83e5000 R14: ffffffff9dba7c60 R15: 0000000000000c00
+  memblock_double_array+0xff/0x310
+  memblock_add_range+0x1fb/0x2f0
+  memblock_reserve+0x4f/0xa0
+  memblock_alloc_range_nid+0xac/0x130
+  memblock_alloc_internal+0x53/0xc0
+  memblock_alloc_try_nid+0x3d/0xa0
+  swiotlb_init_remap+0x149/0x2f0
+  mem_init+0xb/0xb0
+  mm_core_init+0x8f/0x350
+  start_kernel+0x17e/0x5d0
+  x86_64_start_reservations+0x14/0x30
+  x86_64_start_kernel+0x92/0xa0
+  secondary_startup_64_no_verify+0x194/0x19b
+
+Mitigate this by calling accept_memory() on the memory range returned
+before the slab is available.
+
+Prior to v6.12, the accept_memory() interface used 'start' and 'end'
+parameters instead of 'start' and 'size'; therefore, the accept_memory()
+call must be adjusted to specify 'start + size' for 'end' when applying
+to kernels prior to v6.12.
+
+Cc: stable@vger.kernel.org # see patch description, needs adjustments for <= 6.11
+Fixes: dcdfdd40fa82 ("mm: Add support for unaccepted memory")
+Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
+Link: https://lore.kernel.org/r/da1ac73bf4ded761e21b4e4bb5178382a580cd73.1746725050.git.thomas.lendacky@amd.com
+Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memblock.c |    9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/mm/memblock.c
++++ b/mm/memblock.c
+@@ -460,7 +460,14 @@ static int __init_memblock memblock_doub
+                               min(new_area_start, memblock.current_limit),
+                               new_alloc_size, PAGE_SIZE);
+-              new_array = addr ? __va(addr) : NULL;
++              if (addr) {
++                      /* The memory may not have been accepted, yet. */
++                      accept_memory(addr, addr + new_alloc_size);
++
++                      new_array = __va(addr);
++              } else {
++                      new_array = NULL;
++              }
+       }
+       if (!addr) {
+               pr_err("memblock: Failed to double %s array from %ld to %ld entries !\n",
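
Since the commit message flags a backport pitfall, here is a minimal
sketch of the two accept_memory() calling conventions. The version
suffixes are invented and the bodies are stubs; only the argument
convention is the point.

  #include <stdio.h>

  typedef unsigned long long phys_addr_t;

  /* Stub standing in for the real accept-memory machinery. */
  static void do_accept(phys_addr_t start, phys_addr_t end)
  {
          printf("accepting [%#llx-%#llx)\n", start, end);
  }

  /* v6.12+ convention: accept_memory(start, size). */
  static void accept_memory_v612(phys_addr_t start, unsigned long size)
  {
          do_accept(start, start + size);
  }

  /* Pre-v6.12 convention: accept_memory(start, end) -- this is why the
   * 6.6 backport above passes 'addr, addr + new_alloc_size'. */
  static void accept_memory_pre_v612(phys_addr_t start, phys_addr_t end)
  {
          do_accept(start, end);
  }

  int main(void)
  {
          phys_addr_t addr = 0x100000;           /* hypothetical range */
          unsigned long new_alloc_size = 0x2000;

          accept_memory_v612(addr, new_alloc_size);
          accept_memory_pre_v612(addr, addr + new_alloc_size);
          return 0;
  }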
diff --git a/queue-6.6/mm-migrate-correct-nr_failed-in-migrate_pages_sync.patch b/queue-6.6/mm-migrate-correct-nr_failed-in-migrate_pages_sync.patch
new file mode 100644 (file)
index 0000000..8c1ed64
--- /dev/null
@@ -0,0 +1,109 @@
+From a259945efe6ada94087ef666e9b38f8e34ea34ba Mon Sep 17 00:00:00 2001
+From: Zi Yan <ziy@nvidia.com>
+Date: Tue, 17 Oct 2023 12:31:28 -0400
+Subject: mm/migrate: correct nr_failed in migrate_pages_sync()
+
+From: Zi Yan <ziy@nvidia.com>
+
+commit a259945efe6ada94087ef666e9b38f8e34ea34ba upstream.
+
+nr_failed was missing the large folio splits from migrate_pages_batch(),
+which can cause a mismatch between the migrate_pages() return value and
+the number of not migrated pages, i.e., when the return value of
+migrate_pages() is 0, there are still pages left in the from page list.
+It will happen when a non-PMD THP large folio fails to migrate due to
+-ENOMEM and is split successfully, but not all the split pages are
+migrated; migrate_pages_batch() would return non-zero, but
+astats.nr_thp_split = 0.  nr_failed would be 0 and returned to the caller
+of migrate_pages(), but the not migrated pages are left in the from page
+list without being added back to the LRU lists.
+
+Fix it by adding a new nr_split counter for large folio splits and adding
+it to nr_failed in migrate_pages_sync() after migrate_pages_batch() is
+done.
+
+Link: https://lkml.kernel.org/r/20231017163129.2025214-1-zi.yan@sent.com
+Fixes: 2ef7dbb26990 ("migrate_pages: try migrate in batch asynchronously firstly")
+Signed-off-by: Zi Yan <ziy@nvidia.com>
+Acked-by: Huang Ying <ying.huang@intel.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/migrate.c |   16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1504,6 +1504,7 @@ struct migrate_pages_stats {
+       int nr_thp_succeeded;   /* THP migrated successfully */
+       int nr_thp_failed;      /* THP failed to be migrated */
+       int nr_thp_split;       /* THP split before migrating */
++      int nr_split;   /* Large folio (include THP) split before migrating */
+ };
+ /*
+@@ -1623,6 +1624,7 @@ static int migrate_pages_batch(struct li
+       int nr_retry_pages = 0;
+       int pass = 0;
+       bool is_thp = false;
++      bool is_large = false;
+       struct folio *folio, *folio2, *dst = NULL, *dst2;
+       int rc, rc_saved = 0, nr_pages;
+       LIST_HEAD(unmap_folios);
+@@ -1638,7 +1640,8 @@ static int migrate_pages_batch(struct li
+               nr_retry_pages = 0;
+               list_for_each_entry_safe(folio, folio2, from, lru) {
+-                      is_thp = folio_test_large(folio) && folio_test_pmd_mappable(folio);
++                      is_large = folio_test_large(folio);
++                      is_thp = is_large && folio_test_pmd_mappable(folio);
+                       nr_pages = folio_nr_pages(folio);
+                       cond_resched();
+@@ -1658,6 +1661,7 @@ static int migrate_pages_batch(struct li
+                               stats->nr_thp_failed++;
+                               if (!try_split_folio(folio, split_folios)) {
+                                       stats->nr_thp_split++;
++                                      stats->nr_split++;
+                                       continue;
+                               }
+                               stats->nr_failed_pages += nr_pages;
+@@ -1686,11 +1690,12 @@ static int migrate_pages_batch(struct li
+                               nr_failed++;
+                               stats->nr_thp_failed += is_thp;
+                               /* Large folio NUMA faulting doesn't split to retry. */
+-                              if (folio_test_large(folio) && !nosplit) {
++                              if (is_large && !nosplit) {
+                                       int ret = try_split_folio(folio, split_folios);
+                                       if (!ret) {
+                                               stats->nr_thp_split += is_thp;
++                                              stats->nr_split += is_large;
+                                               break;
+                                       } else if (reason == MR_LONGTERM_PIN &&
+                                                  ret == -EAGAIN) {
+@@ -1836,6 +1841,7 @@ static int migrate_pages_sync(struct lis
+       stats->nr_succeeded += astats.nr_succeeded;
+       stats->nr_thp_succeeded += astats.nr_thp_succeeded;
+       stats->nr_thp_split += astats.nr_thp_split;
++      stats->nr_split += astats.nr_split;
+       if (rc < 0) {
+               stats->nr_failed_pages += astats.nr_failed_pages;
+               stats->nr_thp_failed += astats.nr_thp_failed;
+@@ -1843,7 +1849,11 @@ static int migrate_pages_sync(struct lis
+               return rc;
+       }
+       stats->nr_thp_failed += astats.nr_thp_split;
+-      nr_failed += astats.nr_thp_split;
++      /*
++       * Do not count rc, as pages will be retried below.
++       * Count nr_split only, since it includes nr_thp_split.
++       */
++      nr_failed += astats.nr_split;
+       /*
+        * Fall back to migrate all failed folios one by one synchronously. All
+        * failed folios except split THPs will be retried, so their failure
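
A worked example of the accounting fix, with invented numbers: one large
folio that is not PMD-mappable fails its batch migration with -ENOMEM
and is split. The sketch below only replays the counter arithmetic.

  #include <stdio.h>

  struct migrate_pages_stats {
          int nr_thp_split; /* THP splits only */
          int nr_split;     /* all large folio splits, THP included */
  };

  int main(void)
  {
          /* One non-PMD-mappable large folio is split in the batch pass:
           * it is large but not a THP, so only the new counter sees it. */
          struct migrate_pages_stats astats = {
                  .nr_thp_split = 0,
                  .nr_split = 1,
          };
          int nr_failed_old = 0, nr_failed_new = 0;

          nr_failed_old += astats.nr_thp_split; /* stays 0: the bug */
          nr_failed_new += astats.nr_split;     /* becomes 1: the fix */

          printf("old nr_failed=%d, new nr_failed=%d\n",
                 nr_failed_old, nr_failed_new);
          return 0;
  }

If any of the split pages then stay unmigrated, the old computation lets
migrate_pages() report zero failures while pages remain on the from
list; the new one does not.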
diff --git a/queue-6.6/sctp-add-mutual-exclusion-in-proc_sctp_do_udp_port.patch b/queue-6.6/sctp-add-mutual-exclusion-in-proc_sctp_do_udp_port.patch
new file mode 100644 (file)
index 0000000..b0c60ed
--- /dev/null
@@ -0,0 +1,78 @@
+From 10206302af856791fbcc27a33ed3c3eb09b2793d Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 31 Mar 2025 09:15:32 +0000
+Subject: sctp: add mutual exclusion in proc_sctp_do_udp_port()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 10206302af856791fbcc27a33ed3c3eb09b2793d upstream.
+
+We must serialize calls to sctp_udp_sock_stop() and sctp_udp_sock_start()
+or risk a crash as syzbot reported:
+
+Oops: general protection fault, probably for non-canonical address 0xdffffc000000000d: 0000 [#1] SMP KASAN PTI
+KASAN: null-ptr-deref in range [0x0000000000000068-0x000000000000006f]
+CPU: 1 UID: 0 PID: 6551 Comm: syz.1.44 Not tainted 6.14.0-syzkaller-g7f2ff7b62617 #0 PREEMPT(full)
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025
+ RIP: 0010:kernel_sock_shutdown+0x47/0x70 net/socket.c:3653
+Call Trace:
+ <TASK>
+  udp_tunnel_sock_release+0x68/0x80 net/ipv4/udp_tunnel_core.c:181
+  sctp_udp_sock_stop+0x71/0x160 net/sctp/protocol.c:930
+  proc_sctp_do_udp_port+0x264/0x450 net/sctp/sysctl.c:553
+  proc_sys_call_handler+0x3d0/0x5b0 fs/proc/proc_sysctl.c:601
+  iter_file_splice_write+0x91c/0x1150 fs/splice.c:738
+  do_splice_from fs/splice.c:935 [inline]
+  direct_splice_actor+0x18f/0x6c0 fs/splice.c:1158
+  splice_direct_to_actor+0x342/0xa30 fs/splice.c:1102
+  do_splice_direct_actor fs/splice.c:1201 [inline]
+  do_splice_direct+0x174/0x240 fs/splice.c:1227
+  do_sendfile+0xafd/0xe50 fs/read_write.c:1368
+  __do_sys_sendfile64 fs/read_write.c:1429 [inline]
+  __se_sys_sendfile64 fs/read_write.c:1415 [inline]
+  __x64_sys_sendfile64+0x1d8/0x220 fs/read_write.c:1415
+  do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
+
+Fixes: 046c052b475e ("sctp: enable udp tunneling socks")
+Reported-by: syzbot+fae49d997eb56fa7c74d@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/netdev/67ea5c01.050a0220.1547ec.012b.GAE@google.com/T/#u
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Xin Long <lucien.xin@gmail.com>
+Link: https://patch.msgid.link/20250331091532.224982-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[Minor conflict resolved due to code context change.]
+Signed-off-by: Jianqi Ren <jianqi.ren.cn@windriver.com>
+Signed-off-by: He Zhe <zhe.he@windriver.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/sysctl.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/sctp/sysctl.c
++++ b/net/sctp/sysctl.c
+@@ -529,6 +529,8 @@ static int proc_sctp_do_auth(struct ctl_
+       return ret;
+ }
++static DEFINE_MUTEX(sctp_sysctl_mutex);
++
+ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write,
+                                void *buffer, size_t *lenp, loff_t *ppos)
+ {
+@@ -553,6 +555,7 @@ static int proc_sctp_do_udp_port(struct
+               if (new_value > max || new_value < min)
+                       return -EINVAL;
++              mutex_lock(&sctp_sysctl_mutex);
+               net->sctp.udp_port = new_value;
+               sctp_udp_sock_stop(net);
+               if (new_value) {
+@@ -565,6 +568,7 @@ static int proc_sctp_do_udp_port(struct
+               lock_sock(sk);
+               sctp_sk(sk)->udp_port = htons(net->sctp.udp_port);
+               release_sock(sk);
++              mutex_unlock(&sctp_sysctl_mutex);
+       }
+       return ret;
diff --git a/queue-6.6/selftests-mm-compaction_test-support-platform-with-huge-mount-of-memory.patch b/queue-6.6/selftests-mm-compaction_test-support-platform-with-huge-mount-of-memory.patch
new file mode 100644 (file)
index 0000000..9117377
--- /dev/null
@@ -0,0 +1,72 @@
+From ab00ddd802f80e31fc9639c652d736fe3913feae Mon Sep 17 00:00:00 2001
+From: Feng Tang <feng.tang@linux.alibaba.com>
+Date: Wed, 23 Apr 2025 18:36:45 +0800
+Subject: selftests/mm: compaction_test: support platform with huge mount of memory
+
+From: Feng Tang <feng.tang@linux.alibaba.com>
+
+commit ab00ddd802f80e31fc9639c652d736fe3913feae upstream.
+
+When running the mm selftests to verify mm patches, the 'compaction_test'
+case failed on an x86 server with 1TB of memory.  The root cause is that
+the machine has more free memory than the test supports.
+
+The test case tries to allocate 100000 huge pages, which is about 200 GB
+on that x86 server, and when it succeeds, it expects that to be larger
+than 1/3 of 80% of the free memory in the system.  This logic only works
+for platforms with 750 GB ( 200 / (1/3) / 80% ) or less free memory, and
+may raise a false alarm for others.
+
+Fix it by changing the fixed page number to a self-adjusting number based
+on the real amount of free memory.
+
+Link: https://lkml.kernel.org/r/20250423103645.2758-1-feng.tang@linux.alibaba.com
+Fixes: bd67d5c15cc1 ("Test compaction of mlocked memory")
+Signed-off-by: Feng Tang <feng.tang@linux.alibaba.com>
+Acked-by: Dev Jain <dev.jain@arm.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Sri Jayaramappa <sjayaram@akamai.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/mm/compaction_test.c |   19 ++++++++++++++-----
+ 1 file changed, 14 insertions(+), 5 deletions(-)
+
+--- a/tools/testing/selftests/mm/compaction_test.c
++++ b/tools/testing/selftests/mm/compaction_test.c
+@@ -89,6 +89,8 @@ int check_compaction(unsigned long mem_f
+       int compaction_index = 0;
+       char initial_nr_hugepages[20] = {0};
+       char nr_hugepages[20] = {0};
++      char target_nr_hugepages[24] = {0};
++      int slen;
+       /* We want to test with 80% of available memory. Else, OOM killer comes
+          in to play */
+@@ -119,11 +121,18 @@ int check_compaction(unsigned long mem_f
+       lseek(fd, 0, SEEK_SET);
+-      /* Request a large number of huge pages. The Kernel will allocate
+-         as much as it can */
+-      if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
+-              ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
+-                             strerror(errno));
++      /*
++       * Request huge pages for about half of the free memory. The Kernel
++       * will allocate as much as it can, and we expect it will get at least 1/3
++       */
++      nr_hugepages_ul = mem_free / hugepage_size / 2;
++      snprintf(target_nr_hugepages, sizeof(target_nr_hugepages),
++               "%lu", nr_hugepages_ul);
++
++      slen = strlen(target_nr_hugepages);
++      if (write(fd, target_nr_hugepages, slen) != slen) {
++              ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n",
++                             nr_hugepages_ul, strerror(errno));
+               goto close_fd;
+       }
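
The 750 GB bound from the commit message can be reproduced with a few
lines of arithmetic. The sketch below assumes 2 MiB huge pages; the
free-memory sizes are arbitrary.

  #include <stdio.h>

  int main(void)
  {
          const double hugepage_gb = 2.0 / 1024;              /* 2 MiB pages */
          const double old_request_gb = 100000 * hugepage_gb; /* ~195 GB */

          for (double free_gb = 256; free_gb <= 2048; free_gb *= 2) {
                  double expected_gb = free_gb * 0.8 / 3; /* 1/3 of 80% */
                  /* New scheme: request pages for about half of free memory. */
                  double new_request_gb = free_gb / 2;

                  printf("free %5.0f GB: old request %s (%.0f vs %.0f GB), "
                         "new request %.0f GB\n",
                         free_gb,
                         old_request_gb >= expected_gb ? "suffices" : "FAILS",
                         old_request_gb, expected_gb, new_request_gb);
          }
          return 0;
  }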
diff --git a/queue-6.6/series b/queue-6.6/series
index d139d403e6a87691bcdd7d853b91bc079d951333..0b721d581b409f0fd471038810f3a2598783f883 100644 (file)
@@ -104,3 +104,12 @@ x86-its-fix-build-error-for-its_static_thunk.patch
 mm-page_alloc-fix-race-condition-in-unaccepted-memory-handling.patch
 bluetooth-btnxpuart-fix-kernel-panic-during-fw-release.patch
 usb-typec-ucsi-displayport-fix-deadlock.patch
+selftests-mm-compaction_test-support-platform-with-huge-mount-of-memory.patch
+mm-migrate-correct-nr_failed-in-migrate_pages_sync.patch
+bpf-arm64-fix-trampoline-for-bpf_tramp_f_call_orig.patch
+bpf-arm64-fix-address-emission-with-tag-based-kasan-enabled.patch
+loongarch-explicitly-specify-code-model-in-makefile.patch
+memblock-accept-allocated-memory-before-use-in-memblock_double_array.patch
+hwpoison-memory_hotplug-lock-folio-before-unmap-hwpoisoned-folio.patch
+sctp-add-mutual-exclusion-in-proc_sctp_do_udp_port.patch
+btrfs-don-t-bug_on-when-0-reference-count-at-btrfs_lookup_extent_info.patch