Fixes for 6.6

author Sasha Levin <sashal@kernel.org>

Sun, 2 Mar 2025 14:46:02 +0000 (09:46 -0500)

committer Sasha Levin <sashal@kernel.org>

Sun, 2 Mar 2025 14:46:02 +0000 (09:46 -0500)
author Sasha Levin <sashal@kernel.org>
Sun, 2 Mar 2025 14:46:02 +0000 (09:46 -0500)
committer Sasha Levin <sashal@kernel.org>
Sun, 2 Mar 2025 14:46:02 +0000 (09:46 -0500)
diff --git a/queue-6.6/io_uring-net-save-msg_control-for-compat.patch b/queue-6.6/io_uring-net-save-msg_control-for-compat.patch

new file mode 100644 (file)

index 0000000..f05ab13
--- /dev/null
+++ b/queue-6.6/io_uring-net-save-msg_control-for-compat.patch
@@ -0,0 +1,39 @@
+From a952a9cf8c0bb0dde6be0de544f99687ff34a514 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2025 15:59:02 +0000
+Subject: io_uring/net: save msg_control for compat
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ Upstream commit 6ebf05189dfc6d0d597c99a6448a4d1064439a18 ]
+
+Match the compat part of io_sendmsg_copy_hdr() with its counterpart and
+save msg_control.
+
+Fixes: c55978024d123 ("io_uring/net: move receive multishot out of the generic msghdr path")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/2a8418821fe83d3b64350ad2b3c0303e9b732bbd.1740498502.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/net.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/io_uring/net.c b/io_uring/net.c
+index 56091292950fd..1a0e98e19dc0e 100644
+--- a/io_uring/net.c
++++ b/io_uring/net.c
+@@ -303,7 +303,9 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
+               if (unlikely(ret))
+                       return ret;
+ 
+-              return __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
++              ret = __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
++              sr->msg_control = iomsg->msg.msg_control_user;
++              return ret;
+       }
+ #endif
+ 
+-- 
+2.39.5
+
diff --git a/queue-6.6/perf-core-order-the-pmu-list-to-fix-warning-about-un.patch b/queue-6.6/perf-core-order-the-pmu-list-to-fix-warning-about-un.patch

new file mode 100644 (file)

index 0000000..70ced24
--- /dev/null
+++ b/queue-6.6/perf-core-order-the-pmu-list-to-fix-warning-about-un.patch
@@ -0,0 +1,108 @@
+From 2bcac5ca80bca1ed629a077b81afcea5c24d1f77 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 Jan 2025 07:33:56 +0000
+Subject: perf/core: Order the PMU list to fix warning about unordered
+ pmu_ctx_list
+
+From: Luo Gengkun <luogengkun@huaweicloud.com>
+
+[ Upstream commit 2016066c66192a99d9e0ebf433789c490a6785a2 ]
+
+Syzkaller triggers a warning due to prev_epc->pmu != next_epc->pmu in
+perf_event_swap_task_ctx_data(). vmcore shows that two lists have the same
+perf_event_pmu_context, but not in the same order.
+
+The problem is that the order of pmu_ctx_list for the parent is impacted by
+the time when an event/PMU is added. While the order for a child is
+impacted by the event order in the pinned_groups and flexible_groups. So
+the order of pmu_ctx_list in the parent and child may be different.
+
+To fix this problem, insert the perf_event_pmu_context to its proper place
+after iteration of the pmu_ctx_list.
+
+The following testcase can trigger the above warning:
+
+ # perf record -e cycles --call-graph lbr -- taskset -c 3 ./a.out &
+ # perf stat -e cpu-clock,cs -p xxx // xxx is the pid of a.out
+
+ test.c
+
+ void main() {
+        int count = 0;
+        pid_t pid;
+
+        printf("%d running\n", getpid());
+        sleep(30);
+        printf("running\n");
+
+        pid = fork();
+        if (pid == -1) {
+                printf("fork error\n");
+                return;
+        }
+        if (pid == 0) {
+                while (1) {
+                        count++;
+                }
+        } else {
+                while (1) {
+                        count++;
+                }
+        }
+ }
+
+The testcase first opens an LBR event, so it will allocate task_ctx_data,
+and then opens tracepoint and software events, so the parent context will
+have 3 different perf_event_pmu_contexts. On inheritance, child ctx will
+insert the perf_event_pmu_context in another order and the warning will
+trigger.
+
+[ mingo: Tidied up the changelog. ]
+
+Fixes: bd2756811766 ("perf: Rewrite core context handling")
+Signed-off-by: Luo Gengkun <luogengkun@huaweicloud.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
+Link: https://lore.kernel.org/r/20250122073356.1824736-1-luogengkun@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/core.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 5d6458ea675e9..18f0a3aa6d7c6 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -4842,7 +4842,7 @@ static struct perf_event_pmu_context *
+ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
+                    struct perf_event *event)
+ {
+-      struct perf_event_pmu_context *new = NULL, *epc;
++      struct perf_event_pmu_context *new = NULL, *pos = NULL, *epc;
+       void *task_ctx_data = NULL;
+ 
+       if (!ctx->task) {
+@@ -4899,12 +4899,19 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
+                       atomic_inc(&epc->refcount);
+                       goto found_epc;
+               }
++              /* Make sure the pmu_ctx_list is sorted by PMU type: */
++              if (!pos && epc->pmu->type > pmu->type)
++                      pos = epc;
+       }
+ 
+       epc = new;
+       new = NULL;
+ 
+-      list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
++      if (!pos)
++              list_add_tail(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
++      else
++              list_add(&epc->pmu_ctx_entry, pos->pmu_ctx_entry.prev);
++
+       epc->ctx = ctx;
+ 
+ found_epc:
+-- 
+2.39.5
+
diff --git a/queue-6.6/series b/queue-6.6/series

index 68f40fb00c6f0c1b78f9ee1428c6a3886a1dfca8..405c492eadebde9cfa921117fdd9336b11f498e6 100644 (file)
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -49,3 +49,7 @@ net-ipv6-rpl_iptunnel-mitigate-2-realloc-issue.patch
  net-ipv6-fix-dst-ref-loop-on-input-in-rpl-lwt.patch
  net-ti-icss-iep-remove-spinlock-based-synchronizatio.patch
  net-ti-icss-iep-reject-perout-generation-request.patch
+perf-core-order-the-pmu-list-to-fix-warning-about-un.patch
+uprobes-reject-the-shared-zeropage-in-uprobe_write_o.patch
+io_uring-net-save-msg_control-for-compat.patch
+x86-cpu-fix-warm-boot-hang-regression-on-amd-sc1100-.patch
diff --git a/queue-6.6/uprobes-reject-the-shared-zeropage-in-uprobe_write_o.patch b/queue-6.6/uprobes-reject-the-shared-zeropage-in-uprobe_write_o.patch

new file mode 100644 (file)

index 0000000..0d6ef7b
--- /dev/null
+++ b/queue-6.6/uprobes-reject-the-shared-zeropage-in-uprobe_write_o.patch
@@ -0,0 +1,112 @@
+From 824fb0c7046e572b31e2d1edc360016f1635a428 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Feb 2025 11:11:49 +0800
+Subject: uprobes: Reject the shared zeropage in uprobe_write_opcode()
+
+From: Tong Tiangen <tongtiangen@huawei.com>
+
+[ Upstream commit bddf10d26e6e5114e7415a0e442ec6f51a559468 ]
+
+We triggered the following crash in syzkaller tests:
+
+  BUG: Bad page state in process syz.7.38  pfn:1eff3
+  page: refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1eff3
+  flags: 0x3fffff00004004(referenced|reserved|node=0|zone=1|lastcpupid=0x1fffff)
+  raw: 003fffff00004004 ffffe6c6c07bfcc8 ffffe6c6c07bfcc8 0000000000000000
+  raw: 0000000000000000 0000000000000000 00000000fffffffe 0000000000000000
+  page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
+  Call Trace:
+   <TASK>
+   dump_stack_lvl+0x32/0x50
+   bad_page+0x69/0xf0
+   free_unref_page_prepare+0x401/0x500
+   free_unref_page+0x6d/0x1b0
+   uprobe_write_opcode+0x460/0x8e0
+   install_breakpoint.part.0+0x51/0x80
+   register_for_each_vma+0x1d9/0x2b0
+   __uprobe_register+0x245/0x300
+   bpf_uprobe_multi_link_attach+0x29b/0x4f0
+   link_create+0x1e2/0x280
+   __sys_bpf+0x75f/0xac0
+   __x64_sys_bpf+0x1a/0x30
+   do_syscall_64+0x56/0x100
+   entry_SYSCALL_64_after_hwframe+0x78/0xe2
+
+   BUG: Bad rss-counter state mm:00000000452453e0 type:MM_FILEPAGES val:-1
+
+The following syzkaller test case can be used to reproduce:
+
+  r2 = creat(&(0x7f0000000000)='./file0\x00', 0x8)
+  write$nbd(r2, &(0x7f0000000580)=ANY=[], 0x10)
+  r4 = openat(0xffffffffffffff9c, &(0x7f0000000040)='./file0\x00', 0x42, 0x0)
+  mmap$IORING_OFF_SQ_RING(&(0x7f0000ffd000/0x3000)=nil, 0x3000, 0x0, 0x12, r4, 0x0)
+  r5 = userfaultfd(0x80801)
+  ioctl$UFFDIO_API(r5, 0xc018aa3f, &(0x7f0000000040)={0xaa, 0x20})
+  r6 = userfaultfd(0x80801)
+  ioctl$UFFDIO_API(r6, 0xc018aa3f, &(0x7f0000000140))
+  ioctl$UFFDIO_REGISTER(r6, 0xc020aa00, &(0x7f0000000100)={{&(0x7f0000ffc000/0x4000)=nil, 0x4000}, 0x2})
+  ioctl$UFFDIO_ZEROPAGE(r5, 0xc020aa04, &(0x7f0000000000)={{&(0x7f0000ffd000/0x1000)=nil, 0x1000}})
+  r7 = bpf$PROG_LOAD(0x5, &(0x7f0000000140)={0x2, 0x3, &(0x7f0000000200)=ANY=[@ANYBLOB="1800000000120000000000000000000095"], &(0x7f0000000000)='GPL\x00', 0x7, 0x0, 0x0, 0x0, 0x0, '\x00', 0x0, @fallback=0x30, 0xffffffffffffffff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x10, 0x0, @void, @value}, 0x94)
+  bpf$BPF_LINK_CREATE_XDP(0x1c, &(0x7f0000000040)={r7, 0x0, 0x30, 0x1e, @val=@uprobe_multi={&(0x7f0000000080)='./file0\x00', &(0x7f0000000100)=[0x2], 0x0, 0x0, 0x1}}, 0x40)
+
+The cause is that zero pfn is set to the PTE without increasing the RSS
+count in mfill_atomic_pte_zeropage() and the refcount of zero folio does
+not increase accordingly. Then, the operation on the same pfn is performed
+in uprobe_write_opcode()->__replace_page() to unconditionally decrease the
+RSS count and old_folio's refcount.
+
+Therefore, two bugs are introduced:
+
+ 1. The RSS count is incorrect: when the process exits, check_mm() reports
+    the error "Bad rss-counter state".
+
+ 2. The reserved folio (zero folio) is freed when folio->refcount is zero,
+    then free_pages_prepare->free_page_is_bad() reports the error
+    "Bad page state".
+
+There is more, the following warning could also theoretically be triggered:
+
+  __replace_page()
+    -> ...
+      -> folio_remove_rmap_pte()
+        -> VM_WARN_ON_FOLIO(is_zero_folio(folio), folio)
+
+Considering that uprobe hit on the zero folio is a very rare case, just
+reject zero old folio immediately after get_user_page_vma_remote().
+
+[ mingo: Cleaned up the changelog ]
+
+Fixes: 7396fa818d62 ("uprobes/core: Make background page replacement logic account for rss_stat counters")
+Fixes: 2b1444983508 ("uprobes, mm, x86: Add the ability to install and remove uprobes breakpoints")
+Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Oleg Nesterov <oleg@redhat.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Link: https://lore.kernel.org/r/20250224031149.1598949-1-tongtiangen@huawei.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/events/uprobes.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
+index 6dac0b5798213..7e2edd1b06939 100644
+--- a/kernel/events/uprobes.c
++++ b/kernel/events/uprobes.c
+@@ -481,6 +481,11 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
+       if (ret <= 0)
+               goto put_old;
+ 
++      if (is_zero_page(old_page)) {
++              ret = -EINVAL;
++              goto put_old;
++      }
++
+       if (WARN(!is_register && PageCompound(old_page),
+                "uprobe unregister should never work on compound page\n")) {
+               ret = -EINVAL;
+-- 
+2.39.5
+
diff --git a/queue-6.6/x86-cpu-fix-warm-boot-hang-regression-on-amd-sc1100-.patch b/queue-6.6/x86-cpu-fix-warm-boot-hang-regression-on-amd-sc1100-.patch

new file mode 100644 (file)

index 0000000..e67975a
--- /dev/null
+++ b/queue-6.6/x86-cpu-fix-warm-boot-hang-regression-on-amd-sc1100-.patch
@@ -0,0 +1,95 @@
+From f627c882b0f7b8398f0a8800a537962ce05ffcf1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2025 22:31:20 +0100
+Subject: x86/CPU: Fix warm boot hang regression on AMD SC1100 SoC systems
+
+From: Russell Senior <russell@personaltelco.net>
+
+[ Upstream commit bebe35bb738b573c32a5033499cd59f20293f2a3 ]
+
+I still have some Soekris net4826 in a Community Wireless Network I
+volunteer with. These devices use an AMD SC1100 SoC. I am running
+OpenWrt on them, which uses a patched kernel, that naturally has
+evolved over time.  I haven't updated the ones in the field in a
+number of years (circa 2017), but have one in a test bed, where I have
+intermittently tried out test builds.
+
+A few years ago, I noticed some trouble, particularly when "warm
+booting", that is, doing a reboot without removing power, and noticed
+the device was hanging after the kernel message:
+
+  [    0.081615] Working around Cyrix MediaGX virtual DMA bugs.
+
+If I removed power and then restarted, it would boot fine, continuing
+through the message above, thusly:
+
+  [    0.081615] Working around Cyrix MediaGX virtual DMA bugs.
+  [    0.090076] Enable Memory-Write-back mode on Cyrix/NSC processor.
+  [    0.100000] Enable Memory access reorder on Cyrix/NSC processor.
+  [    0.100070] Last level iTLB entries: 4KB 0, 2MB 0, 4MB 0
+  [    0.110058] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0, 1GB 0
+  [    0.120037] CPU: NSC Geode(TM) Integrated Processor by National Semi (family: 0x5, model: 0x9, stepping: 0x1)
+  [...]
+
+In order to continue using modern tools, like ssh, to interact with
+the software on these old devices, I need modern builds of the OpenWrt
+firmware on the devices. I confirmed that the warm boot hang was still
+an issue in modern OpenWrt builds (currently using a patched linux
+v6.6.65).
+
+Last night, I decided it was time to get to the bottom of the warm
+boot hang, and began bisecting. From preserved builds, I narrowed down
+the bisection window from late February to late May 2019. During this
+period, the OpenWrt builds were using 4.14.x. I was able to build
+using period-correct Ubuntu 18.04.6. After a number of bisection
+iterations, I identified a kernel bump from 4.14.112 to 4.14.113 as
+the commit that introduced the warm boot hang.
+
+  https://github.com/openwrt/openwrt/commit/07aaa7e3d62ad32767d7067107db64b6ade81537
+
+Looking at the upstream changes in the stable kernel between 4.14.112
+and 4.14.113 (tig v4.14.112..v4.14.113), I spotted a likely suspect:
+
+  https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=20afb90f730982882e65b01fb8bdfe83914339c5
+
+So, I tried reverting just that kernel change on top of the breaking
+OpenWrt commit, and my warm boot hang went away.
+
+Presumably, the warm boot hang is due to some register not getting
+cleared in the same way that a loss of power does. That is
+approximately as much as I understand about the problem.
+
+After more poking/prodding and coaching from Jonas Gorski, it looks
+like this test patch fixes the problem on my board. Tested against
+v6.6.67 and v4.14.113.
+
+Fixes: 18fb053f9b82 ("x86/cpu/cyrix: Use correct macros for Cyrix calls on Geode processors")
+Debugged-by: Jonas Gorski <jonas.gorski@gmail.com>
+Signed-off-by: Russell Senior <russell@personaltelco.net>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/CAHP3WfOgs3Ms4Z+L9i0-iBOE21sdMk5erAiJurPjnrL9LSsgRA@mail.gmail.com
+Cc: Matthew Whitehead <tedheadster@gmail.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/cyrix.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
+index 9651275aecd1b..dfec2c61e3547 100644
+--- a/arch/x86/kernel/cpu/cyrix.c
++++ b/arch/x86/kernel/cpu/cyrix.c
+@@ -153,8 +153,8 @@ static void geode_configure(void)
+       u8 ccr3;
+       local_irq_save(flags);
+ 
+-      /* Suspend on halt power saving and enable #SUSP pin */
+-      setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88);
++      /* Suspend on halt power saving */
++      setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x08);
+ 
+       ccr3 = getCx86(CX86_CCR3);
+       setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);       /* enable MAPEN */
+-- 
+2.39.5
+
author	Sasha Levin <sashal@kernel.org>
	Sun, 2 Mar 2025 14:46:02 +0000 (09:46 -0500)
committer	Sasha Levin <sashal@kernel.org>
	Sun, 2 Mar 2025 14:46:02 +0000 (09:46 -0500)
queue-6.6/io_uring-net-save-msg_control-for-compat.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/perf-core-order-the-pmu-list-to-fix-warning-about-un.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/series		patch \| blob \| blame \| history
queue-6.6/uprobes-reject-the-shared-zeropage-in-uprobe_write_o.patch	[new file with mode: 0644]	patch \| blob
queue-6.6/x86-cpu-fix-warm-boot-hang-regression-on-amd-sc1100-.patch	[new file with mode: 0644]	patch \| blob