From: Greg Kroah-Hartman Date: Fri, 29 Apr 2022 08:57:15 +0000 (+0200) Subject: 4.19-stable patches X-Git-Tag: v4.19.241~14 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e080d2b5dc355e1c36681d0f8215da5dd0b9c4a8;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch powerpc-64s-unmerge-ex_lr-and-ex_dar.patch --- diff --git a/queue-4.19/net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch b/queue-4.19/net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch new file mode 100644 index 00000000000..b76e3748990 --- /dev/null +++ b/queue-4.19/net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch @@ -0,0 +1,142 @@ +From 3db09e762dc79584a69c10d74a6b98f89a9979f8 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 13 Apr 2022 10:35:41 -0700 +Subject: net/sched: cls_u32: fix netns refcount changes in u32_change() + +From: Eric Dumazet + +commit 3db09e762dc79584a69c10d74a6b98f89a9979f8 upstream. + +We are now able to detect extra put_net() at the moment +they happen, instead of much later in correct code paths. + +u32_init_knode() / tcf_exts_init() populates the ->exts.net +pointer, but as mentioned in tcf_exts_init(), +the refcount on netns has not been elevated yet. + +The refcount is taken only once tcf_exts_get_net() +is called. + +So the two u32_destroy_key() calls from u32_change() +are attempting to release an invalid reference on the netns. + +syzbot report: + +refcount_t: decrement hit 0; leaking memory. 
+WARNING: CPU: 0 PID: 21708 at lib/refcount.c:31 refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31 +Modules linked in: +CPU: 0 PID: 21708 Comm: syz-executor.5 Not tainted 5.18.0-rc2-next-20220412-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31 +Code: 1d 14 b6 b2 09 31 ff 89 de e8 6d e9 89 fd 84 db 75 e0 e8 84 e5 89 fd 48 c7 c7 40 aa 26 8a c6 05 f4 b5 b2 09 01 e8 e5 81 2e 05 <0f> 0b eb c4 e8 68 e5 89 fd 0f b6 1d e3 b5 b2 09 31 ff 89 de e8 38 +RSP: 0018:ffffc900051af1b0 EFLAGS: 00010286 +RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 +RDX: 0000000000040000 RSI: ffffffff8160a0c8 RDI: fffff52000a35e28 +RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000 +R10: ffffffff81604a9e R11: 0000000000000000 R12: 1ffff92000a35e3b +R13: 00000000ffffffef R14: ffff8880211a0194 R15: ffff8880577d0a00 +FS: 00007f25d183e700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f19c859c028 CR3: 0000000051009000 CR4: 00000000003506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + __refcount_dec include/linux/refcount.h:344 [inline] + refcount_dec include/linux/refcount.h:359 [inline] + ref_tracker_free+0x535/0x6b0 lib/ref_tracker.c:118 + netns_tracker_free include/net/net_namespace.h:327 [inline] + put_net_track include/net/net_namespace.h:341 [inline] + tcf_exts_put_net include/net/pkt_cls.h:255 [inline] + u32_destroy_key.isra.0+0xa7/0x2b0 net/sched/cls_u32.c:394 + u32_change+0xe01/0x3140 net/sched/cls_u32.c:909 + tc_new_tfilter+0x98d/0x2200 net/sched/cls_api.c:2148 + rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:6016 + netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2495 + netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] + netlink_unicast+0x543/0x7f0 
net/netlink/af_netlink.c:1345 + netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1921 + sock_sendmsg_nosec net/socket.c:705 [inline] + sock_sendmsg+0xcf/0x120 net/socket.c:725 + ____sys_sendmsg+0x6e2/0x800 net/socket.c:2413 + ___sys_sendmsg+0xf3/0x170 net/socket.c:2467 + __sys_sendmsg+0xe5/0x1b0 net/socket.c:2496 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae +RIP: 0033:0x7f25d0689049 +Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007f25d183e168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f25d079c030 RCX: 00007f25d0689049 +RDX: 0000000000000000 RSI: 0000000020000340 RDI: 0000000000000005 +RBP: 00007f25d06e308d R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 00007ffd0b752e3f R14: 00007f25d183e300 R15: 0000000000022000 + + +Fixes: 35c55fc156d8 ("cls_u32: use tcf_exts_get_net() before call_rcu()") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Cong Wang +Cc: Jiri Pirko +Acked-by: Jamal Hadi Salim +Signed-off-by: Jakub Kicinski +[rkolchmeyer: Backported to 4.19: adjusted u32_destroy_key() signature] +Signed-off-by: Robert Kolchmeyer +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_u32.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/net/sched/cls_u32.c ++++ b/net/sched/cls_u32.c +@@ -404,15 +404,20 @@ static int u32_init(struct tcf_proto *tp + return 0; + } + +-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, +- bool free_pf) ++static void __u32_destroy_key(struct tc_u_knode *n) + { + struct tc_u_hnode *ht = rtnl_dereference(n->ht_down); + + tcf_exts_destroy(&n->exts); +- tcf_exts_put_net(&n->exts); + if (ht && --ht->refcnt == 
0) + kfree(ht); ++ kfree(n); ++} ++ ++static void u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, ++ bool free_pf) ++{ ++ tcf_exts_put_net(&n->exts); + #ifdef CONFIG_CLS_U32_PERF + if (free_pf) + free_percpu(n->pf); +@@ -421,8 +426,7 @@ static int u32_destroy_key(struct tcf_pr + if (free_pf) + free_percpu(n->pcpu_success); + #endif +- kfree(n); +- return 0; ++ __u32_destroy_key(n); + } + + /* u32_delete_key_rcu should be called when free'ing a copied +@@ -965,13 +969,13 @@ static int u32_change(struct net *net, s + tca[TCA_RATE], ovr, extack); + + if (err) { +- u32_destroy_key(tp, new, false); ++ __u32_destroy_key(new); + return err; + } + + err = u32_replace_hw_knode(tp, new, flags, extack); + if (err) { +- u32_destroy_key(tp, new, false); ++ __u32_destroy_key(new); + return err; + } + diff --git a/queue-4.19/powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch b/queue-4.19/powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch new file mode 100644 index 00000000000..15be509c9cd --- /dev/null +++ b/queue-4.19/powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch @@ -0,0 +1,103 @@ +From foo@baz Fri Apr 29 10:56:14 AM CEST 2022 +From: Michael Ellerman +Date: Thu, 28 Apr 2022 22:41:49 +1000 +Subject: powerpc/64/interrupt: Temporarily save PPR on stack to fix register corruption due to SLB miss +To: , +Cc: , +Message-ID: <20220428124150.375623-2-mpe@ellerman.id.au> + +From: Nicholas Piggin + +This is a minimal stable kernel fix for the problem solved by +4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather than +thread_struct"). + +Upstream kernels between 4.17-4.20 have this bug, so I propose this +patch for 4.19 stable. + +Longer description from mpe: + +In commit f384796c4 ("powerpc/mm: Add support for handling > 512TB +address in SLB miss") we added support for using multiple context ids +per process. 
Previously accessing past the first context id was a fatal +error for the process. With the new support it became non-fatal, and so +the previous "bad_addr_slb" handler was changed to be the +"large_addr_slb" handler. + +That handler uses the EXCEPTION_PROLOG_COMMON() macro, which in-turn +calls the SAVE_PPR() macro. At the point where SAVE_PPR() is used, the +r9-13 register values from the original user fault are saved in +paca->exslb. It's not until later in EXCEPTION_PROLOG_COMMON_2() that +they are saved from paca->exslb onto the kernel stack. + +The PPR is saved into current->thread.ppr, which is notably not on the +kernel stack the way pt_regs are. This means we can take an SLB miss on +current->thread.ppr. If that happens in the "large_addr_slb" case we +will clobber the saved user r9-r13 in paca->exslb with kernel values. +Later we will save those clobbered values into the pt_regs on the stack, +and when we return to userspace those kernel values will be restored. + +Typically this appears as some sort of segfault in userspace, with an +address that looks like a kernel address. In dmesg it can appear as: + + [19117.440331] some_program[1869625]: unhandled signal 11 at c00000000f6bda10 nip 00007fff780d559c lr 00007fff781ae56c code 30001 + +The upstream fix for this issue was to move PPR into pt_regs, on the +kernel stack, avoiding the possibility of an SLB fault when saving it. + +However changing the size of pt_regs is an intrusive change, and has +side effects in other parts of the kernel. A minimal fix is to +temporarily save the PPR in an unused part of pt_regs, then save the +user register values from paca->exslb into pt_regs, and then move the +saved PPR into thread.ppr. 
+ +Fixes: f384796c40dc ("powerpc/mm: Add support for handling > 512TB address in SLB miss") +Signed-off-by: Nicholas Piggin +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20220316033235.903657-1-npiggin@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/include/asm/exception-64s.h | 22 ++++++++++++++++++---- + 1 file changed, 18 insertions(+), 4 deletions(-) + +--- a/arch/powerpc/include/asm/exception-64s.h ++++ b/arch/powerpc/include/asm/exception-64s.h +@@ -243,10 +243,22 @@ + * PPR save/restore macros used in exceptions_64s.S + * Used for P7 or later processors + */ +-#define SAVE_PPR(area, ra, rb) \ ++#define SAVE_PPR(area, ra) \ ++BEGIN_FTR_SECTION_NESTED(940) \ ++ ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \ ++ std ra,RESULT(r1); /* Store PPR in RESULT for now */ \ ++END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940) ++ ++/* ++ * This is called after we are finished accessing 'area', so we can now take ++ * SLB faults accessing the thread struct, which will use PACA_EXSLB area. ++ * This is required because the large_addr_slb handler uses EXSLB and it also ++ * uses the common exception macros including this PPR saving. ++ */ ++#define MOVE_PPR_TO_THREAD(ra, rb) \ + BEGIN_FTR_SECTION_NESTED(940) \ + ld ra,PACACURRENT(r13); \ +- ld rb,area+EX_PPR(r13); /* Read PPR from paca */ \ ++ ld rb,RESULT(r1); /* Read PPR from stack */ \ + std rb,TASKTHREADPPR(ra); \ + END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940) + +@@ -515,9 +527,11 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) + 3: EXCEPTION_PROLOG_COMMON_1(); \ + beq 4f; /* if from kernel mode */ \ + ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ +- SAVE_PPR(area, r9, r10); \ ++ SAVE_PPR(area, r9); \ + 4: EXCEPTION_PROLOG_COMMON_2(area) \ +- EXCEPTION_PROLOG_COMMON_3(n) \ ++ beq 5f; /* if from kernel mode */ \ ++ MOVE_PPR_TO_THREAD(r9, r10); \ ++5: EXCEPTION_PROLOG_COMMON_3(n) \ + ACCOUNT_STOLEN_TIME + + /* Save original regs values from save area to stack frame. 
*/ diff --git a/queue-4.19/powerpc-64s-unmerge-ex_lr-and-ex_dar.patch b/queue-4.19/powerpc-64s-unmerge-ex_lr-and-ex_dar.patch new file mode 100644 index 00000000000..52601d4bf9b --- /dev/null +++ b/queue-4.19/powerpc-64s-unmerge-ex_lr-and-ex_dar.patch @@ -0,0 +1,120 @@ +From foo@baz Fri Apr 29 10:56:14 AM CEST 2022 +From: Michael Ellerman +Date: Thu, 28 Apr 2022 22:41:50 +1000 +Subject: powerpc/64s: Unmerge EX_LR and EX_DAR +To: , +Cc: , +Message-ID: <20220428124150.375623-3-mpe@ellerman.id.au> + +From: Michael Ellerman + +The SLB miss handler is not fully re-entrant, it is able to work because +we ensure that the SLB entries for the kernel text and data segment, as +well as the kernel stack are pinned in the SLB. Accesses to kernel data +outside of those areas has to be carefully managed and can only occur in +certain parts of the code. One way we deal with that is by storing some +values in temporary slots in the paca. + +In v4.13 in commit dbeea1d6b4bd ("powerpc/64s/paca: EX_LR can be merged +with EX_DAR") we merged the storage for two temporary slots for register +storage during SLB miss handling. That was safe at the time because the +two slots were never used at the same time. + +Unfortunately in v4.17 in commit c2b4d8b7417a ("powerpc/mm/hash64: +Increase the VA range") we broke that condition, and introduced a case +where the two slots could be in use at the same time, leading to one +being corrupted. + +Specifically in slb_miss_common() when we detect that we're handling a +fault for a large virtual address (> 512TB) we go to the "8" label, +there we store the original fault address into paca->exslb[EX_DAR], +before jumping to large_addr_slb() (using rfid). + +We then use the EXCEPTION_PROLOG_COMMON and RECONCILE_IRQ_STATE macros +to do exception setup, before reloading the fault address from +paca->exslb[EX_DAR] and storing it into pt_regs->dar (Data Address +Register). 
+ +However the code generated by those macros can cause a recursive SLB +miss on a kernel address in three places. + +The first is the saving of the PPR (Program Priority Register), which +happens on all CPUs since Power7, the PPR is saved to the thread struct +which can be anywhere in memory. There is also the call to +accumulate_stolen_time() if CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y and +CONFIG_PPC_SPLPAR=y, and also the call to trace_hardirqs_off() if +CONFIG_TRACE_IRQFLAGS=y. The latter two call into generic C code and can +lead to accesses anywhere in memory. + +On modern 64-bit CPUs we have 1TB segments, so for any of those accesses +to cause an SLB fault they must access memory more than 1TB away from +the kernel text, data and kernel stack. That typically only happens on +machines with more than 1TB of RAM. However it is possible on multi-node +Power9 systems, because memory on the 2nd node begins at 32TB in the +linear mapping. + +If we take a recursive SLB fault then we will corrupt the original fault +address with the LR (Link Register) value, because the EX_DAR and EX_LR +slots share storage. Subsequently we will think we're trying to fault +that LR address, which is the wrong address, and will also most likely +lead to a segfault because the LR address will be < 512TB and so will be +rejected by slb_miss_large_addr(). + +This appears as a spurious segfault to userspace, and if +show_unhandled_signals is enabled you will see a fault reported in dmesg +with the LR address, not the expected fault address, eg: + + prog[123]: segfault (11) at 128a61808 nip 128a618cc lr 128a61808 code 3 in prog[128a60000+10000] + prog[123]: code: 4bffffa4 39200040 3ce00004 7d2903a6 3c000200 78e707c6 780083e4 7d3b4b78 + prog[123]: code: 7d455378 7d7d5b78 7d9f6378 7da46b78 7d3a4b78 7d465378 7d7c5b78 + +Notice that the fault address == the LR, and the faulting instruction is +a simple store that should never use LR. 
+ +In upstream this was fixed in v4.20 in commit +48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C"), +however that is a huge rewrite and not backportable. + +The minimal fix for stable is to just unmerge the EX_LR and EX_DAR slots +again, avoiding the corruption of the DAR value. This uses an extra 8 +bytes per CPU, which is negligible. + +Signed-off-by: Michael Ellerman +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/include/asm/exception-64s.h | 15 ++++----------- + 1 file changed, 4 insertions(+), 11 deletions(-) + +--- a/arch/powerpc/include/asm/exception-64s.h ++++ b/arch/powerpc/include/asm/exception-64s.h +@@ -48,11 +48,12 @@ + #define EX_CCR 52 + #define EX_CFAR 56 + #define EX_PPR 64 ++#define EX_LR 72 + #if defined(CONFIG_RELOCATABLE) +-#define EX_CTR 72 +-#define EX_SIZE 10 /* size in u64 units */ ++#define EX_CTR 80 ++#define EX_SIZE 11 /* size in u64 units */ + #else +-#define EX_SIZE 9 /* size in u64 units */ ++#define EX_SIZE 10 /* size in u64 units */ + #endif + + /* +@@ -61,14 +62,6 @@ + #define MAX_MCE_DEPTH 4 + + /* +- * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR +- * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole +- * in the save area so it's not necessary to overlap them. Could be used +- * for future savings though if another 4 byte register was to be saved. +- */ +-#define EX_LR EX_DAR +- +-/* + * EX_R3 is only used by the bad_stack handler. bad_stack reloads and + * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap + * with EX_DAR. 
diff --git a/queue-4.19/series b/queue-4.19/series index 3ba4da31d2a..9c106c94d41 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -2,3 +2,6 @@ media-vicodec-upon-release-call-m2m-release-before-freeing-ctrl-handler.patch floppy-disable-fdrawcmd-by-default.patch hamradio-defer-6pack-kfree-after-unregister_netdev.patch hamradio-remove-needs_free_netdev-to-avoid-uaf.patch +net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch +powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch +powerpc-64s-unmerge-ex_lr-and-ex_dar.patch