From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 29 Apr 2022 08:57:15 +0000 (+0200)
Subject: 4.19-stable patches
X-Git-Tag: v4.19.241~14
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e080d2b5dc355e1c36681d0f8215da5dd0b9c4a8;p=thirdparty%2Fkernel%2Fstable-queue.git

4.19-stable patches

added patches:
	net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch
	powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch
	powerpc-64s-unmerge-ex_lr-and-ex_dar.patch
---

diff --git a/queue-4.19/net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch b/queue-4.19/net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch
new file mode 100644
index 00000000000..b76e3748990
--- /dev/null
+++ b/queue-4.19/net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch
@@ -0,0 +1,142 @@
+From 3db09e762dc79584a69c10d74a6b98f89a9979f8 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 13 Apr 2022 10:35:41 -0700
+Subject: net/sched: cls_u32: fix netns refcount changes in u32_change()
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 3db09e762dc79584a69c10d74a6b98f89a9979f8 upstream.
+
+We are now able to detect extra put_net() at the moment
+they happen, instead of much later in correct code paths.
+
+u32_init_knode() / tcf_exts_init() populates the ->exts.net
+pointer, but as mentioned in tcf_exts_init(),
+the refcount on netns has not been elevated yet.
+
+The refcount is taken only once tcf_exts_get_net()
+is called.
+
+So the two u32_destroy_key() calls from u32_change()
+are attempting to release an invalid reference on the netns.
+
+syzbot report:
+
+refcount_t: decrement hit 0; leaking memory.
+WARNING: CPU: 0 PID: 21708 at lib/refcount.c:31 refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31
+Modules linked in:
+CPU: 0 PID: 21708 Comm: syz-executor.5 Not tainted 5.18.0-rc2-next-20220412-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:refcount_warn_saturate+0xbf/0x1e0 lib/refcount.c:31
+Code: 1d 14 b6 b2 09 31 ff 89 de e8 6d e9 89 fd 84 db 75 e0 e8 84 e5 89 fd 48 c7 c7 40 aa 26 8a c6 05 f4 b5 b2 09 01 e8 e5 81 2e 05 <0f> 0b eb c4 e8 68 e5 89 fd 0f b6 1d e3 b5 b2 09 31 ff 89 de e8 38
+RSP: 0018:ffffc900051af1b0 EFLAGS: 00010286
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
+RDX: 0000000000040000 RSI: ffffffff8160a0c8 RDI: fffff52000a35e28
+RBP: 0000000000000004 R08: 0000000000000000 R09: 0000000000000000
+R10: ffffffff81604a9e R11: 0000000000000000 R12: 1ffff92000a35e3b
+R13: 00000000ffffffef R14: ffff8880211a0194 R15: ffff8880577d0a00
+FS:  00007f25d183e700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f19c859c028 CR3: 0000000051009000 CR4: 00000000003506f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ __refcount_dec include/linux/refcount.h:344 [inline]
+ refcount_dec include/linux/refcount.h:359 [inline]
+ ref_tracker_free+0x535/0x6b0 lib/ref_tracker.c:118
+ netns_tracker_free include/net/net_namespace.h:327 [inline]
+ put_net_track include/net/net_namespace.h:341 [inline]
+ tcf_exts_put_net include/net/pkt_cls.h:255 [inline]
+ u32_destroy_key.isra.0+0xa7/0x2b0 net/sched/cls_u32.c:394
+ u32_change+0xe01/0x3140 net/sched/cls_u32.c:909
+ tc_new_tfilter+0x98d/0x2200 net/sched/cls_api.c:2148
+ rtnetlink_rcv_msg+0x80d/0xb80 net/core/rtnetlink.c:6016
+ netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2495
+ netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+ netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345
+ netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1921
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg+0xcf/0x120 net/socket.c:725
+ ____sys_sendmsg+0x6e2/0x800 net/socket.c:2413
+ ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
+ __sys_sendmsg+0xe5/0x1b0 net/socket.c:2496
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+RIP: 0033:0x7f25d0689049
+Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007f25d183e168 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007f25d079c030 RCX: 00007f25d0689049
+RDX: 0000000000000000 RSI: 0000000020000340 RDI: 0000000000000005
+RBP: 00007f25d06e308d R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007ffd0b752e3f R14: 00007f25d183e300 R15: 0000000000022000
+ </TASK>
+
+Fixes: 35c55fc156d8 ("cls_u32: use tcf_exts_get_net() before call_rcu()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Cc: Cong Wang <xiyou.wangcong@gmail.com>
+Cc: Jiri Pirko <jiri@resnulli.us>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[rkolchmeyer: Backported to 4.19: adjusted u32_destroy_key() signature]
+Signed-off-by: Robert Kolchmeyer <rkolchmeyer@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_u32.c |   18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -404,15 +404,20 @@ static int u32_init(struct tcf_proto *tp
+ 	return 0;
+ }
+ 
+-static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
+-			   bool free_pf)
++static void __u32_destroy_key(struct tc_u_knode *n)
+ {
+ 	struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
+ 
+ 	tcf_exts_destroy(&n->exts);
+-	tcf_exts_put_net(&n->exts);
+ 	if (ht && --ht->refcnt == 0)
+ 		kfree(ht);
++	kfree(n);
++}
++
++static void u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n,
++			    bool free_pf)
++{
++	tcf_exts_put_net(&n->exts);
+ #ifdef CONFIG_CLS_U32_PERF
+ 	if (free_pf)
+ 		free_percpu(n->pf);
+@@ -421,8 +426,7 @@ static int u32_destroy_key(struct tcf_pr
+ 	if (free_pf)
+ 		free_percpu(n->pcpu_success);
+ #endif
+-	kfree(n);
+-	return 0;
++	__u32_destroy_key(n);
+ }
+ 
+ /* u32_delete_key_rcu should be called when free'ing a copied
+@@ -965,13 +969,13 @@ static int u32_change(struct net *net, s
+ 				    tca[TCA_RATE], ovr, extack);
+ 
+ 		if (err) {
+-			u32_destroy_key(tp, new, false);
++			__u32_destroy_key(new);
+ 			return err;
+ 		}
+ 
+ 		err = u32_replace_hw_knode(tp, new, flags, extack);
+ 		if (err) {
+-			u32_destroy_key(tp, new, false);
++			__u32_destroy_key(new);
+ 			return err;
+ 		}
+ 
diff --git a/queue-4.19/powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch b/queue-4.19/powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch
new file mode 100644
index 00000000000..15be509c9cd
--- /dev/null
+++ b/queue-4.19/powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch
@@ -0,0 +1,103 @@
+From foo@baz Fri Apr 29 10:56:14 AM CEST 2022
+From: Michael Ellerman <mpe@ellerman.id.au>
+Date: Thu, 28 Apr 2022 22:41:49 +1000
+Subject: powerpc/64/interrupt: Temporarily save PPR on stack to fix register corruption due to SLB miss
+To: <stable@vger.kernel.org>, <gregkh@linuxfoundation.org>
+Cc: <linuxppc-dev@lists.ozlabs.org>, <npiggin@gmail.com>
+Message-ID: <20220428124150.375623-2-mpe@ellerman.id.au>
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+This is a minimal stable kernel fix for the problem solved by
+4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather than
+thread_struct").
+
+Upstream kernels between 4.17-4.20 have this bug, so I propose this
+patch for 4.19 stable.
+
+Longer description from mpe:
+
+In commit f384796c4 ("powerpc/mm: Add support for handling > 512TB
+address in SLB miss") we added support for using multiple context ids
+per process. Previously accessing past the first context id was a fatal
+error for the process. With the new support it became non-fatal, and so
+the previous "bad_addr_slb" handler was changed to be the
+"large_addr_slb" handler.
+
+That handler uses the EXCEPTION_PROLOG_COMMON() macro, which in turn
+calls the SAVE_PPR() macro. At the point where SAVE_PPR() is used, the
+r9-13 register values from the original user fault are saved in
+paca->exslb. It's not until later in EXCEPTION_PROLOG_COMMON_2() that
+they are saved from paca->exslb onto the kernel stack.
+
+The PPR is saved into current->thread.ppr, which is notably not on the
+kernel stack the way pt_regs are. This means we can take an SLB miss on
+current->thread.ppr. If that happens in the "large_addr_slb" case we
+will clobber the saved user r9-r13 in paca->exslb with kernel values.
+Later we will save those clobbered values into the pt_regs on the stack,
+and when we return to userspace those kernel values will be restored.
+
+Typically this appears as some sort of segfault in userspace, with an
+address that looks like a kernel address. In dmesg it can appear as:
+
+  [19117.440331] some_program[1869625]: unhandled signal 11 at c00000000f6bda10 nip 00007fff780d559c lr 00007fff781ae56c code 30001
+
+The upstream fix for this issue was to move PPR into pt_regs, on the
+kernel stack, avoiding the possibility of an SLB fault when saving it.
+
+However changing the size of pt_regs is an intrusive change, and has
+side effects in other parts of the kernel. A minimal fix is to
+temporarily save the PPR in an unused part of pt_regs, then save the
+user register values from paca->exslb into pt_regs, and then move the
+saved PPR into thread.ppr.
+
+Fixes: f384796c40dc ("powerpc/mm: Add support for handling > 512TB address in SLB miss")
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220316033235.903657-1-npiggin@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/include/asm/exception-64s.h |   22 ++++++++++++++++++----
+ 1 file changed, 18 insertions(+), 4 deletions(-)
+
+--- a/arch/powerpc/include/asm/exception-64s.h
++++ b/arch/powerpc/include/asm/exception-64s.h
+@@ -243,10 +243,22 @@
+  * PPR save/restore macros used in exceptions_64s.S  
+  * Used for P7 or later processors
+  */
+-#define SAVE_PPR(area, ra, rb)						\
++#define SAVE_PPR(area, ra)						\
++BEGIN_FTR_SECTION_NESTED(940)						\
++	ld	ra,area+EX_PPR(r13);	/* Read PPR from paca */	\
++	std	ra,RESULT(r1);		/* Store PPR in RESULT for now */ \
++END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
++
++/*
++ * This is called after we are finished accessing 'area', so we can now take
++ * SLB faults accessing the thread struct, which will use PACA_EXSLB area.
++ * This is required because the large_addr_slb handler uses EXSLB and it also
++ * uses the common exception macros including this PPR saving.
++ */
++#define MOVE_PPR_TO_THREAD(ra, rb)					\
+ BEGIN_FTR_SECTION_NESTED(940)						\
+ 	ld	ra,PACACURRENT(r13);					\
+-	ld	rb,area+EX_PPR(r13);	/* Read PPR from paca */	\
++	ld	rb,RESULT(r1);		/* Read PPR from stack */	\
+ 	std	rb,TASKTHREADPPR(ra);					\
+ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
+ 
+@@ -515,9 +527,11 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
+ 3:	EXCEPTION_PROLOG_COMMON_1();					   \
+ 	beq	4f;			/* if from kernel mode		*/ \
+ 	ACCOUNT_CPU_USER_ENTRY(r13, r9, r10);				   \
+-	SAVE_PPR(area, r9, r10);					   \
++	SAVE_PPR(area, r9);						   \
+ 4:	EXCEPTION_PROLOG_COMMON_2(area)					   \
+-	EXCEPTION_PROLOG_COMMON_3(n)					   \
++	beq	5f;			/* if from kernel mode		*/ \
++	MOVE_PPR_TO_THREAD(r9, r10);					   \
++5:	EXCEPTION_PROLOG_COMMON_3(n)					   \
+ 	ACCOUNT_STOLEN_TIME
+ 
+ /* Save original regs values from save area to stack frame. */
diff --git a/queue-4.19/powerpc-64s-unmerge-ex_lr-and-ex_dar.patch b/queue-4.19/powerpc-64s-unmerge-ex_lr-and-ex_dar.patch
new file mode 100644
index 00000000000..52601d4bf9b
--- /dev/null
+++ b/queue-4.19/powerpc-64s-unmerge-ex_lr-and-ex_dar.patch
@@ -0,0 +1,120 @@
+From foo@baz Fri Apr 29 10:56:14 AM CEST 2022
+From: Michael Ellerman <mpe@ellerman.id.au>
+Date: Thu, 28 Apr 2022 22:41:50 +1000
+Subject: powerpc/64s: Unmerge EX_LR and EX_DAR
+To: <stable@vger.kernel.org>, <gregkh@linuxfoundation.org>
+Cc: <linuxppc-dev@lists.ozlabs.org>, <npiggin@gmail.com>
+Message-ID: <20220428124150.375623-3-mpe@ellerman.id.au>
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+The SLB miss handler is not fully re-entrant; it is able to work because
+we ensure that the SLB entries for the kernel text and data segment, as
+well as the kernel stack are pinned in the SLB. Accesses to kernel data
+outside of those areas have to be carefully managed and can only occur in
+certain parts of the code. One way we deal with that is by storing some
+values in temporary slots in the paca.
+
+In v4.13 in commit dbeea1d6b4bd ("powerpc/64s/paca: EX_LR can be merged
+with EX_DAR") we merged the storage for two temporary slots for register
+storage during SLB miss handling. That was safe at the time because the
+two slots were never used at the same time.
+
+Unfortunately in v4.17 in commit c2b4d8b7417a ("powerpc/mm/hash64:
+Increase the VA range") we broke that condition, and introduced a case
+where the two slots could be in use at the same time, leading to one
+being corrupted.
+
+Specifically in slb_miss_common() when we detect that we're handling a
+fault for a large virtual address (> 512TB) we go to the "8" label,
+there we store the original fault address into paca->exslb[EX_DAR],
+before jumping to large_addr_slb() (using rfid).
+
+We then use the EXCEPTION_PROLOG_COMMON and RECONCILE_IRQ_STATE macros
+to do exception setup, before reloading the fault address from
+paca->exslb[EX_DAR] and storing it into pt_regs->dar (Data Address
+Register).
+
+However the code generated by those macros can cause a recursive SLB
+miss on a kernel address in three places.
+
+The first is the saving of the PPR (Program Priority Register), which
+happens on all CPUs since Power7; the PPR is saved to the thread struct
+which can be anywhere in memory. There is also the call to
+accumulate_stolen_time() if CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y and
+CONFIG_PPC_SPLPAR=y, and also the call to trace_hardirqs_off() if
+CONFIG_TRACE_IRQFLAGS=y. The latter two call into generic C code and can
+lead to accesses anywhere in memory.
+
+On modern 64-bit CPUs we have 1TB segments, so for any of those accesses
+to cause an SLB fault they must access memory more than 1TB away from
+the kernel text, data and kernel stack. That typically only happens on
+machines with more than 1TB of RAM. However it is possible on multi-node
+Power9 systems, because memory on the 2nd node begins at 32TB in the
+linear mapping.
+
+If we take a recursive SLB fault then we will corrupt the original fault
+address with the LR (Link Register) value, because the EX_DAR and EX_LR
+slots share storage. Subsequently we will think we're trying to fault
+that LR address, which is the wrong address, and will also most likely
+lead to a segfault because the LR address will be < 512TB and so will be
+rejected by slb_miss_large_addr().
+
+This appears as a spurious segfault to userspace, and if
+show_unhandled_signals is enabled you will see a fault reported in dmesg
+with the LR address, not the expected fault address, eg:
+
+  prog[123]: segfault (11) at 128a61808 nip 128a618cc lr 128a61808 code 3 in prog[128a60000+10000]
+  prog[123]: code: 4bffffa4 39200040 3ce00004 7d2903a6 3c000200 78e707c6 780083e4 7d3b4b78
+  prog[123]: code: 7d455378 7d7d5b78 7d9f6378 7da46b78 <f8670000> 7d3a4b78 7d465378 7d7c5b78
+
+Notice that the fault address == the LR, and the faulting instruction is
+a simple store that should never use LR.
+
+In upstream this was fixed in v4.20 in commit
+48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C"),
+however that is a huge rewrite and not backportable.
+
+The minimal fix for stable is to just unmerge the EX_LR and EX_DAR slots
+again, avoiding the corruption of the DAR value. This uses an extra 8
+bytes per CPU, which is negligible.
+
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/include/asm/exception-64s.h |   15 ++++-----------
+ 1 file changed, 4 insertions(+), 11 deletions(-)
+
+--- a/arch/powerpc/include/asm/exception-64s.h
++++ b/arch/powerpc/include/asm/exception-64s.h
+@@ -48,11 +48,12 @@
+ #define EX_CCR		52
+ #define EX_CFAR		56
+ #define EX_PPR		64
++#define EX_LR		72
+ #if defined(CONFIG_RELOCATABLE)
+-#define EX_CTR		72
+-#define EX_SIZE		10	/* size in u64 units */
++#define EX_CTR		80
++#define EX_SIZE		11	/* size in u64 units */
+ #else
+-#define EX_SIZE		9	/* size in u64 units */
++#define EX_SIZE		10	/* size in u64 units */
+ #endif
+ 
+ /*
+@@ -61,14 +62,6 @@
+ #define MAX_MCE_DEPTH	4
+ 
+ /*
+- * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
+- * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
+- * in the save area so it's not necessary to overlap them. Could be used
+- * for future savings though if another 4 byte register was to be saved.
+- */
+-#define EX_LR		EX_DAR
+-
+-/*
+  * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
+  * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
+  * with EX_DAR.
diff --git a/queue-4.19/series b/queue-4.19/series
index 3ba4da31d2a..9c106c94d41 100644
--- a/queue-4.19/series
+++ b/queue-4.19/series
@@ -2,3 +2,6 @@ media-vicodec-upon-release-call-m2m-release-before-freeing-ctrl-handler.patch
 floppy-disable-fdrawcmd-by-default.patch
 hamradio-defer-6pack-kfree-after-unregister_netdev.patch
 hamradio-remove-needs_free_netdev-to-avoid-uaf.patch
+net-sched-cls_u32-fix-netns-refcount-changes-in-u32_change.patch
+powerpc-64-interrupt-temporarily-save-ppr-on-stack-to-fix-register-corruption-due-to-slb-miss.patch
+powerpc-64s-unmerge-ex_lr-and-ex_dar.patch