--- /dev/null
+From 169c8ffd95e10e13013b1e2e8f51fbb0e387b99d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Dec 2021 10:08:23 +0100
+Subject: ARM: 9170/1: fix panic when kasan and kprobe are enabled
+
+From: sparkhuang <huangshaobo6@huawei.com>
+
+[ Upstream commit 8b59b0a53c840921b625378f137e88adfa87647e ]
+
+arm32 uses software to simulate the instruction replaced by a
+kprobe. Some instructions may be simulated by constructing
+assembly functions, so before executing the instruction
+simulation, the C code must set up the assembly functions'
+execution environment by binding registers. Once KASAN is
+enabled, its instrumentation clobbers that register binding,
+resulting in instruction simulation errors and a kernel panic.
+
+The kprobe instruction-emulation functions are spread across three
+files - actions-common.c, actions-arm.c and actions-thumb.c - so
+disable KASAN when compiling these files.
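+
+For illustration only (not from the commit): KASAN instruments every
+memory access with a check call, so a 4-byte load conceptually becomes:
+
+  __asan_load4((unsigned long)&p->field); /* call clobbers r0-r3 (AAPCS) */
+  val = p->field;
+
+Those compiler-inserted calls are what destroy the hand-maintained
+register bindings in the emulation helpers.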
+
+For example, after inserting a kprobe at cap_capable+20 with KASAN
+enabled, the cap_capable assembly code is as follows:
+<cap_capable>:
+e92d47f0 push {r4, r5, r6, r7, r8, r9, sl, lr}
+e1a05000 mov r5, r0
+e280006c add r0, r0, #108 ; 0x6c
+e1a04001 mov r4, r1
+e1a06002 mov r6, r2
+e59fa090 ldr sl, [pc, #144] ;
+ebfc7bf8 bl c03aa4b4 <__asan_load4>
+e595706c ldr r7, [r5, #108] ; 0x6c
+e2859014 add r9, r5, #20
+......
+The emulate_ldr assembly code after enabling KASAN is as follows:
+c06f1384 <emulate_ldr>:
+e92d47f0 push {r4, r5, r6, r7, r8, r9, sl, lr}
+e282803c add r8, r2, #60 ; 0x3c
+e1a05000 mov r5, r0
+e7e37855 ubfx r7, r5, #16, #4
+e1a00008 mov r0, r8
+e1a09001 mov r9, r1
+e1a04002 mov r4, r2
+ebf35462 bl c03c6530 <__asan_load4>
+e357000f cmp r7, #15
+e7e36655 ubfx r6, r5, #12, #4
+e205a00f and sl, r5, #15
+0a000001 beq c06f13bc <emulate_ldr+0x38>
+e0840107 add r0, r4, r7, lsl #2
+ebf3545c bl c03c6530 <__asan_load4>
+e084010a add r0, r4, sl, lsl #2
+ebf3545a bl c03c6530 <__asan_load4>
+e2890010 add r0, r9, #16
+ebf35458 bl c03c6530 <__asan_load4>
+e5990010 ldr r0, [r9, #16]
+e12fff30 blx r0
+e356000f cmp r6, #15
+1a000014 bne c06f1430 <emulate_ldr+0xac>
+e1a06000 mov r6, r0
+e2840040 add r0, r4, #64 ; 0x40
+......
+
+When emulate_ldr runs to simulate the ldr instruction, a panic
+occurs, and the log is as follows:
+Unable to handle kernel NULL pointer dereference at virtual address
+00000090
+pgd = ecb46400
+[00000090] *pgd=2e0fa003, *pmd=00000000
+Internal error: Oops: 206 [#1] SMP ARM
+PC is at cap_capable+0x14/0xb0
+LR is at emulate_ldr+0x50/0xc0
+psr: 600d0293 sp : ecd63af8 ip : 00000004 fp : c0a7c30c
+r10: 00000000 r9 : c30897f4 r8 : ecd63cd4
+r7 : 0000000f r6 : 0000000a r5 : e59fa090 r4 : ecd63c98
+r3 : c06ae294 r2 : 00000000 r1 : b7611300 r0 : bf4ec008
+Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user
+Control: 32c5387d Table: 2d546400 DAC: 55555555
+Process bash (pid: 1643, stack limit = 0xecd60190)
+(cap_capable) from (kprobe_handler+0x218/0x340)
+(kprobe_handler) from (kprobe_trap_handler+0x24/0x48)
+(kprobe_trap_handler) from (do_undefinstr+0x13c/0x364)
+(do_undefinstr) from (__und_svc_finish+0x0/0x30)
+(__und_svc_finish) from (cap_capable+0x18/0xb0)
+(cap_capable) from (cap_vm_enough_memory+0x38/0x48)
+(cap_vm_enough_memory) from
+(security_vm_enough_memory_mm+0x48/0x6c)
+(security_vm_enough_memory_mm) from
+(copy_process.constprop.5+0x16b4/0x25c8)
+(copy_process.constprop.5) from (_do_fork+0xe8/0x55c)
+(_do_fork) from (SyS_clone+0x1c/0x24)
+(SyS_clone) from (__sys_trace_return+0x0/0x10)
+Code: 0050a0e1 6c0080e2 0140a0e1 0260a0e1 (f801f0e7)
+
+Fixes: 35aa1df43283 ("ARM kprobes: instruction single-stepping support")
+Fixes: 421015713b30 ("ARM: 9017/2: Enable KASan for ARM")
+Signed-off-by: huangshaobo <huangshaobo6@huawei.com>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/probes/kprobes/Makefile | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile
+index 14db56f49f0a3..6159010dac4a6 100644
+--- a/arch/arm/probes/kprobes/Makefile
++++ b/arch/arm/probes/kprobes/Makefile
+@@ -1,4 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
++KASAN_SANITIZE_actions-common.o := n
++KASAN_SANITIZE_actions-arm.o := n
++KASAN_SANITIZE_actions-thumb.o := n
+ obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o
+ obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o
+ test-kprobes-objs := test-core.o
+--
+2.34.1
+
--- /dev/null
+From ef3193384de640c25668a8dde90b6a4397e864b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jan 2022 18:14:27 +0200
+Subject: efi/libstub: arm64: Fix image check alignment at entry
+
+From: Mihai Carabas <mihai.carabas@oracle.com>
+
+[ Upstream commit e9b7c3a4263bdcfd31bc3d03d48ce0ded7a94635 ]
+
+The kernel is aligned at SEGMENT_ALIGN, and this is the alignment populated
+in the PE headers:
+
+arch/arm64/kernel/efi-header.S: .long SEGMENT_ALIGN // SectionAlignment
+
+EFI_KIMG_ALIGN is defined as: (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN :
+THREAD_ALIGN)
+
+So it depends on THREAD_ALIGN. On newer builds the "kernel image not
+aligned" warning started to appear even though the loader takes the PE
+header (which states SEGMENT_ALIGN) into account.
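+
+A sketch of why the old check could misfire (configuration values are an
+assumption, not from the commit): a UEFI loader honouring the PE
+SectionAlignment places the image on a SEGMENT_ALIGN boundary, but
+EFI_KIMG_ALIGN can be stricter:
+
+  #define EFI_KIMG_ALIGN \
+          (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN)
+  /* e.g. with VMAP_STACK, THREAD_ALIGN = 2 * THREAD_SIZE and may exceed
+   * SEGMENT_ALIGN, so IS_ALIGNED(_text, EFI_KIMG_ALIGN) can fail even
+   * for a correctly loaded image. */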
+
+Fixes: c32ac11da3f8 ("efi/libstub: arm64: Double check image alignment at entry")
+Signed-off-by: Mihai Carabas <mihai.carabas@oracle.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/libstub/arm64-stub.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
+index 2363fee9211c9..9cc556013d085 100644
+--- a/drivers/firmware/efi/libstub/arm64-stub.c
++++ b/drivers/firmware/efi/libstub/arm64-stub.c
+@@ -119,9 +119,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
+ if (image->image_base != _text)
+ efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
+
+- if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN))
+- efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n",
+- EFI_KIMG_ALIGN >> 10);
++ if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN))
++ efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n",
++ SEGMENT_ALIGN >> 10);
+
+ kernel_size = _edata - _text;
+ kernel_memsize = kernel_size + (_end - _edata);
+--
+2.34.1
+
--- /dev/null
+From 2adb2fe8cd76a1617463803e1c7ed2bc1f2f0768 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 09:41:12 -0800
+Subject: ipv6: annotate accesses to fn->fn_sernum
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit aafc2e3285c2d7a79b7ee15221c19fbeca7b1509 ]
+
+struct fib6_node's fn_sernum field can be
+read while other threads change it.
+
+Add READ_ONCE()/WRITE_ONCE() annotations.
+
+Do not change the existing smp barriers in fib6_get_cookie_safe()
+and __fib6_update_sernum_upto_root().
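+
+A sketch of the resulting pattern (simplified from the diff below):
+
+  /* writer, tb6_lock held; pairs with smp_rmb() in the reader */
+  smp_wmb();
+  WRITE_ONCE(fn->fn_sernum, sernum);
+
+  /* lockless reader; pairs with smp_wmb() in the writer */
+  *cookie = READ_ONCE(fn->fn_sernum);
+  smp_rmb();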
+
+syzbot reported:
+
+BUG: KCSAN: data-race in fib6_clean_node / inet6_csk_route_socket
+
+write to 0xffff88813df62e2c of 4 bytes by task 1920 on cpu 1:
+ fib6_clean_node+0xc2/0x260 net/ipv6/ip6_fib.c:2178
+ fib6_walk_continue+0x38e/0x430 net/ipv6/ip6_fib.c:2112
+ fib6_walk net/ipv6/ip6_fib.c:2160 [inline]
+ fib6_clean_tree net/ipv6/ip6_fib.c:2240 [inline]
+ __fib6_clean_all+0x1a9/0x2e0 net/ipv6/ip6_fib.c:2256
+ fib6_flush_trees+0x6c/0x80 net/ipv6/ip6_fib.c:2281
+ rt_genid_bump_ipv6 include/net/net_namespace.h:488 [inline]
+ addrconf_dad_completed+0x57f/0x870 net/ipv6/addrconf.c:4230
+ addrconf_dad_work+0x908/0x1170
+ process_one_work+0x3f6/0x960 kernel/workqueue.c:2307
+ worker_thread+0x616/0xa70 kernel/workqueue.c:2454
+ kthread+0x1bf/0x1e0 kernel/kthread.c:359
+ ret_from_fork+0x1f/0x30
+
+read to 0xffff88813df62e2c of 4 bytes by task 15701 on cpu 0:
+ fib6_get_cookie_safe include/net/ip6_fib.h:285 [inline]
+ rt6_get_cookie include/net/ip6_fib.h:306 [inline]
+ ip6_dst_store include/net/ip6_route.h:234 [inline]
+ inet6_csk_route_socket+0x352/0x3c0 net/ipv6/inet6_connection_sock.c:109
+ inet6_csk_xmit+0x91/0x1e0 net/ipv6/inet6_connection_sock.c:121
+ __tcp_transmit_skb+0x1323/0x1840 net/ipv4/tcp_output.c:1402
+ tcp_transmit_skb net/ipv4/tcp_output.c:1420 [inline]
+ tcp_write_xmit+0x1450/0x4460 net/ipv4/tcp_output.c:2680
+ __tcp_push_pending_frames+0x68/0x1c0 net/ipv4/tcp_output.c:2864
+ tcp_push+0x2d9/0x2f0 net/ipv4/tcp.c:725
+ mptcp_push_release net/mptcp/protocol.c:1491 [inline]
+ __mptcp_push_pending+0x46c/0x490 net/mptcp/protocol.c:1578
+ mptcp_sendmsg+0x9ec/0xa50 net/mptcp/protocol.c:1764
+ inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:643
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg net/socket.c:725 [inline]
+ kernel_sendmsg+0x97/0xd0 net/socket.c:745
+ sock_no_sendpage+0x84/0xb0 net/core/sock.c:3086
+ inet_sendpage+0x9d/0xc0 net/ipv4/af_inet.c:834
+ kernel_sendpage+0x187/0x200 net/socket.c:3492
+ sock_sendpage+0x5a/0x70 net/socket.c:1007
+ pipe_to_sendpage+0x128/0x160 fs/splice.c:364
+ splice_from_pipe_feed fs/splice.c:418 [inline]
+ __splice_from_pipe+0x207/0x500 fs/splice.c:562
+ splice_from_pipe fs/splice.c:597 [inline]
+ generic_splice_sendpage+0x94/0xd0 fs/splice.c:746
+ do_splice_from fs/splice.c:767 [inline]
+ direct_splice_actor+0x80/0xa0 fs/splice.c:936
+ splice_direct_to_actor+0x345/0x650 fs/splice.c:891
+ do_splice_direct+0x106/0x190 fs/splice.c:979
+ do_sendfile+0x675/0xc40 fs/read_write.c:1245
+ __do_sys_sendfile64 fs/read_write.c:1310 [inline]
+ __se_sys_sendfile64 fs/read_write.c:1296 [inline]
+ __x64_sys_sendfile64+0x102/0x140 fs/read_write.c:1296
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+value changed: 0x0000026f -> 0x00000271
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 15701 Comm: syz-executor.2 Not tainted 5.16.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+The Fixes tag I chose is probably arbitrary, I do not think
+we need to backport this patch to older kernels.
+
+Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20220120174112.1126644-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip6_fib.h | 2 +-
+ net/ipv6/ip6_fib.c | 23 +++++++++++++----------
+ net/ipv6/route.c | 2 +-
+ 3 files changed, 15 insertions(+), 12 deletions(-)
+
+diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
+index 83b8070d1cc93..c85b040728d7e 100644
+--- a/include/net/ip6_fib.h
++++ b/include/net/ip6_fib.h
+@@ -281,7 +281,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
+ fn = rcu_dereference(f6i->fib6_node);
+
+ if (fn) {
+- *cookie = fn->fn_sernum;
++ *cookie = READ_ONCE(fn->fn_sernum);
+ /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */
+ smp_rmb();
+ status = true;
+diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
+index 0371d2c141455..a506e57c4032a 100644
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -111,7 +111,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
+ fn = rcu_dereference_protected(f6i->fib6_node,
+ lockdep_is_held(&f6i->fib6_table->tb6_lock));
+ if (fn)
+- fn->fn_sernum = fib6_new_sernum(net);
++ WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
+ }
+
+ /*
+@@ -589,12 +589,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
+ spin_unlock_bh(&table->tb6_lock);
+ if (res > 0) {
+ cb->args[4] = 1;
+- cb->args[5] = w->root->fn_sernum;
++ cb->args[5] = READ_ONCE(w->root->fn_sernum);
+ }
+ } else {
+- if (cb->args[5] != w->root->fn_sernum) {
++ int sernum = READ_ONCE(w->root->fn_sernum);
++ if (cb->args[5] != sernum) {
+ /* Begin at the root if the tree changed */
+- cb->args[5] = w->root->fn_sernum;
++ cb->args[5] = sernum;
+ w->state = FWS_INIT;
+ w->node = w->root;
+ w->skip = w->count;
+@@ -1344,7 +1345,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
+ /* paired with smp_rmb() in fib6_get_cookie_safe() */
+ smp_wmb();
+ while (fn) {
+- fn->fn_sernum = sernum;
++ WRITE_ONCE(fn->fn_sernum, sernum);
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ }
+@@ -2173,8 +2174,8 @@ static int fib6_clean_node(struct fib6_walker *w)
+ };
+
+ if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
+- w->node->fn_sernum != c->sernum)
+- w->node->fn_sernum = c->sernum;
++ READ_ONCE(w->node->fn_sernum) != c->sernum)
++ WRITE_ONCE(w->node->fn_sernum, c->sernum);
+
+ if (!c->func) {
+ WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
+@@ -2542,7 +2543,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ iter->w.args = iter;
+- iter->sernum = iter->w.root->fn_sernum;
++ iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
+ INIT_LIST_HEAD(&iter->w.lh);
+ fib6_walker_link(net, &iter->w);
+ }
+@@ -2570,8 +2571,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
+
+ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
+ {
+- if (iter->sernum != iter->w.root->fn_sernum) {
+- iter->sernum = iter->w.root->fn_sernum;
++ int sernum = READ_ONCE(iter->w.root->fn_sernum);
++
++ if (iter->sernum != sernum) {
++ iter->sernum = sernum;
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ WARN_ON(iter->w.skip);
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 0632382a5427b..3c5bb49692206 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb)
+ if (from) {
+ fn = rcu_dereference(from->fib6_node);
+ if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+- fn->fn_sernum = -1;
++ WRITE_ONCE(fn->fn_sernum, -1);
+ }
+ }
+ rcu_read_unlock();
+--
+2.34.1
+
--- /dev/null
+From 006c8ca2dd19aa9e6a73cb7e89e4f4a7af8f271b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 10:05:46 +0200
+Subject: ipv6_tunnel: Rate limit warning messages
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 6cee105e7f2ced596373951d9ea08dacc3883c68 ]
+
+The warning messages can be invoked from the data path for every packet
+transmitted through an ip6gre netdev, leading to high CPU utilization.
+
+Fix that by rate limiting the messages.
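+
+A minimal sketch of the pattern ('misconfigured' is an illustrative
+name): pr_warn_ratelimited() wraps pr_warn() in a per-callsite ratelimit
+state, so a per-packet error path can no longer flood the log:
+
+  if (unlikely(misconfigured))  /* may be hit for every packet sent */
+          pr_warn_ratelimited("%s: dropping packet\n", name);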
+
+Fixes: 09c6bbf090ec ("[IPV6]: Do mandatory IPv6 tunnel endpoint checks in realtime")
+Reported-by: Maksym Yaremchuk <maksymy@nvidia.com>
+Tested-by: Maksym Yaremchuk <maksymy@nvidia.com>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Amit Cohen <amcohen@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6_tunnel.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
+index 20a67efda47f5..fa8da8ff35b42 100644
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
+
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
+- pr_warn("%s xmit: Local address not yet configured!\n",
+- p->name);
++ pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
++ p->name);
+ else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
+ !ipv6_addr_is_multicast(raddr) &&
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
+- pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
+- p->name);
++ pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
++ p->name);
+ else
+ ret = 1;
+ rcu_read_unlock();
+--
+2.34.1
+
--- /dev/null
+From faa9223e29cc788ae6234c967529d1978f9d9e27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jan 2022 08:57:58 +0000
+Subject: KVM: arm64: pkvm: Use the mm_ops indirection for cache maintenance
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 094d00f8ca58c5d29b25e23b4daaed1ff1f13b41 ]
+
+CMOs issued from EL2 cannot directly use the kernel helpers,
+as EL2 doesn't have a mapping of the guest pages. Oops.
+
+Instead, use the mm_ops indirection to use helpers that will
+perform a mapping at EL2 and allow the CMO to be effective.
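+
+Sketch of the indirection (condensed from the diff below): the CMO is
+only issued through the optional mm_ops callback, which the pKVM backend
+can implement with a helper that maps the page at EL2 first:
+
+  if (mm_ops->dcache_clean_inval_poc)
+          mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+                                         kvm_granule_size(level));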
+
+Fixes: 25aa28691bb9 ("KVM: arm64: Move guest CMOs to the fault handlers")
+Reviewed-by: Quentin Perret <qperret@google.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20220114125038.1336965-1-maz@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/kvm/hyp/pgtable.c | 18 ++++++------------
+ 1 file changed, 6 insertions(+), 12 deletions(-)
+
+diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
+index f8ceebe4982eb..4c77ff556f0ae 100644
+--- a/arch/arm64/kvm/hyp/pgtable.c
++++ b/arch/arm64/kvm/hyp/pgtable.c
+@@ -921,13 +921,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ */
+ stage2_put_pte(ptep, mmu, addr, level, mm_ops);
+
+- if (need_flush) {
+- kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
+-
+- dcache_clean_inval_poc((unsigned long)pte_follow,
+- (unsigned long)pte_follow +
+- kvm_granule_size(level));
+- }
++ if (need_flush && mm_ops->dcache_clean_inval_poc)
++ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
++ kvm_granule_size(level));
+
+ if (childp)
+ mm_ops->put_page(childp);
+@@ -1089,15 +1085,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ struct kvm_pgtable *pgt = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
+ kvm_pte_t pte = *ptep;
+- kvm_pte_t *pte_follow;
+
+ if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
+ return 0;
+
+- pte_follow = kvm_pte_follow(pte, mm_ops);
+- dcache_clean_inval_poc((unsigned long)pte_follow,
+- (unsigned long)pte_follow +
+- kvm_granule_size(level));
++ if (mm_ops->dcache_clean_inval_poc)
++ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
++ kvm_granule_size(level));
+ return 0;
+ }
+
+--
+2.34.1
+
--- /dev/null
+From 68a8ab696c5cbc469b8d85f8fcb5da8df8bb4f90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Dec 2021 14:35:36 -0800
+Subject: mptcp: allow changing the "backup" bit by endpoint id
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit 602837e8479d20d49559b4b97b79d34c0efe7ecb ]
+
+A non-zero 'id' is sufficient to identify MPTCP endpoints: allow changing
+the value of the 'backup' bit by simply specifying the endpoint id.
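+
+Sketch of the new fallback (condensed from the diff below): when no
+address is supplied, match by endpoint id instead, rejecting the
+reserved id 0:
+
+  if (addr.addr.family == AF_UNSPEC) {
+          lookup_by_id = 1;
+          if (!addr.addr.id)
+                  return -EOPNOTSUPP;
+  }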
+
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/158
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index d96860053816a..3be10bf22cf7c 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -1711,22 +1711,28 @@ next:
+
+ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
+ {
++ struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
+ struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
+- struct mptcp_pm_addr_entry addr, *entry;
+ struct net *net = sock_net(skb->sk);
+- u8 bkup = 0;
++ u8 bkup = 0, lookup_by_id = 0;
+ int ret;
+
+- ret = mptcp_pm_parse_addr(attr, info, true, &addr);
++ ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ if (ret < 0)
+ return ret;
+
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ bkup = 1;
++ if (addr.addr.family == AF_UNSPEC) {
++ lookup_by_id = 1;
++ if (!addr.addr.id)
++ return -EOPNOTSUPP;
++ }
+
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+- if (addresses_equal(&entry->addr, &addr.addr, true)) {
++ if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
++ (lookup_by_id && entry->addr.id == addr.addr.id)) {
+ mptcp_nl_addr_backup(net, &entry->addr, bkup);
+
+ if (bkup)
+--
+2.34.1
+
--- /dev/null
+From ba9ade4de4e8f2fc4cbf8320e093ebf8f059014f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Dec 2021 15:37:02 -0800
+Subject: mptcp: clean up harmless false expressions
+
+From: Jean Sacren <sakiwit@gmail.com>
+
+[ Upstream commit 59060a47ca50bbdb1d863b73667a1065873ecc06 ]
+
+entry->addr.id is a u8 with a range from 0 to 255, and MAX_ADDR_ID is 255,
+so (entry->addr.id > MAX_ADDR_ID) is always false. We should drop both of
+these always-false expressions.
+
+We should also remove the obsolete parentheses in the first if branch.
+
+Use U8_MAX for MAX_ADDR_ID and add a comment to show the link to
+mptcp_addr_info.id as suggested by Mr. Matthieu Baerts.
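+
+A worked illustration (not from the commit) of why the tests are dead
+code:
+
+  u8 id = entry->addr.id;  /* by type, 0 <= id <= 255 */
+  /* MAX_ADDR_ID == U8_MAX == 255, so (id > MAX_ADDR_ID) can never be
+   * true and the compiler may drop the branch entirely. */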
+
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jean Sacren <sakiwit@gmail.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 3be10bf22cf7c..15c89d4fea4d2 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -38,7 +38,8 @@ struct mptcp_pm_add_entry {
+ u8 retrans_times;
+ };
+
+-#define MAX_ADDR_ID 255
++/* max value of mptcp_addr_info.id */
++#define MAX_ADDR_ID U8_MAX
+ #define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
+
+ struct pm_nl_pernet {
+@@ -831,14 +832,13 @@ find_next:
+ entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
+ MAX_ADDR_ID + 1,
+ pernet->next_id);
+- if ((!entry->addr.id || entry->addr.id > MAX_ADDR_ID) &&
+- pernet->next_id != 1) {
++ if (!entry->addr.id && pernet->next_id != 1) {
+ pernet->next_id = 1;
+ goto find_next;
+ }
+ }
+
+- if (!entry->addr.id || entry->addr.id > MAX_ADDR_ID)
++ if (!entry->addr.id)
+ goto out;
+
+ __set_bit(entry->addr.id, pernet->id_bitmap);
+--
+2.34.1
+
--- /dev/null
+From 1eefa603bbf4fca0a4f4208e8f95f139588869e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 16:35:27 -0800
+Subject: mptcp: fix msk traversal in mptcp_nl_cmd_set_flags()
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 8e9eacad7ec7a9cbf262649ebf1fa6e6f6cc7d82 ]
+
+The MPTCP endpoint list is under RCU protection, guarded by the
+pernet spinlock. mptcp_nl_cmd_set_flags() traverses the list
+without acquiring the spin-lock nor under the RCU critical section.
+
+This change addresses the issue by performing the lookup and the
+endpoint update under the pernet spinlock.
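+
+Sketch of the locking scheme (condensed from the diff below): look up
+and update the entry under the pernet spinlock, take a copy, and only
+call the socket-touching helper after dropping the lock:
+
+  spin_lock_bh(&pernet->lock);
+  entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
+  if (!entry) {
+          spin_unlock_bh(&pernet->lock);
+          return -EINVAL;
+  }
+  /* ... update entry->flags ... */
+  addr = *entry;
+  spin_unlock_bh(&pernet->lock);
+  mptcp_nl_addr_backup(net, &addr.addr, bkup);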
+
+Fixes: 0f9f696a502e ("mptcp: add set_flags command in PM netlink")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 37 +++++++++++++++++++++++++++----------
+ 1 file changed, 27 insertions(+), 10 deletions(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index bba166ddacc78..7f11eb3e35137 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -469,6 +469,20 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
+ return NULL;
+ }
+
++static struct mptcp_pm_addr_entry *
++__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
++ bool lookup_by_id)
++{
++ struct mptcp_pm_addr_entry *entry;
++
++ list_for_each_entry(entry, &pernet->local_addr_list, list) {
++ if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) ||
++ (lookup_by_id && entry->addr.id == info->id))
++ return entry;
++ }
++ return NULL;
++}
++
+ static int
+ lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
+ {
+@@ -1753,18 +1767,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
+ return -EOPNOTSUPP;
+ }
+
+- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+- if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
+- (lookup_by_id && entry->addr.id == addr.addr.id)) {
+- mptcp_nl_addr_backup(net, &entry->addr, bkup);
+-
+- if (bkup)
+- entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+- else
+- entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+- }
++ spin_lock_bh(&pernet->lock);
++ entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
++ if (!entry) {
++ spin_unlock_bh(&pernet->lock);
++ return -EINVAL;
+ }
+
++ if (bkup)
++ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
++ else
++ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
++ addr = *entry;
++ spin_unlock_bh(&pernet->lock);
++
++ mptcp_nl_addr_backup(net, &addr.addr, bkup);
+ return 0;
+ }
+
+--
+2.34.1
+
--- /dev/null
+From b607a1d248f0c837a4a59bd0f54b35c871916765 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 16:35:28 -0800
+Subject: mptcp: fix removing ids bitmap setting
+
+From: Geliang Tang <geliang.tang@suse.com>
+
+[ Upstream commit a4c0214fbee97c46e3f41fee37931d66c0fc3cb1 ]
+
+In mptcp_pm_nl_rm_addr_or_subflow(), the bit for rm_list->ids[i] in the
+id_avail_bitmap should be set, not the one for rm_list->ids[1]. This
+patch fixes it.
+
+Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk")
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Geliang Tang <geliang.tang@suse.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 7f11eb3e35137..84e6b55375e1d 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -781,7 +781,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+ msk->pm.subflows--;
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
+ }
+- __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
++ __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
+ if (!removed)
+ continue;
+
+--
+2.34.1
+
--- /dev/null
+From 96327b679e618f5e286c988cdfa012750e660571 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jan 2022 16:20:22 -0800
+Subject: mptcp: keep track of local endpoint still available for each msk
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 86e39e04482b0aadf3ee3ed5fcf2d63816559d36 ]
+
+Include into the path manager status a bitmap tracking the list
+of local endpoints still available - not yet used - for the
+relevant mptcp socket.
+
+Keep this map updated at endpoint creation/deletion time, so
+that we can easily skip already-used endpoints at local address
+selection time.
+
+The endpoint used by the initial subflow is lazily accounted at
+subflow creation time: the usage bitmap is up to date before
+endpoint selection, and we avoid this unneeded work in some relevant
+scenarios - e.g. busy servers accepting incoming subflows but
+not creating any additional ones nor announcing additional addresses.
+
+Overall this allows for fair local endpoints usage in case of
+subflow failure.
+
+As a side effect, this patch also enforces that each endpoint
+is used at most once for each mptcp connection.
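+
+Sketch of the accounting (condensed from the diff below): each msk
+starts with every endpoint id available, address selection skips
+consumed ids, and removal returns an id to the pool:
+
+  bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+
+  if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
+          continue;                      /* endpoint already in use */
+  __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); /* consume it */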
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm.c | 1 +
+ net/mptcp/pm_netlink.c | 125 +++++++++++-------
+ net/mptcp/protocol.c | 3 +-
+ net/mptcp/protocol.h | 12 +-
+ .../testing/selftests/net/mptcp/mptcp_join.sh | 5 +-
+ 5 files changed, 91 insertions(+), 55 deletions(-)
+
+diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
+index 6ab386ff32944..332ac6eda3ba4 100644
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -370,6 +370,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
+ WRITE_ONCE(msk->pm.accept_subflow, false);
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
+ msk->pm.status = 0;
++ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ spin_lock_init(&msk->pm.lock);
+ INIT_LIST_HEAD(&msk->pm.anno_list);
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 15c89d4fea4d2..bba166ddacc78 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -38,10 +38,6 @@ struct mptcp_pm_add_entry {
+ u8 retrans_times;
+ };
+
+-/* max value of mptcp_addr_info.id */
+-#define MAX_ADDR_ID U8_MAX
+-#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
+-
+ struct pm_nl_pernet {
+ /* protects pernet updates */
+ spinlock_t lock;
+@@ -53,14 +49,14 @@ struct pm_nl_pernet {
+ unsigned int local_addr_max;
+ unsigned int subflows_max;
+ unsigned int next_id;
+- unsigned long id_bitmap[BITMAP_SZ];
++ DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ };
+
+ #define MPTCP_PM_ADDR_MAX 8
+ #define ADD_ADDR_RETRANS_MAX 3
+
+ static bool addresses_equal(const struct mptcp_addr_info *a,
+- struct mptcp_addr_info *b, bool use_port)
++ const struct mptcp_addr_info *b, bool use_port)
+ {
+ bool addr_equals = false;
+
+@@ -174,6 +170,9 @@ select_local_address(const struct pm_nl_pernet *pernet,
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
+ continue;
+
++ if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
++ continue;
++
+ if (entry->addr.family != sk->sk_family) {
+ #if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if ((entry->addr.family == AF_INET &&
+@@ -184,23 +183,17 @@ select_local_address(const struct pm_nl_pernet *pernet,
+ continue;
+ }
+
+- /* avoid any address already in use by subflows and
+- * pending join
+- */
+- if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
+- ret = entry;
+- break;
+- }
++ ret = entry;
++ break;
+ }
+ rcu_read_unlock();
+ return ret;
+ }
+
+ static struct mptcp_pm_addr_entry *
+-select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
++select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
+ {
+ struct mptcp_pm_addr_entry *entry, *ret = NULL;
+- int i = 0;
+
+ rcu_read_lock();
+ /* do not keep any additional per socket state, just signal
+@@ -209,12 +202,14 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
+ * can lead to additional addresses not being announced.
+ */
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
++ if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
++ continue;
++
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
+ continue;
+- if (i++ == pos) {
+- ret = entry;
+- break;
+- }
++
++ ret = entry;
++ break;
+ }
+ rcu_read_unlock();
+ return ret;
+@@ -258,9 +253,11 @@ EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
+
+ static void check_work_pending(struct mptcp_sock *msk)
+ {
+- if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) &&
+- (msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) ||
+- msk->pm.subflows == mptcp_pm_get_subflows_max(msk)))
++ struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
++
++ if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
++ (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
++ MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1))
+ WRITE_ONCE(msk->pm.work_pending, false);
+ }
+
+@@ -460,6 +457,35 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
+ return i;
+ }
+
++static struct mptcp_pm_addr_entry *
++__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
++{
++ struct mptcp_pm_addr_entry *entry;
++
++ list_for_each_entry(entry, &pernet->local_addr_list, list) {
++ if (entry->addr.id == id)
++ return entry;
++ }
++ return NULL;
++}
++
++static int
++lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
++{
++ struct mptcp_pm_addr_entry *entry;
++ int ret = -1;
++
++ rcu_read_lock();
++ list_for_each_entry(entry, &pernet->local_addr_list, list) {
++ if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
++ ret = entry->addr.id;
++ break;
++ }
++ }
++ rcu_read_unlock();
++ return ret;
++}
++
+ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+ {
+ struct sock *sk = (struct sock *)msk;
+@@ -475,6 +501,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+ local_addr_max = mptcp_pm_get_local_addr_max(msk);
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
++ /* do lazy endpoint usage accounting for the MPC subflows */
++ if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
++ struct mptcp_addr_info local;
++ int mpc_id;
++
++ local_address((struct sock_common *)msk->first, &local);
++ mpc_id = lookup_id_by_addr(pernet, &local);
++ if (mpc_id < 0)
++ __clear_bit(mpc_id, msk->pm.id_avail_bitmap);
++
++ msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
++ }
++
+ pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
+ msk->pm.local_addr_used, local_addr_max,
+ msk->pm.add_addr_signaled, add_addr_signal_max,
+@@ -482,21 +521,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+
+ /* check first for announce */
+ if (msk->pm.add_addr_signaled < add_addr_signal_max) {
+- local = select_signal_address(pernet,
+- msk->pm.add_addr_signaled);
++ local = select_signal_address(pernet, msk);
+
+ if (local) {
+ if (mptcp_pm_alloc_anno_list(msk, local)) {
++ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ msk->pm.add_addr_signaled++;
+ mptcp_pm_announce_addr(msk, &local->addr, false);
+ mptcp_pm_nl_addr_send_ack(msk);
+ }
+- } else {
+- /* pick failed, avoid fourther attempts later */
+- msk->pm.local_addr_used = add_addr_signal_max;
+ }
+-
+- check_work_pending(msk);
+ }
+
+ /* check if should create a new subflow */
+@@ -510,19 +544,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+ int i, nr;
+
+ msk->pm.local_addr_used++;
+- check_work_pending(msk);
+ nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
++ if (nr)
++ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ spin_unlock_bh(&msk->pm.lock);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
+ spin_lock_bh(&msk->pm.lock);
+- return;
+ }
+-
+- /* lookup failed, avoid fourther attempts later */
+- msk->pm.local_addr_used = local_addr_max;
+- check_work_pending(msk);
+ }
++ check_work_pending(msk);
+ }
+
+ static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
+@@ -736,6 +767,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+ msk->pm.subflows--;
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
+ }
++ __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
+ if (!removed)
+ continue;
+
+@@ -765,6 +797,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
+
+ msk_owned_by_me(msk);
+
++ if (!(pm->status & MPTCP_PM_WORK_MASK))
++ return;
++
+ spin_lock_bh(&msk->pm.lock);
+
+ pr_debug("msk=%p status=%x", msk, pm->status);
+@@ -810,7 +845,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
+ /* to keep the code simple, don't do IDR-like allocation for address ID,
+ * just bail when we exceed limits
+ */
+- if (pernet->next_id == MAX_ADDR_ID)
++ if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
+ pernet->next_id = 1;
+ if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
+ goto out;
+@@ -830,7 +865,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
+ if (!entry->addr.id) {
+ find_next:
+ entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
+- MAX_ADDR_ID + 1,
++ MPTCP_PM_MAX_ADDR_ID + 1,
+ pernet->next_id);
+ if (!entry->addr.id && pernet->next_id != 1) {
+ pernet->next_id = 1;
+@@ -1197,18 +1232,6 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
+ return 0;
+ }
+
+-static struct mptcp_pm_addr_entry *
+-__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
+-{
+- struct mptcp_pm_addr_entry *entry;
+-
+- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+- if (entry->addr.id == id)
+- return entry;
+- }
+- return NULL;
+-}
+-
+ int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+ u8 *flags, int *ifindex)
+ {
+@@ -1467,7 +1490,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
+ list_splice_init(&pernet->local_addr_list, &free_list);
+ __reset_counters(pernet);
+ pernet->next_id = 1;
+- bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
++ bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ spin_unlock_bh(&pernet->lock);
+ mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ synchronize_rcu();
+@@ -1577,7 +1600,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
+ pernet = net_generic(net, pm_nl_pernet_id);
+
+ spin_lock_bh(&pernet->lock);
+- for (i = id; i < MAX_ADDR_ID + 1; i++) {
++ for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
+ if (test_bit(i, pernet->id_bitmap)) {
+ entry = __lookup_addr_by_id(pernet, i);
+ if (!entry)
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 4c889552cde77..354f169ca120a 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2435,8 +2435,7 @@ static void mptcp_worker(struct work_struct *work)
+
+ mptcp_check_fastclose(msk);
+
+- if (msk->pm.status)
+- mptcp_pm_nl_work(msk);
++ mptcp_pm_nl_work(msk);
+
+ if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+ mptcp_check_for_eof(msk);
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index 82c5dc4d6b49d..9fc6f494075fa 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -174,16 +174,25 @@ enum mptcp_pm_status {
+ MPTCP_PM_ADD_ADDR_SEND_ACK,
+ MPTCP_PM_RM_ADDR_RECEIVED,
+ MPTCP_PM_ESTABLISHED,
+- MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
+ MPTCP_PM_SUBFLOW_ESTABLISHED,
++ MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
++ MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
++ * accounted int id_avail_bitmap
++ */
+ };
+
++/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
++#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
++
+ enum mptcp_addr_signal_status {
+ MPTCP_ADD_ADDR_SIGNAL,
+ MPTCP_ADD_ADDR_ECHO,
+ MPTCP_RM_ADDR_SIGNAL,
+ };
+
++/* max value of mptcp_addr_info.id */
++#define MPTCP_PM_MAX_ADDR_ID U8_MAX
++
+ struct mptcp_pm_data {
+ struct mptcp_addr_info local;
+ struct mptcp_addr_info remote;
+@@ -202,6 +211,7 @@ struct mptcp_pm_data {
+ u8 local_addr_used;
+ u8 subflows;
+ u8 status;
++ DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ struct mptcp_rm_list rm_list_tx;
+ struct mptcp_rm_list rm_list_rx;
+ };
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 586af88194e56..0c12602fa22e8 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -1068,7 +1068,10 @@ signal_address_tests()
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+- chk_add_nr 4 4
++
++ # the server will not signal the address terminating
++ # the MPC subflow
++ chk_add_nr 3 3
+ }
+
+ link_failure_tests()
+--
+2.34.1
+
--- /dev/null
+From 62cf11a278e6b862b0ad7d05a518dbd832f6935c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jan 2022 14:20:13 -0500
+Subject: net: fix information leakage in /proc/net/ptype
+
+From: Congyu Liu <liu3101@purdue.edu>
+
+[ Upstream commit 47934e06b65637c88a762d9c98329ae6e3238888 ]
+
+In one net namespace, after creating a packet socket without binding
+it to a device, users in other net namespaces can observe the new
+`packet_type` added by this packet socket by reading the
+`/proc/net/ptype` file. This is minor information leakage, as packet
+sockets are namespace aware.
+
+Add a net pointer in `packet_type` to keep the net namespace of the
+corresponding packet socket. In `ptype_seq_show`, this net pointer
+must be checked when it is not NULL.
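+
+Sketch of the check added to ptype_seq_show() (from the diff below): an
+entry is shown only when both its bound device (if any) and its owning
+packet socket's namespace (if recorded) match the namespace of the seq
+file:
+
+  if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+      (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq))))
+          show_entry(seq, pt);  /* illustrative helper */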
+
+Fixes: 2feb27dbe00c ("[NETNS]: Minor information leak via /proc/net/ptype file.")
+Signed-off-by: Congyu Liu <liu3101@purdue.edu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 1 +
+ net/core/net-procfs.c | 3 ++-
+ net/packet/af_packet.c | 2 ++
+ 3 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index ce81cc96a98d9..fba54624191a2 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2636,6 +2636,7 @@ struct packet_type {
+ struct net_device *);
+ bool (*id_match)(struct packet_type *ptype,
+ struct sock *sk);
++ struct net *af_packet_net;
+ void *af_packet_priv;
+ struct list_head list;
+ };
+diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
+index d8b9dbabd4a43..5b8016335acaf 100644
+--- a/net/core/net-procfs.c
++++ b/net/core/net-procfs.c
+@@ -260,7 +260,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
+
+ if (v == SEQ_START_TOKEN)
+ seq_puts(seq, "Type Device Function\n");
+- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
++ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
++ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
+ if (pt->type == htons(ETH_P_ALL))
+ seq_puts(seq, "ALL ");
+ else
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+index 1bc7ef49e1487..1a138e8d32d66 100644
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1738,6 +1738,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
+ match->prot_hook.dev = po->prot_hook.dev;
+ match->prot_hook.func = packet_rcv_fanout;
+ match->prot_hook.af_packet_priv = match;
++ match->prot_hook.af_packet_net = read_pnet(&match->net);
+ match->prot_hook.id_match = match_fanout_group;
+ match->max_num_members = args->max_num_members;
+ list_add(&match->list, &fanout_list);
+@@ -3323,6 +3324,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
+ po->prot_hook.func = packet_rcv_spkt;
+
+ po->prot_hook.af_packet_priv = sk;
++ po->prot_hook.af_packet_net = sock_net(sk);
+
+ if (proto) {
+ po->prot_hook.type = proto;
+--
+2.34.1
+
--- /dev/null
+From 340b01a2aa30d880662fbccca87b15467d53c257 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jan 2022 15:52:43 -0600
+Subject: net: phy: broadcom: hook up soft_reset for BCM54616S
+
+From: Robert Hancock <robert.hancock@calian.com>
+
+[ Upstream commit d15c7e875d44367005370e6a82e8f3a382a04f9b ]
+
+A problem was encountered with the Bel-Fuse 1GBT-SFP05 SFP module (which
+is a 1 Gbps copper module operating in SGMII mode with an internal
+BCM54616S PHY device) using the Xilinx AXI Ethernet MAC core, where the
+module would work properly on the initial insertion or boot of the
+device, but after the device was rebooted, the link would either only
+come up at 100 Mbps speeds or go up and down erratically.
+
+I found no meaningful changes in the PHY configuration registers between
+the working and non-working boots, but the status registers seemed to
+have a lot of error indications set on the SERDES side of the device on
+the non-working boot. I suspect the problem is that whatever happens on
+the SGMII link when the device is rebooted and the FPGA logic gets
+reloaded ends up putting the module's onboard PHY into a bad state.
+
+Since commit 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
+the genphy_soft_reset call is not made automatically by the PHY core
+unless the callback is explicitly specified in the driver structure. For
+most of these Broadcom devices, there is probably a hardware reset that
+gets asserted to reset the PHY during boot, however for SFP modules
+(where the BCM54616S is commonly found) no such reset line exists, so if
+the board keeps the SFP cage powered up across a reboot, it will end up
+with no reset occurring during reboots.
+
+Hook up the genphy_soft_reset callback for BCM54616S to ensure that a
+PHY reset is performed before the device is initialized. This appears to
+fix the issue with erratic operation after a reboot with this SFP
+module.
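+
+The change itself is a one-line opt-in in the phy_driver entry (sketch;
+fields abbreviated):
+
+  static struct phy_driver broadcom_drivers[] = { {
+          .name       = "Broadcom BCM54616S",
+          .soft_reset = genphy_soft_reset, /* reset before config_init */
+          /* ... */
+  }, };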
+
+Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
+Signed-off-by: Robert Hancock <robert.hancock@calian.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/broadcom.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
+index 83aea5c5cd03c..db26ff8ce7dbb 100644
+--- a/drivers/net/phy/broadcom.c
++++ b/drivers/net/phy/broadcom.c
+@@ -768,6 +768,7 @@ static struct phy_driver broadcom_drivers[] = {
+ .phy_id_mask = 0xfffffff0,
+ .name = "Broadcom BCM54616S",
+ /* PHY_GBIT_FEATURES */
++ .soft_reset = genphy_soft_reset,
+ .config_init = bcm54xx_config_init,
+ .config_aneg = bcm54616s_config_aneg,
+ .config_intr = bcm_phy_config_intr,
+--
+2.34.1
+
--- /dev/null
+From 2838c988be07cfa57a51a5eb22da09d8f039fad9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jan 2022 13:46:47 +0900
+Subject: net: stmmac: dwmac-visconti: Fix bit definitions for ETHER_CLK_SEL
+
+From: Yuji Ishikawa <yuji2.ishikawa@toshiba.co.jp>
+
+[ Upstream commit 1ba1a4a90fa416a6f389206416c5f488cf8b1543 ]
+
+Just 0 should be used to represent cleared bits for the following macros:
+
+* ETHER_CLK_SEL_DIV_SEL_20
+* ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN
+* ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN
+* ETHER_CLK_SEL_TX_CLK_O_TX_I
+* ETHER_CLK_SEL_RMII_CLK_SEL_IN
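+
+Illustration (not from the commit) of why BIT(0) was wrong: these *_IN
+values name the all-bits-clear state of a mux field, so defining them as
+BIT(0) spuriously set bit 0 whenever they were OR-ed into the register:
+
+  val = ETHER_CLK_SEL_FREQ_SEL_50M | ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN;
+  /* old definition: bit 0 set by accident; new: field left cleared */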
+
+Fixes: b38dd98ff8d0 ("net: stmmac: Add Toshiba Visconti SoCs glue driver")
+Signed-off-by: Yuji Ishikawa <yuji2.ishikawa@toshiba.co.jp>
+Reviewed-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+index fac788718c045..1c599a005aab6 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+@@ -22,21 +22,21 @@
+ #define ETHER_CLK_SEL_RMII_CLK_EN BIT(2)
+ #define ETHER_CLK_SEL_RMII_CLK_RST BIT(3)
+ #define ETHER_CLK_SEL_DIV_SEL_2 BIT(4)
+-#define ETHER_CLK_SEL_DIV_SEL_20 BIT(0)
++#define ETHER_CLK_SEL_DIV_SEL_20 0
+ #define ETHER_CLK_SEL_FREQ_SEL_125M (BIT(9) | BIT(8))
+ #define ETHER_CLK_SEL_FREQ_SEL_50M BIT(9)
+ #define ETHER_CLK_SEL_FREQ_SEL_25M BIT(8)
+ #define ETHER_CLK_SEL_FREQ_SEL_2P5M 0
+-#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN BIT(0)
++#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN 0
+ #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC BIT(10)
+ #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV BIT(11)
+-#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN BIT(0)
++#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN 0
+ #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC BIT(12)
+ #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV BIT(13)
+-#define ETHER_CLK_SEL_TX_CLK_O_TX_I BIT(0)
++#define ETHER_CLK_SEL_TX_CLK_O_TX_I 0
+ #define ETHER_CLK_SEL_TX_CLK_O_RMII_I BIT(14)
+ #define ETHER_CLK_SEL_TX_O_E_N_IN BIT(15)
+-#define ETHER_CLK_SEL_RMII_CLK_SEL_IN BIT(0)
++#define ETHER_CLK_SEL_RMII_CLK_SEL_IN 0
+ #define ETHER_CLK_SEL_RMII_CLK_SEL_RX_C BIT(16)
+
+ #define ETHER_CLK_SEL_RX_TX_CLK_EN (ETHER_CLK_SEL_RX_CLK_EN | ETHER_CLK_SEL_TX_CLK_EN)
+--
+2.34.1
+
--- /dev/null
+From 9a25d5f55f530a13abb63b04f35997acfb20c983 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jan 2022 13:46:48 +0900
+Subject: net: stmmac: dwmac-visconti: Fix clock configuration for RMII mode
+
+From: Yuji Ishikawa <yuji2.ishikawa@toshiba.co.jp>
+
+[ Upstream commit 0959bc4bd4206433ed101a1332a23e93ad16ec77 ]
+
+The bit pattern of the ETHER_CLOCK_SEL register for RMII/MII mode should be
+fixed. Also, some control bits must be modified in a specific sequence.
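+
+Sketch of the ordering the fix enforces (condensed from the RGMII branch
+of the diff below): select the clock mux first, then start the clocks,
+then flip the TX_O direction, as separate register writes:
+
+  writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);  /* 1. clock mux    */
+  val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
+  writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);  /* 2. start clocks */
+  val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
+  writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);  /* 3. TX_O output  */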
+
+Fixes: b38dd98ff8d0 ("net: stmmac: Add Toshiba Visconti SoCs glue driver")
+Signed-off-by: Yuji Ishikawa <yuji2.ishikawa@toshiba.co.jp>
+Reviewed-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/stmicro/stmmac/dwmac-visconti.c | 32 ++++++++++++-------
+ 1 file changed, 21 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+index 1c599a005aab6..4578c64953eac 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+@@ -96,31 +96,41 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed)
+ val |= ETHER_CLK_SEL_TX_O_E_N_IN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
++ /* Set Clock-Mux, Start clock, Set TX_O direction */
+ switch (dwmac->phy_intf_sel) {
+ case ETHER_CONFIG_INTF_RGMII:
+ val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ break;
+ case ETHER_CONFIG_INTF_RMII:
+ val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV |
+- ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN |
++ ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
+ ETHER_CLK_SEL_RMII_CLK_SEL_RX_C;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RMII_CLK_RST;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RMII_CLK_EN | ETHER_CLK_SEL_RX_TX_CLK_EN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ break;
+ case ETHER_CONFIG_INTF_MII:
+ default:
+ val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC |
+- ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
+- ETHER_CLK_SEL_RMII_CLK_EN;
++ ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ break;
+ }
+
+- /* Start clock */
+- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+- val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
+- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+-
+- val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
+- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+-
+ spin_unlock_irqrestore(&dwmac->lock, flags);
+ }
+
+--
+2.34.1
+
--- /dev/null
+From 27e186efa8e0b0f6f604bc4c41dc716cdf6d770f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jan 2022 21:37:58 +0100
+Subject: netfilter: conntrack: don't increment invalid counter on NF_REPEAT
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 830af2eba40327abec64325a5b08b1e85c37a2e0 ]
+
+The packet isn't invalid; NF_REPEAT means we're trying again after
+cleaning out a stale connection, e.g. via the TCP tracker.
+
+This caused the invalid stat counter to increase in a test case involving
+frequent connection reuse, even though no packet is actually invalid.
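+
+Sketch of the reordering (condensed from the diff below): take the
+NF_REPEAT retry path before touching the stats, so only genuine
+failures are counted:
+
+  if (ret == -NF_REPEAT)
+          goto repeat;               /* retry, not an invalid packet */
+
+  NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+  if (ret == -NF_DROP)
+          NF_CT_STAT_INC_ATOMIC(state->net, drop);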
+
+Fixes: 56a62e2218f5 ("netfilter: conntrack: fix NF_REPEAT handling")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_conntrack_core.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index 4712a90a1820c..7f79974607643 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -1922,15 +1922,17 @@ repeat:
+ pr_debug("nf_conntrack_in: Can't track with proto module\n");
+ nf_conntrack_put(&ct->ct_general);
+ skb->_nfct = 0;
+- NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+- if (ret == -NF_DROP)
+- NF_CT_STAT_INC_ATOMIC(state->net, drop);
+ /* Special case: TCP tracker reports an attempt to reopen a
+ * closed/aborted connection. We have to go back and create a
+ * fresh conntrack.
+ */
+ if (ret == -NF_REPEAT)
+ goto repeat;
++
++ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
++ if (ret == -NF_DROP)
++ NF_CT_STAT_INC_ATOMIC(state->net, drop);
++
+ ret = -ret;
+ goto out;
+ }
+--
+2.34.1
+
--- /dev/null
+From 819a1638d01ceef0d04387333898e7e819560df2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Dec 2021 16:38:15 -0500
+Subject: NFS: Ensure the server has an up to date ctime before hardlinking
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 204975036b34f55237bc44c8a302a88468ef21b5 ]
+
+Creating a hard link is required by POSIX to update the file ctime, so
+ensure that the file data is synced to disk so that we don't clobber the
+updated ctime by writing back after creating the hard link.
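+
+The fix (see the diff below) flushes dirty data for regular files before
+issuing the LINK operation:
+
+  if (S_ISREG(inode->i_mode))
+          nfs_sync_inode(inode);
+  error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);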
+
+Fixes: 9f7682728728 ("NFS: Move the delegation return down into nfs4_proc_link()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/dir.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 5b68c44848caf..fdea6ff824cf8 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -2382,6 +2382,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+
+ trace_nfs_link_enter(inode, dir, dentry);
+ d_drop(dentry);
++ if (S_ISREG(inode->i_mode))
++ nfs_sync_inode(inode);
+ error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
+ if (error == 0) {
+ ihold(inode);
+--
+2.34.1
+
--- /dev/null
+From f255aff22cfe3d57206c276db792e6b8c7619b60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Dec 2021 16:38:16 -0500
+Subject: NFS: Ensure the server has an up to date ctime before renaming
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 6ff9d99bb88faebf134ca668842349d9718e5464 ]
+
+Renaming a file is required by POSIX to update the file ctime, so
+ensure that the file data is synced to disk so that we don't clobber the
+updated ctime by writing back after the rename.
+
+Fixes: f2c2c552f119 ("NFS: Move delegation recall into the NFSv4 callback for rename_setup()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/dir.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index fdea6ff824cf8..ac0e1ab1b71ba 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -2472,6 +2472,8 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ }
+ }
+
++ if (S_ISREG(old_inode->i_mode))
++ nfs_sync_inode(old_inode);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+--
+2.34.1
+
--- /dev/null
+From c407b6e1c8205cf17ffa2218dbc9bd5fb00b7000 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jan 2022 18:24:02 -0500
+Subject: NFSv4: Handle case where the lookup of a directory fails
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit ac795161c93699d600db16c1a8cc23a65a1eceaf ]
+
+If the application sets the O_DIRECTORY flag, and tries to open a
+regular file, nfs_atomic_open() will punt to doing a regular lookup.
+If the server then returns a regular file, we will happily return a
+file descriptor with uninitialised open state.
+
+The fix is to return the expected ENOTDIR error in these cases.
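+
+Sketch of the added check (condensed from the diff below): after the
+fallback lookup, reject a non-directory result when the caller passed
+O_DIRECTORY:
+
+  if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+      !S_ISDIR(inode->i_mode))
+          res = ERR_PTR(-ENOTDIR);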
+
+Reported-by: Lyu Tao <tao.lyu@epfl.ch>
+Fixes: 0dd2b474d0b6 ("nfs: implement i_op->atomic_open()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/dir.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index ac0e1ab1b71ba..2d156311c374d 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -1982,6 +1982,19 @@ out:
+
+ no_open:
+ res = nfs_lookup(dir, dentry, lookup_flags);
++ if (!res) {
++ inode = d_inode(dentry);
++ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
++ !S_ISDIR(inode->i_mode))
++ res = ERR_PTR(-ENOTDIR);
++ } else if (!IS_ERR(res)) {
++ inode = d_inode(res);
++ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
++ !S_ISDIR(inode->i_mode)) {
++ dput(res);
++ res = ERR_PTR(-ENOTDIR);
++ }
++ }
+ if (switched) {
+ d_lookup_done(dentry);
+ if (!res)
+--
+2.34.1
+
--- /dev/null
+From 46db83ff5fbfc5cb89d9903bad6b205eab622c9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jan 2022 18:24:03 -0500
+Subject: NFSv4: nfs_atomic_open() can race when looking up a non-regular file
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 1751fc1db36f6f411709e143d5393f92d12137a9 ]
+
+If the file type changes back to being a regular file on the server
+between the failed OPEN and our LOOKUP, then we need to re-run the OPEN.
+
+Fixes: 0dd2b474d0b6 ("nfs: implement i_op->atomic_open()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/dir.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 2d156311c374d..ed79c1bd84a29 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -1987,12 +1987,17 @@ no_open:
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !S_ISDIR(inode->i_mode))
+ res = ERR_PTR(-ENOTDIR);
++ else if (inode && S_ISREG(inode->i_mode))
++ res = ERR_PTR(-EOPENSTALE);
+ } else if (!IS_ERR(res)) {
+ inode = d_inode(res);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !S_ISDIR(inode->i_mode)) {
+ dput(res);
+ res = ERR_PTR(-ENOTDIR);
++ } else if (inode && S_ISREG(inode->i_mode)) {
++ dput(res);
++ res = ERR_PTR(-EOPENSTALE);
+ }
+ }
+ if (switched) {
+--
+2.34.1
+
--- /dev/null
+From 069df600c33a90fe62f8eddf777e4da0a31795a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:45 +0530
+Subject: octeontx2-af: cn10k: Do not enable RPM loopback for LPC interfaces
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit df66b6ebc5dcf7253e35a640b9ec4add54195c25 ]
+
+Internal loopback is not supported on low-rate LPCS interfaces such as
+SGMII/QSGMII, so do not allow enabling it for such interfaces.
+
+Fixes: 3ad3f8f93c81 ("octeontx2-af: cn10k: MAC internal loopback support")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/af/rpm.c | 27 +++++++++----------
+ 1 file changed, 12 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+index 07b0eafccad87..b3803577324e6 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+@@ -251,22 +251,19 @@ int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable)
+ if (!rpm || lmac_id >= rpm->lmac_count)
+ return -ENODEV;
+ lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id);
+- if (lmac_type == LMAC_MODE_100G_R) {
+- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
+-
+- if (enable)
+- cfg |= RPMX_MTI_PCS_LBK;
+- else
+- cfg &= ~RPMX_MTI_PCS_LBK;
+- rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
+- } else {
+- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1);
+- if (enable)
+- cfg |= RPMX_MTI_PCS_LBK;
+- else
+- cfg &= ~RPMX_MTI_PCS_LBK;
+- rpm_write(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1, cfg);
++
++ if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) {
++ dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n");
++ return 0;
+ }
+
++ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
++
++ if (enable)
++ cfg |= RPMX_MTI_PCS_LBK;
++ else
++ cfg &= ~RPMX_MTI_PCS_LBK;
++ rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
++
+ return 0;
+ }
+--
+2.34.1
+
--- /dev/null
+From 89fd81ca0a9542b5236f7e29affc9a072b18deed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:39 +0530
+Subject: octeontx2-af: Do not fixup all VF action entries
+
+From: Subbaraya Sundeep <sbhatta@marvell.com>
+
+[ Upstream commit d225c449ab2be25273a3674f476c6c0b57c50254 ]
+
+AF modifies all rules destined for a VF to use the same action
+as the VF's default RSS action. This fixup was needed because
+AF only installs default rules with the RSS action. But the
+action in rules installed by a PF for its VFs should not be
+changed by this fixup, because the action can be a drop or
+direct-to-queue action as specified by the user (ntuple filters).
+This patch fixes that problem.
+
+Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet")
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/af/rvu_npc.c | 22 ++++++++++++++++---
+ .../marvell/octeontx2/af/rvu_npc_fs.c | 20 ++++++++++-------
+ 2 files changed, 31 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+index 5efb4174e82df..87f18e32b4634 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+@@ -402,6 +402,7 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
+ int blkaddr, int index, struct mcam_entry *entry,
+ bool *enable)
+ {
++ struct rvu_npc_mcam_rule *rule;
+ u16 owner, target_func;
+ struct rvu_pfvf *pfvf;
+ u64 rx_action;
+@@ -423,6 +424,12 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
+ test_bit(NIXLF_INITIALIZED, &pfvf->flags)))
+ *enable = false;
+
++ /* fix up not needed for the rules added by the user (ntuple filters) */
++ list_for_each_entry(rule, &mcam->mcam_rules, list) {
++ if (rule->entry == index)
++ return;
++ }
++
+ /* copy VF default entry action to the VF mcam entry */
+ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr,
+ target_func);
+@@ -489,8 +496,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ }
+
+ /* PF installing VF rule */
+- if (intf == NIX_INTF_RX && actindex < mcam->bmap_entries)
+- npc_fixup_vf_rule(rvu, mcam, blkaddr, index, entry, &enable);
++ if (is_npc_intf_rx(intf) && actindex < mcam->bmap_entries)
++ npc_fixup_vf_rule(rvu, mcam, blkaddr, actindex, entry, &enable);
+
+ /* Set 'action' */
+ rvu_write64(rvu, blkaddr,
+@@ -916,7 +923,8 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ int blkaddr, u16 pcifunc, u64 rx_action)
+ {
+ int actindex, index, bank, entry;
+- bool enable;
++ struct rvu_npc_mcam_rule *rule;
++ bool enable, update;
+
+ if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+ return;
+@@ -924,6 +932,14 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ mutex_lock(&mcam->lock);
+ for (index = 0; index < mcam->bmap_entries; index++) {
+ if (mcam->entry2target_pffunc[index] == pcifunc) {
++ update = true;
++ /* update not needed for the rules added via ntuple filters */
++ list_for_each_entry(rule, &mcam->mcam_rules, list) {
++ if (rule->entry == index)
++ update = false;
++ }
++ if (!update)
++ continue;
+ bank = npc_get_bank(mcam, index);
+ actindex = index;
+ entry = index & (mcam->banksize - 1);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+index 51ddc7b81d0bd..ca404d51d9f56 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+@@ -1098,14 +1098,6 @@ find_rule:
+ write_req.cntr = rule->cntr;
+ }
+
+- err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
+- &write_rsp);
+- if (err) {
+- rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
+- if (new)
+- kfree(rule);
+- return err;
+- }
+ /* update rule */
+ memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet));
+ memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask));
+@@ -1129,6 +1121,18 @@ find_rule:
+ if (req->default_rule)
+ pfvf->def_ucast_rule = rule;
+
++ /* write to mcam entry registers */
++ err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
++ &write_rsp);
++ if (err) {
++ rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
++ if (new) {
++ list_del(&rule->list);
++ kfree(rule);
++ }
++ return err;
++ }
++
+ /* VF's MAC address is being changed via PF */
+ if (pf_set_vfs_mac) {
+ ether_addr_copy(pfvf->default_mac, req->packet.dmac);
+--
+2.34.1
+
--- /dev/null
+From 9917c059291ea7f257714fd0f3dc222912189ebf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:40 +0530
+Subject: octeontx2-af: Fix LBK backpressure id count
+
+From: Sunil Goutham <sgoutham@marvell.com>
+
+[ Upstream commit 00bfe94e388fe12bfd0d4f6361b1b1343374ff5b ]
+
+In rvu_nix_get_bpid() lbk_bpid_cnt is being read from the
+wrong register, due to which enabling backpressure fails
+for LBK VF32 onwards. This patch fixes that.
+
+Fixes: fe1939bb2340 ("octeontx2-af: Add SDP interface support")
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 6970540dc4709..8ee324aabf2d6 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -511,11 +511,11 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
+ lmac_chan_cnt = cfg & 0xFF;
+
+- cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+- sdp_chan_cnt = cfg & 0xFFF;
+-
+ cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt;
+ lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF);
++
++ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
++ sdp_chan_cnt = cfg & 0xFFF;
+ sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt;
+
+ pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+--
+2.34.1
+
--- /dev/null
+From f8fedc91133ecf56b1538eddb4b5f59cdd64f27e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:44 +0530
+Subject: octeontx2-af: Increase link credit restore polling timeout
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit 1581d61b42d985cefe7b71eea67ab3bfcbf34d0f ]
+
+It's been observed that link credit restore can sometimes take
+much longer than the current timeout. This patch increases the
+default timeout value and returns a proper error value on
+failure.
+
+Fixes: 1c74b89171c3 ("octeontx2-af: Wait for TX link idle for credits change")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 1 +
+ drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 4 ++--
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+index a8618259de943..26ad71842b3b2 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+@@ -700,6 +700,7 @@ enum nix_af_status {
+ NIX_AF_ERR_BANDPROF_INVAL_REQ = -428,
+ NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429,
+ NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430,
++ NIX_AF_ERR_LINK_CREDITS = -431,
+ };
+
+ /* For NIX RX vtag action */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 9d4cc0ae61474..959266894cf15 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -3891,8 +3891,8 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
+ NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0));
+ }
+
+- rc = -EBUSY;
+- poll_tmo = jiffies + usecs_to_jiffies(10000);
++ rc = NIX_AF_ERR_LINK_CREDITS;
++ poll_tmo = jiffies + usecs_to_jiffies(200000);
+ /* Wait for credits to return */
+ do {
+ if (time_after(jiffies, poll_tmo))
+--
+2.34.1
+
--- /dev/null
+From b111e1c10fad5cb45c85b2d7ec080e8c37cabf31 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:41 +0530
+Subject: octeontx2-af: Retry until RVU block reset complete
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit 03ffbc9914bd1130fba464f0a41c01372e5fc359 ]
+
+A few RVU blocks like SSO require more time to reset on some
+silicons, so retry the block reset until it succeeds.
+
+Fixes: c0fa2cff8822c ("octeontx2-af: Handle return value in block reset")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+index 90dc5343827f0..11ef46e72ddd9 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+@@ -520,8 +520,11 @@ static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg)
+
+ rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0));
+ err = rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
+- if (err)
+- dev_err(rvu->dev, "HW block:%d reset failed\n", blkaddr);
++ if (err) {
++ dev_err(rvu->dev, "HW block:%d reset timeout retrying again\n", blkaddr);
++ while (rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true) == -EBUSY)
++ ;
++ }
+ }
+
+ static void rvu_reset_all_blocks(struct rvu *rvu)
+--
+2.34.1
+
--- /dev/null
+From 9f6506cd4e6651d3395b59182ee00509f84a9d66 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Sep 2021 18:40:24 +0530
+Subject: octeontx2-af: verify CQ context updates
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit 14e94f9445a9e91d460f5d4b519f8892c3fb14bb ]
+
+As per a HW erratum, an AQ modification to a CQ could be discarded
+under heavy traffic. This patch implements a workaround: after each
+CQ write via the AQ, check whether the requested fields (except those
+which HW can update, e.g. avg_level) were properly updated or not.
+
+If the CQ context was not updated, perform the AQ write again.
+
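+A standalone sketch of the masked comparison this workaround performs
+(a simplified model, not the driver code itself):
+
+  #include <stddef.h>
+  #include <stdint.h>
+
+  /* Compare only the fields selected by @mask, 64 bits at a time. */
+  static int ctx_matches(const uint64_t *want, const uint64_t *got,
+                         const uint64_t *mask, size_t words)
+  {
+          for (size_t i = 0; i < words; i++)
+                  if ((want[i] & mask[i]) != (got[i] & mask[i]))
+                          return 0;
+          return 1;
+  }
+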
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/af/mbox.h | 2 +
+ .../ethernet/marvell/octeontx2/af/rvu_nix.c | 78 ++++++++++++++++++-
+ 2 files changed, 79 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+index 154877706a0e1..a8618259de943 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+@@ -698,6 +698,8 @@ enum nix_af_status {
+ NIX_AF_ERR_INVALID_BANDPROF = -426,
+ NIX_AF_ERR_IPOLICER_NOTSUPP = -427,
+ NIX_AF_ERR_BANDPROF_INVAL_REQ = -428,
++ NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429,
++ NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430,
+ };
+
+ /* For NIX RX vtag action */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 8ee324aabf2d6..9d4cc0ae61474 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -28,6 +28,7 @@ static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req,
+ static int nix_free_all_bandprof(struct rvu *rvu, u16 pcifunc);
+ static void nix_clear_ratelimit_aggr(struct rvu *rvu, struct nix_hw *nix_hw,
+ u32 leaf_prof);
++static const char *nix_get_ctx_name(int ctype);
+
+ enum mc_tbl_sz {
+ MC_TBL_SZ_256,
+@@ -1061,10 +1062,68 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
+ return 0;
+ }
+
++static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw,
++ struct nix_aq_enq_req *req, u8 ctype)
++{
++ struct nix_cn10k_aq_enq_req aq_req;
++ struct nix_cn10k_aq_enq_rsp aq_rsp;
++ int rc, word;
++
++ if (req->ctype != NIX_AQ_CTYPE_CQ)
++ return 0;
++
++ rc = nix_aq_context_read(rvu, nix_hw, &aq_req, &aq_rsp,
++ req->hdr.pcifunc, ctype, req->qidx);
++ if (rc) {
++ dev_err(rvu->dev,
++ "%s: Failed to fetch %s%d context of PFFUNC 0x%x\n",
++ __func__, nix_get_ctx_name(ctype), req->qidx,
++ req->hdr.pcifunc);
++ return rc;
++ }
++
++ /* Make copy of original context & mask which are required
++ * for resubmission
++ */
++ memcpy(&aq_req.cq_mask, &req->cq_mask, sizeof(struct nix_cq_ctx_s));
++ memcpy(&aq_req.cq, &req->cq, sizeof(struct nix_cq_ctx_s));
++
++ /* exclude fields which HW can update */
++ aq_req.cq_mask.cq_err = 0;
++ aq_req.cq_mask.wrptr = 0;
++ aq_req.cq_mask.tail = 0;
++ aq_req.cq_mask.head = 0;
++ aq_req.cq_mask.avg_level = 0;
++ aq_req.cq_mask.update_time = 0;
++ aq_req.cq_mask.substream = 0;
++
++ /* Context mask (cq_mask) holds mask value of fields which
++ * are changed in AQ WRITE operation.
++ * for example cq.drop = 0xa;
++ * cq_mask.drop = 0xff;
++ * Below logic performs '&' between cq and cq_mask so that non
++ * updated fields are masked out for request and response
++ * comparison
++ */
++ for (word = 0; word < sizeof(struct nix_cq_ctx_s) / sizeof(u64);
++ word++) {
++ *(u64 *)((u8 *)&aq_rsp.cq + word * 8) &=
++ (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8));
++ *(u64 *)((u8 *)&aq_req.cq + word * 8) &=
++ (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8));
++ }
++
++ if (memcmp(&aq_req.cq, &aq_rsp.cq, sizeof(struct nix_cq_ctx_s)))
++ return NIX_AF_ERR_AQ_CTX_RETRY_WRITE;
++
++ return 0;
++}
++
+ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
+ struct nix_aq_enq_rsp *rsp)
+ {
+ struct nix_hw *nix_hw;
++ int err, retries = 5;
+ int blkaddr;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, req->hdr.pcifunc);
+@@ -1075,7 +1134,24 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
+ if (!nix_hw)
+ return NIX_AF_ERR_INVALID_NIXBLK;
+
+- return rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp);
++retry:
++ err = rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp);
++
++ /* HW errata 'AQ Modification to CQ could be discarded on heavy traffic'
++ * As a workaround, perform a CQ context read after each AQ write. If the
++ * read shows the AQ write was not applied, perform the AQ write again.
++ */
++ if (!err && req->op == NIX_AQ_INSTOP_WRITE) {
++ err = rvu_nix_verify_aq_ctx(rvu, nix_hw, req, NIX_AQ_CTYPE_CQ);
++ if (err == NIX_AF_ERR_AQ_CTX_RETRY_WRITE) {
++ if (retries--)
++ goto retry;
++ else
++ return NIX_AF_ERR_CQ_CTX_WRITE_ERR;
++ }
++ }
++
++ return err;
+ }
+
+ static const char *nix_get_ctx_name(int ctype)
+--
+2.34.1
+
--- /dev/null
+From 3ce849716925d71e73715c2cee9972c3643202cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:43 +0530
+Subject: octeontx2-pf: cn10k: Ensure valid pointers are freed to aura
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit c5d731c54a17677939bd59ee8be4ed74d7485ba4 ]
+
+While freeing SQB pointers to the aura, the driver first memcpys them
+to the target address and then triggers the LMTST operation that frees
+the pointers to the aura. We need to ensure (by adding a dmb barrier)
+that the memcpy has finished before the pointers are freed to the aura.
+This patch also adds the missing SQ context structure entries in
+debugfs.
+
+Fixes: ef6c8da71eaf ("octeontx2-pf: cn10K: Reserve LMTST lines per core")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 2 ++
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 1 +
+ 2 files changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+index 49d822a98adab..f001579569a2b 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+@@ -1131,6 +1131,8 @@ static void print_nix_cn10k_sq_ctx(struct seq_file *m,
+ seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n",
+ sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld);
+
++ seq_printf(m, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d\n",
++ sq_ctx->smq_next_sq_vld, sq_ctx->smq_pend);
+ seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb);
+ seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb);
+ seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+index a51ecd771d075..637450de189c8 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+@@ -591,6 +591,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
+ size++;
+ tar_addr |= ((size - 1) & 0x7) << 4;
+ }
++ dma_wmb();
+ memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs);
+ /* Perform LMTST flush */
+ cn10k_lmt_flush(val, tar_addr);
+--
+2.34.1
+
--- /dev/null
+From 447beaed92f63363ca25914e7f0202ba09659a38 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:46 +0530
+Subject: octeontx2-pf: Forward error codes to VF
+
+From: Subbaraya Sundeep <sbhatta@marvell.com>
+
+[ Upstream commit a8db854be28622a2477cb21cdf7f829adbb2c42d ]
+
+The PF forwards its VFs' messages to the AF, and the
+corresponding replies from the AF back to the VFs. The AF sets
+a proper error code in the replies after processing the message
+requests. Currently the PF checks the error codes in the replies
+and sends an invalid-message response to the VF, so the VF lacks
+the error code set by the AF for its messages. This patch changes
+that so that the PF simply forwards the AF replies and the VF can
+handle the error codes itself.
+
+Fixes: d424b6c02415 ("octeontx2-pf: Enable SRIOV and added VF mbox handling")
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+index 53a3e8de1a51e..b1894d4045b8d 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -386,7 +386,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf,
+ dst_mdev->msg_size = mbox_hdr->msg_size;
+ dst_mdev->num_msgs = num_msgs;
+ err = otx2_sync_mbox_msg(dst_mbox);
+- if (err) {
++ /* Error code -EIO indicate there is a communication failure
++ * to the AF. Rest of the error codes indicate that AF processed
++ * VF messages and set the error codes in response messages
++ * (if any) so simply forward responses to VF.
++ */
++ if (err == -EIO) {
+ dev_warn(pf->dev,
+ "AF not responding to VF%d messages\n", vf);
+ /* restore PF mbase and exit */
+--
+2.34.1
+
--- /dev/null
+From 4059b9d5d31f6330507d7534fc11bb01670677b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Dec 2021 13:19:52 +0100
+Subject: perf: Fix perf_event_read_local() time
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 09f5e7dc7ad705289e1b1ec065439aa3c42951c4 ]
+
+Time readers that cannot take locks (due to NMI etc.) currently make
+use of perf_event::shadow_ctx_time, which, for that event gives:
+
+ time' = now + (time - timestamp)
+
+or, alternatively arranged:
+
+ time' = time + (now - timestamp)
+
+IOW, the progression of time since the last time the shadow_ctx_time
+was updated.
+
+There are problems with this:
+
+ A) the shadow_ctx_time is per-event, even though the ctx_time it
+ reflects is obviously per context. The direct consequence of this
+ is that the context needs to iterate all events all the time to
+ keep the shadow_ctx_time in sync.
+
+ B) even with the prior point, the context itself might not be active
+ meaning its time should not advance to begin with.
+
+ C) shadow_ctx_time isn't consistently updated when ctx_time is
+
+There are 3 users of this stuff that suffer differently from this:
+
+ - calc_timer_values()
+ - perf_output_read()
+ - perf_event_update_userpage() /* A */
+
+ - perf_event_read_local() /* A,B */
+
+In particular, perf_output_read() doesn't suffer at all, because it's
+sample driven and hence only relevant when the event is actually
+running.
+
+The same was supposed to be true for perf_event_update_userpage();
+after all, self-monitoring implies the context is active. *HOWEVER*, as
+per commit f79256532682 ("perf/core: fix userpage->time_enabled of
+inactive events") this goes wrong when combined with counter
+overcommit: in that case those events that do not get scheduled when
+the context becomes active (task events typically) miss out on the
+EVENT_TIME update and ENABLED time is inflated (for a little while)
+with the time the context was inactive. Once the event gets rotated
+in, this gets corrected, leading to a non-monotonic timeflow.
+
+perf_event_read_local() made things even worse: it can request time at
+any point, suffering all the problems perf_event_update_userpage()
+does and more. Because while perf_event_update_userpage() is limited
+by the context being active, perf_event_read_local() users have no
+such constraint.
+
+Therefore, completely overhaul things and do away with
+perf_event::shadow_ctx_time. Instead have regular context time updates
+keep track of this offset directly and provide perf_event_time_now()
+to complement perf_event_time().
+
+perf_event_time_now() will, in addition to being context-wide, also
+take into account whether the context is active. For an inactive context, it
+will not advance time.
+
+This latter property means the cgroup perf_cgroup_info context needs
+to grow additional state to track this.
+
+Additionally, since all this is strictly per-cpu, we can use barrier()
+to order context activity vs context time.
+
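+A standalone model of the resulting lockless read (a simplified
+sketch; the memory ordering and the active check are elided here but
+present in the real code):
+
+  #include <stdint.h>
+
+  struct ctx_clock {
+          uint64_t time;       /* accumulated context time */
+          uint64_t timestamp;  /* last update, in perf_clock() units */
+          uint64_t timeoffset; /* time - timestamp, published for readers */
+  };
+
+  /* Writer side, with the context lock held. */
+  static void ctx_update_time(struct ctx_clock *c, uint64_t now)
+  {
+          c->time += now - c->timestamp;
+          c->timestamp = now;
+          c->timeoffset = c->time - c->timestamp;
+  }
+
+  /* Lock-free reader: time' = now + (time - timestamp). */
+  static uint64_t ctx_read_time(const struct ctx_clock *c, uint64_t now)
+  {
+          return now + c->timeoffset;
+  }
+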
+Fixes: 7d9285e82db5 ("perf/bpf: Extend the perf_event_read_local() interface, a.k.a. "bpf: perf event change needed for subsequent bpf helpers"")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Song Liu <song@kernel.org>
+Tested-by: Namhyung Kim <namhyung@kernel.org>
+Link: https://lkml.kernel.org/r/YcB06DasOBtU0b00@hirez.programming.kicks-ass.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/perf_event.h | 15 +--
+ kernel/events/core.c | 246 ++++++++++++++++++++++---------------
+ 2 files changed, 149 insertions(+), 112 deletions(-)
+
+diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
+index ae1f0c8b75623..6cce33e7e7acc 100644
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -680,18 +680,6 @@ struct perf_event {
+ u64 total_time_running;
+ u64 tstamp;
+
+- /*
+- * timestamp shadows the actual context timing but it can
+- * be safely used in NMI interrupt context. It reflects the
+- * context time as it was when the event was last scheduled in,
+- * or when ctx_sched_in failed to schedule the event because we
+- * run out of PMC.
+- *
+- * ctx_time already accounts for ctx->timestamp. Therefore to
+- * compute ctx_time for a sample, simply add perf_clock().
+- */
+- u64 shadow_ctx_time;
+-
+ struct perf_event_attr attr;
+ u16 header_size;
+ u16 id_header_size;
+@@ -838,6 +826,7 @@ struct perf_event_context {
+ */
+ u64 time;
+ u64 timestamp;
++ u64 timeoffset;
+
+ /*
+ * These fields let us detect when two contexts have both
+@@ -920,6 +909,8 @@ struct bpf_perf_event_data_kern {
+ struct perf_cgroup_info {
+ u64 time;
+ u64 timestamp;
++ u64 timeoffset;
++ int active;
+ };
+
+ struct perf_cgroup {
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 0fe6a65bbd58f..0153f8f972834 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -674,6 +674,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state)
+ WRITE_ONCE(event->state, state);
+ }
+
++/*
++ * UP store-release, load-acquire
++ */
++
++#define __store_release(ptr, val) \
++do { \
++ barrier(); \
++ WRITE_ONCE(*(ptr), (val)); \
++} while (0)
++
++#define __load_acquire(ptr) \
++({ \
++ __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \
++ barrier(); \
++ ___p; \
++})
++
+ #ifdef CONFIG_CGROUP_PERF
+
+ static inline bool
+@@ -719,34 +736,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
+ return t->time;
+ }
+
+-static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
++static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
+ {
+- struct perf_cgroup_info *info;
+- u64 now;
+-
+- now = perf_clock();
++ struct perf_cgroup_info *t;
+
+- info = this_cpu_ptr(cgrp->info);
++ t = per_cpu_ptr(event->cgrp->info, event->cpu);
++ if (!__load_acquire(&t->active))
++ return t->time;
++ now += READ_ONCE(t->timeoffset);
++ return now;
++}
+
+- info->time += now - info->timestamp;
++static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv)
++{
++ if (adv)
++ info->time += now - info->timestamp;
+ info->timestamp = now;
++ /*
++ * see update_context_time()
++ */
++ WRITE_ONCE(info->timeoffset, info->time - info->timestamp);
+ }
+
+-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
++static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final)
+ {
+ struct perf_cgroup *cgrp = cpuctx->cgrp;
+ struct cgroup_subsys_state *css;
++ struct perf_cgroup_info *info;
+
+ if (cgrp) {
++ u64 now = perf_clock();
++
+ for (css = &cgrp->css; css; css = css->parent) {
+ cgrp = container_of(css, struct perf_cgroup, css);
+- __update_cgrp_time(cgrp);
++ info = this_cpu_ptr(cgrp->info);
++
++ __update_cgrp_time(info, now, true);
++ if (final)
++ __store_release(&info->active, 0);
+ }
+ }
+ }
+
+ static inline void update_cgrp_time_from_event(struct perf_event *event)
+ {
++ struct perf_cgroup_info *info;
+ struct perf_cgroup *cgrp;
+
+ /*
+@@ -760,8 +794,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
+ /*
+ * Do not update time when cgroup is not active
+ */
+- if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
+- __update_cgrp_time(event->cgrp);
++ if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
++ info = this_cpu_ptr(event->cgrp->info);
++ __update_cgrp_time(info, perf_clock(), true);
++ }
+ }
+
+ static inline void
+@@ -785,7 +821,8 @@ perf_cgroup_set_timestamp(struct task_struct *task,
+ for (css = &cgrp->css; css; css = css->parent) {
+ cgrp = container_of(css, struct perf_cgroup, css);
+ info = this_cpu_ptr(cgrp->info);
+- info->timestamp = ctx->timestamp;
++ __update_cgrp_time(info, ctx->timestamp, false);
++ __store_release(&info->active, 1);
+ }
+ }
+
+@@ -981,14 +1018,6 @@ out:
+ return ret;
+ }
+
+-static inline void
+-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+-{
+- struct perf_cgroup_info *t;
+- t = per_cpu_ptr(event->cgrp->info, event->cpu);
+- event->shadow_ctx_time = now - t->timestamp;
+-}
+-
+ static inline void
+ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
+ {
+@@ -1066,7 +1095,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
+ {
+ }
+
+-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
++static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
++ bool final)
+ {
+ }
+
+@@ -1098,12 +1128,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
+ {
+ }
+
+-static inline void
+-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
++static inline u64 perf_cgroup_event_time(struct perf_event *event)
+ {
++ return 0;
+ }
+
+-static inline u64 perf_cgroup_event_time(struct perf_event *event)
++static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
+ {
+ return 0;
+ }
+@@ -1525,22 +1555,59 @@ static void perf_unpin_context(struct perf_event_context *ctx)
+ /*
+ * Update the record of the current time in a context.
+ */
+-static void update_context_time(struct perf_event_context *ctx)
++static void __update_context_time(struct perf_event_context *ctx, bool adv)
+ {
+ u64 now = perf_clock();
+
+- ctx->time += now - ctx->timestamp;
++ if (adv)
++ ctx->time += now - ctx->timestamp;
+ ctx->timestamp = now;
++
++ /*
++ * The above: time' = time + (now - timestamp), can be re-arranged
++ * into: time` = now + (time - timestamp), which gives a single value
++ * offset to compute future time without locks on.
++ *
++ * See perf_event_time_now(), which can be used from NMI context where
++ * it's (obviously) not possible to acquire ctx->lock in order to read
++ * both the above values in a consistent manner.
++ */
++ WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp);
++}
++
++static void update_context_time(struct perf_event_context *ctx)
++{
++ __update_context_time(ctx, true);
+ }
+
+ static u64 perf_event_time(struct perf_event *event)
+ {
+ struct perf_event_context *ctx = event->ctx;
+
++ if (unlikely(!ctx))
++ return 0;
++
+ if (is_cgroup_event(event))
+ return perf_cgroup_event_time(event);
+
+- return ctx ? ctx->time : 0;
++ return ctx->time;
++}
++
++static u64 perf_event_time_now(struct perf_event *event, u64 now)
++{
++ struct perf_event_context *ctx = event->ctx;
++
++ if (unlikely(!ctx))
++ return 0;
++
++ if (is_cgroup_event(event))
++ return perf_cgroup_event_time_now(event, now);
++
++ if (!(__load_acquire(&ctx->is_active) & EVENT_TIME))
++ return ctx->time;
++
++ now += READ_ONCE(ctx->timeoffset);
++ return now;
+ }
+
+ static enum event_type_t get_event_type(struct perf_event *event)
+@@ -2346,7 +2413,7 @@ __perf_remove_from_context(struct perf_event *event,
+
+ if (ctx->is_active & EVENT_TIME) {
+ update_context_time(ctx);
+- update_cgrp_time_from_cpuctx(cpuctx);
++ update_cgrp_time_from_cpuctx(cpuctx, false);
+ }
+
+ event_sched_out(event, cpuctx, ctx);
+@@ -2357,6 +2424,9 @@ __perf_remove_from_context(struct perf_event *event,
+ list_del_event(event, ctx);
+
+ if (!ctx->nr_events && ctx->is_active) {
++ if (ctx == &cpuctx->ctx)
++ update_cgrp_time_from_cpuctx(cpuctx, true);
++
+ ctx->is_active = 0;
+ ctx->rotate_necessary = 0;
+ if (ctx->task) {
+@@ -2478,40 +2548,6 @@ void perf_event_disable_inatomic(struct perf_event *event)
+ irq_work_queue(&event->pending);
+ }
+
+-static void perf_set_shadow_time(struct perf_event *event,
+- struct perf_event_context *ctx)
+-{
+- /*
+- * use the correct time source for the time snapshot
+- *
+- * We could get by without this by leveraging the
+- * fact that to get to this function, the caller
+- * has most likely already called update_context_time()
+- * and update_cgrp_time_xx() and thus both timestamp
+- * are identical (or very close). Given that tstamp is,
+- * already adjusted for cgroup, we could say that:
+- * tstamp - ctx->timestamp
+- * is equivalent to
+- * tstamp - cgrp->timestamp.
+- *
+- * Then, in perf_output_read(), the calculation would
+- * work with no changes because:
+- * - event is guaranteed scheduled in
+- * - no scheduled out in between
+- * - thus the timestamp would be the same
+- *
+- * But this is a bit hairy.
+- *
+- * So instead, we have an explicit cgroup call to remain
+- * within the time source all along. We believe it
+- * is cleaner and simpler to understand.
+- */
+- if (is_cgroup_event(event))
+- perf_cgroup_set_shadow_time(event, event->tstamp);
+- else
+- event->shadow_ctx_time = event->tstamp - ctx->timestamp;
+-}
+-
+ #define MAX_INTERRUPTS (~0ULL)
+
+ static void perf_log_throttle(struct perf_event *event, int enable);
+@@ -2552,8 +2588,6 @@ event_sched_in(struct perf_event *event,
+
+ perf_pmu_disable(event->pmu);
+
+- perf_set_shadow_time(event, ctx);
+-
+ perf_log_itrace_start(event);
+
+ if (event->pmu->add(event, PERF_EF_START)) {
+@@ -3247,16 +3281,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
+ return;
+ }
+
+- ctx->is_active &= ~event_type;
+- if (!(ctx->is_active & EVENT_ALL))
+- ctx->is_active = 0;
+-
+- if (ctx->task) {
+- WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+- if (!ctx->is_active)
+- cpuctx->task_ctx = NULL;
+- }
+-
+ /*
+ * Always update time if it was set; not only when it changes.
+ * Otherwise we can 'forget' to update time for any but the last
+@@ -3270,7 +3294,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
+ if (is_active & EVENT_TIME) {
+ /* update (and stop) ctx time */
+ update_context_time(ctx);
+- update_cgrp_time_from_cpuctx(cpuctx);
++ update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx);
++ /*
++ * CPU-release for the below ->is_active store,
++ * see __load_acquire() in perf_event_time_now()
++ */
++ barrier();
++ }
++
++ ctx->is_active &= ~event_type;
++ if (!(ctx->is_active & EVENT_ALL))
++ ctx->is_active = 0;
++
++ if (ctx->task) {
++ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
++ if (!ctx->is_active)
++ cpuctx->task_ctx = NULL;
+ }
+
+ is_active ^= ctx->is_active; /* changed bits */
+@@ -3707,13 +3746,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
+ return 0;
+ }
+
++/*
++ * Because the userpage is strictly per-event (there is no concept of context,
++ * so there cannot be a context indirection), every userpage must be updated
++ * when context time starts :-(
++ *
++ * IOW, we must not miss EVENT_TIME edges.
++ */
+ static inline bool event_update_userpage(struct perf_event *event)
+ {
+ if (likely(!atomic_read(&event->mmap_count)))
+ return false;
+
+ perf_event_update_time(event);
+- perf_set_shadow_time(event, event->ctx);
+ perf_event_update_userpage(event);
+
+ return true;
+@@ -3797,13 +3842,23 @@ ctx_sched_in(struct perf_event_context *ctx,
+ struct task_struct *task)
+ {
+ int is_active = ctx->is_active;
+- u64 now;
+
+ lockdep_assert_held(&ctx->lock);
+
+ if (likely(!ctx->nr_events))
+ return;
+
++ if (is_active ^ EVENT_TIME) {
++ /* start ctx time */
++ __update_context_time(ctx, false);
++ perf_cgroup_set_timestamp(task, ctx);
++ /*
++ * CPU-release for the below ->is_active store,
++ * see __load_acquire() in perf_event_time_now()
++ */
++ barrier();
++ }
++
+ ctx->is_active |= (event_type | EVENT_TIME);
+ if (ctx->task) {
+ if (!is_active)
+@@ -3814,13 +3869,6 @@ ctx_sched_in(struct perf_event_context *ctx,
+
+ is_active ^= ctx->is_active; /* changed bits */
+
+- if (is_active & EVENT_TIME) {
+- /* start ctx time */
+- now = perf_clock();
+- ctx->timestamp = now;
+- perf_cgroup_set_timestamp(task, ctx);
+- }
+-
+ /*
+ * First go through the list and put on any pinned groups
+ * in order to give them the best chance of going on.
+@@ -4414,6 +4462,18 @@ static inline u64 perf_event_count(struct perf_event *event)
+ return local64_read(&event->count) + atomic64_read(&event->child_count);
+ }
+
++static void calc_timer_values(struct perf_event *event,
++ u64 *now,
++ u64 *enabled,
++ u64 *running)
++{
++ u64 ctx_time;
++
++ *now = perf_clock();
++ ctx_time = perf_event_time_now(event, *now);
++ __perf_update_times(event, ctx_time, enabled, running);
++}
++
+ /*
+ * NMI-safe method to read a local event, that is an event that
+ * is:
+@@ -4473,10 +4533,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
+
+ *value = local64_read(&event->count);
+ if (enabled || running) {
+- u64 now = event->shadow_ctx_time + perf_clock();
+- u64 __enabled, __running;
++ u64 __enabled, __running, __now;
+
+- __perf_update_times(event, now, &__enabled, &__running);
++ calc_timer_values(event, &__now, &__enabled, &__running);
+ if (enabled)
+ *enabled = __enabled;
+ if (running)
+@@ -5798,18 +5857,6 @@ static int perf_event_index(struct perf_event *event)
+ return event->pmu->event_idx(event);
+ }
+
+-static void calc_timer_values(struct perf_event *event,
+- u64 *now,
+- u64 *enabled,
+- u64 *running)
+-{
+- u64 ctx_time;
+-
+- *now = perf_clock();
+- ctx_time = event->shadow_ctx_time + *now;
+- __perf_update_times(event, ctx_time, enabled, running);
+-}
+-
+ static void perf_event_init_userpage(struct perf_event *event)
+ {
+ struct perf_event_mmap_page *userpg;
+@@ -6349,7 +6396,6 @@ accounting:
+ ring_buffer_attach(event, rb);
+
+ perf_event_update_time(event);
+- perf_set_shadow_time(event, event->ctx);
+ perf_event_init_userpage(event);
+ perf_event_update_userpage(event);
+ } else {
+--
+2.34.1
+
--- /dev/null
+From c7a186d60eebdacf2d9454601c5929562826e865 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jan 2022 17:27:48 +0100
+Subject: phylib: fix potential use-after-free
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marek Behún <kabel@kernel.org>
+
+[ Upstream commit cbda1b16687580d5beee38273f6241ae3725960c ]
+
+Commit bafbdd527d56 ("phylib: Add device reset GPIO support") added a
+call to phy_device_reset(phydev) after the put_device() call in
+phy_detach().
+
+The comment before the put_device() call says that the phydev might go
+away with put_device().
+
+Fix potential use-after-free by calling phy_device_reset() before
+put_device().
+
+Fixes: bafbdd527d56 ("phylib: Add device reset GPIO support")
+Signed-off-by: Marek Behún <kabel@kernel.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20220119162748.32418-1-kabel@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_device.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 4f9990b47a377..28f4a383aba72 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -1746,6 +1746,9 @@ void phy_detach(struct phy_device *phydev)
+ phy_driver_is_genphy_10g(phydev))
+ device_release_driver(&phydev->mdio.dev);
+
++ /* Assert the reset signal */
++ phy_device_reset(phydev, 1);
++
+ /*
+ * The phydev might go away on the put_device() below, so avoid
+ * a use-after-free bug by reading the underlying bus first.
+@@ -1757,9 +1760,6 @@ void phy_detach(struct phy_device *phydev)
+ ndev_owner = dev->dev.parent->driver->owner;
+ if (ndev_owner != bus->owner)
+ module_put(bus->owner);
+-
+- /* Assert the reset signal */
+- phy_device_reset(phydev, 1);
+ }
+ EXPORT_SYMBOL(phy_detach);
+
+--
+2.34.1
+
--- /dev/null
+From 7129d076879177c945633bc47433edbb47d7bcd6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jan 2022 23:44:03 +1000
+Subject: powerpc/64s: Mask SRR0 before checking against the masked NIP
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+[ Upstream commit aee101d7b95a03078945681dd7f7ea5e4a1e7686 ]
+
+Commit 314f6c23dd8d ("powerpc/64s: Mask NIP before checking against
+SRR0") masked off the low 2 bits of the NIP value in the interrupt
+stack frame in case they are non-zero and mis-compare against a SRR0
+register value of a CPU which always reads back 0 from the 2 low bits
+which are reserved.
+
+This now causes the opposite problem that an implementation which does
+implement those bits in SRR0 will mis-compare against the masked NIP
+value in which they have been cleared. QEMU is one such implementation,
+and this is allowed by the architecture.
+
+This can be triggered by sigfuz by setting low bits of PT_NIP in the
+signal context.
+
+Fix this for now by masking the SRR0 bits as well. It would probably be
+cleaner to sanitise these values before putting them in registers or on
+the stack, but this is the quick and backportable fix.
+
+Fixes: 314f6c23dd8d ("powerpc/64s: Mask NIP before checking against SRR0")
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220117134403.2995059-1-npiggin@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/kernel/interrupt_64.S | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
+index 4b1ff94e67eb4..4c6d1a8dcefed 100644
+--- a/arch/powerpc/kernel/interrupt_64.S
++++ b/arch/powerpc/kernel/interrupt_64.S
+@@ -30,6 +30,7 @@ COMPAT_SYS_CALL_TABLE:
+ .ifc \srr,srr
+ mfspr r11,SPRN_SRR0
+ ld r12,_NIP(r1)
++ clrrdi r11,r11,2
+ clrrdi r12,r12,2
+ 100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+@@ -40,6 +41,7 @@ COMPAT_SYS_CALL_TABLE:
+ .else
+ mfspr r11,SPRN_HSRR0
+ ld r12,_NIP(r1)
++ clrrdi r11,r11,2
+ clrrdi r12,r12,2
+ 100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+--
+2.34.1
+
--- /dev/null
+From c94c276e869eb8084d695854f48475aa0df1e018 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jan 2022 17:15:12 +0530
+Subject: powerpc64/bpf: Limit 'ldbrx' to processors compliant with ISA v2.06
+
+From: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+
+[ Upstream commit 3f5f766d5f7f95a69a630da3544a1a0cee1cdddf ]
+
+Johan reported the below crash with test_bpf on ppc64 e5500:
+
+ test_bpf: #296 ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301 jited:1
+ Oops: Exception in kernel mode, sig: 4 [#1]
+ BE PAGE_SIZE=4K SMP NR_CPUS=24 QEMU e500
+ Modules linked in: test_bpf(+)
+ CPU: 0 PID: 76 Comm: insmod Not tainted 5.14.0-03771-g98c2059e008a-dirty #1
+ NIP: 8000000000061c3c LR: 80000000006dea64 CTR: 8000000000061c18
+ REGS: c0000000032d3420 TRAP: 0700 Not tainted (5.14.0-03771-g98c2059e008a-dirty)
+ MSR: 0000000080089000 <EE,ME> CR: 88002822 XER: 20000000 IRQMASK: 0
+ <...>
+ NIP [8000000000061c3c] 0x8000000000061c3c
+ LR [80000000006dea64] .__run_one+0x104/0x17c [test_bpf]
+ Call Trace:
+ .__run_one+0x60/0x17c [test_bpf] (unreliable)
+ .test_bpf_init+0x6a8/0xdc8 [test_bpf]
+ .do_one_initcall+0x6c/0x28c
+ .do_init_module+0x68/0x28c
+ .load_module+0x2460/0x2abc
+ .__do_sys_init_module+0x120/0x18c
+ .system_call_exception+0x110/0x1b8
+ system_call_common+0xf0/0x210
+ --- interrupt: c00 at 0x101d0acc
+ <...>
+ ---[ end trace 47b2bf19090bb3d0 ]---
+
+ Illegal instruction
+
+The illegal instruction turned out to be 'ldbrx' emitted for
+BPF_FROM_[L|B]E, which was only introduced in ISA v2.06. Guard use of
+the same and implement an alternative approach for older processors.
+
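+The fallback computes a 64-bit byte reverse from two 32-bit byte
+reverses. A C model of what the emitted instruction sequence computes
+(a sketch, not the JIT code itself):
+
+  #include <stdint.h>
+
+  /* bswap64(v) == (bswap32(low 32 bits) << 32) | bswap32(high 32 bits) */
+  static uint64_t bswap64_via_32(uint64_t v)
+  {
+          uint32_t lo = (uint32_t)v;
+          uint32_t hi = (uint32_t)(v >> 32);
+
+          return ((uint64_t)__builtin_bswap32(lo) << 32) |
+                 __builtin_bswap32(hi);
+  }
+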
+Fixes: 156d0e290e969c ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF")
+Reported-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Tested-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
+Acked-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/d1e51c6fdf572062cf3009a751c3406bda01b832.1641468127.git.naveen.n.rao@linux.vnet.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/ppc-opcode.h | 1 +
+ arch/powerpc/net/bpf_jit_comp64.c | 22 +++++++++++++---------
+ 2 files changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
+index baea657bc8687..bca31a61e57f8 100644
+--- a/arch/powerpc/include/asm/ppc-opcode.h
++++ b/arch/powerpc/include/asm/ppc-opcode.h
+@@ -498,6 +498,7 @@
+ #define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+ #define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+ #define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
++#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+ #define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+ #define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+ #define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i))
+diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
+index 95a337b5dc2b4..57e1b6680365c 100644
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -633,17 +633,21 @@ bpf_alu32_trunc:
+ EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
+ break;
+ case 64:
+- /*
+- * Way easier and faster(?) to store the value
+- * into stack and then use ldbrx
+- *
+- * ctx->seen will be reliable in pass2, but
+- * the instructions generated will remain the
+- * same across all passes
+- */
++ /* Store the value to stack and then use byte-reverse loads */
+ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
+ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
+- EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
++ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
++ EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
++ } else {
++ EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1]));
++ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
++ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
++ EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4));
++ EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1]));
++ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
++ EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32));
++ EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2]));
++ }
+ break;
+ }
+ break;
+--
+2.34.1
+
--- /dev/null
+From 1cbb4005d9ae8dbb89b79eff4f521438ac714bf6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 23:12:58 +0000
+Subject: rxrpc: Adjust retransmission backoff
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 2c13c05c5ff4b9fc907b07f7311821910ebaaf8a ]
+
+Improve retransmission backoff by only backing off when we retransmit data
+packets rather than when we set the lost ack timer.
+
+To this end:
+
+ (1) In rxrpc_resend(), use rxrpc_get_rto_backoff() when setting the
+ retransmission timer and only tell it that we are retransmitting if we
+ actually have things to retransmit.
+
+ Note that it's possible for the retransmission algorithm to race with
+ the processing of a received ACK, so we may see no packets needing
+ retransmission.
+
+ (2) In rxrpc_send_data_packet(), don't bump the backoff when setting the
+ ack_lost_at timer, as it may then get bumped twice.
+
+With this, when looking at one particular packet, the retransmission
+intervals were seen to be 1.5ms, 2ms, 3ms, 5ms, 9ms, 17ms, 33ms, 71ms,
+136ms, 264ms, 544ms, 1.088s, 2.1s, 4.2s and 8.3s.
+
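+A toy model of the intended behaviour, where the backoff shift only
+grows on an actual retransmission (a hypothetical helper, not the
+rxrpc one):
+
+  /* Return the current timeout, bumping the backoff only if we
+   * actually retransmitted something. */
+  static unsigned long rto_backoff(unsigned long rto_j,
+                                   unsigned int *backoff, int retrans)
+  {
+          unsigned long t = rto_j << *backoff;
+
+          if (retrans && *backoff < 8)
+                  (*backoff)++;
+          return t;
+  }
+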
+Fixes: c410bf01933e ("rxrpc: Fix the excessive initial retransmission timeout")
+Suggested-by: Marc Dionne <marc.dionne@auristor.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
+Tested-by: Marc Dionne <marc.dionne@auristor.com>
+cc: linux-afs@lists.infradead.org
+Link: https://lore.kernel.org/r/164138117069.2023386.17446904856843997127.stgit@warthog.procyon.org.uk/
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rxrpc/call_event.c | 8 +++-----
+ net/rxrpc/output.c | 2 +-
+ 2 files changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
+index 6be2672a65eab..df864e6922679 100644
+--- a/net/rxrpc/call_event.c
++++ b/net/rxrpc/call_event.c
+@@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
+ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+ {
+ struct sk_buff *skb;
+- unsigned long resend_at, rto_j;
++ unsigned long resend_at;
+ rxrpc_seq_t cursor, seq, top;
+ ktime_t now, max_age, oldest, ack_ts;
+ int ix;
+@@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+
+ _enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
+
+- rto_j = call->peer->rto_j;
+-
+ now = ktime_get_real();
+- max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
++ max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j));
+
+ spin_lock_bh(&call->lock);
+
+@@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+ }
+
+ resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
+- resend_at += jiffies + rto_j;
++ resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans);
+ WRITE_ONCE(call->resend_at, resend_at);
+
+ if (unacked)
+diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
+index 10f2bf2e9068a..a45c83f22236e 100644
+--- a/net/rxrpc/output.c
++++ b/net/rxrpc/output.c
+@@ -468,7 +468,7 @@ done:
+ if (call->peer->rtt_count > 1) {
+ unsigned long nowj = jiffies, ack_lost_at;
+
+- ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans);
++ ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
+ ack_lost_at += nowj;
+ WRITE_ONCE(call->ack_lost_at, ack_lost_at);
+ rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
+--
+2.34.1
+
--- /dev/null
+From b19759818555424de7d26fde6024f936d388f140 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jan 2022 14:46:56 +0100
+Subject: sched/pelt: Relax the sync of util_sum with util_avg
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit 98b0d890220d45418cfbc5157b3382e6da5a12ab ]
+
+Rick reported performance regressions in bugzilla because of cpu frequency
+being lower than before:
+ https://bugzilla.kernel.org/show_bug.cgi?id=215045
+
+He bisected the problem to:
+commit 1c35b07e6d39 ("sched/fair: Ensure _sum and _avg values stay consistent")
+
+This commit forces util_sum to be synced with the new util_avg after
+removing the contribution of a task and before the next periodic sync. By
+doing so util_sum is rounded to its lower bound and might lose up to
+LOAD_AVG_MAX-1 of accumulated contribution which has not yet been
+reflected in util_avg.
+
+Instead of always setting util_sum to the low bound of util_avg, which can
+significantly lower the utilization of root cfs_rq after propagating the
+change down into the hierarchy, we revert the change of util_sum and
+propagate the difference.
+
+In addition, we also check that cfs's util_sum always stays above the
+lower bound for a given util_avg as it has been observed that
+sched_entity's util_sum is sometimes above the cfs_rq one.
+
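+A simplified model of the removal path after this change (a standalone
+sketch; LOAD_AVG_MAX and the divider handling follow the PELT code):
+
+  #include <stdint.h>
+
+  #define LOAD_AVG_MAX     47742
+  #define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)
+
+  /* Subtract without underflowing, like the kernel's sub_positive(). */
+  static void sub_positive(uint64_t *v, uint64_t d)
+  {
+          *v = (*v > d) ? *v - d : 0;
+  }
+
+  static void remove_util(uint64_t *util_avg, uint64_t *util_sum,
+                          uint64_t removed, uint32_t divider)
+  {
+          sub_positive(util_avg, removed);
+          sub_positive(util_sum, removed * divider);
+          /* Keep util_sum above the minimum consistent with util_avg. */
+          if (*util_sum < *util_avg * PELT_MIN_DIVIDER)
+                  *util_sum = *util_avg * PELT_MIN_DIVIDER;
+  }
+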
+Fixes: 1c35b07e6d39 ("sched/fair: Ensure _sum and _avg values stay consistent")
+Reported-by: Rick Yiu <rickyiu@google.com>
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Tested-by: Sachin Sant <sachinp@linux.ibm.com>
+Link: https://lkml.kernel.org/r/20220111134659.24961-2-vincent.guittot@linaro.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 16 +++++++++++++---
+ kernel/sched/pelt.h | 4 +++-
+ 2 files changed, 16 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index d41f966f5866a..6420580f2730b 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3422,7 +3422,6 @@ void set_task_rq_fair(struct sched_entity *se,
+ se->avg.last_update_time = n_last_update_time;
+ }
+
+-
+ /*
+ * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
+ * propagate its contribution. The key to this propagation is the invariant
+@@ -3490,7 +3489,6 @@ void set_task_rq_fair(struct sched_entity *se,
+ * XXX: only do this for the part of runnable > running ?
+ *
+ */
+-
+ static inline void
+ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
+ {
+@@ -3722,7 +3720,19 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+
+ r = removed_util;
+ sub_positive(&sa->util_avg, r);
+- sa->util_sum = sa->util_avg * divider;
++ sub_positive(&sa->util_sum, r * divider);
++ /*
++ * Because of rounding, se->util_sum might end up being +1 more than
++ * cfs->util_sum. Although this is not a problem by itself, detaching
++ * a lot of tasks with the rounding problem between 2 updates of
++ * util_avg (~1ms) can make cfs->util_sum become zero whereas
++ * cfs->util_avg is not.
++ * Check that util_sum is still above its lower bound for the new
++ * util_avg. Given that period_contrib might have moved since the last
++ * sync, we are only sure that util_sum must be above or equal to
++ * util_avg * the minimum possible divider.
++ */
++ sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
+
+ r = removed_runnable;
+ sub_positive(&sa->runnable_avg, r);
+diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
+index e06071bf3472c..c336f5f481bca 100644
+--- a/kernel/sched/pelt.h
++++ b/kernel/sched/pelt.h
+@@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running)
+ }
+ #endif
+
++#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)
++
+ static inline u32 get_pelt_divider(struct sched_avg *avg)
+ {
+- return LOAD_AVG_MAX - 1024 + avg->period_contrib;
++ return PELT_MIN_DIVIDER + avg->period_contrib;
+ }
+
+ static inline void cfs_se_util_change(struct sched_avg *avg)
+--
+2.34.1
+
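To make the arithmetic above concrete: in PELT, util_avg is
approximately util_sum / divider, where divider is PELT_MIN_DIVIDER
plus the current period_contrib, so util_sum legitimately runs ahead
of util_avg * divider by a not-yet-averaged remainder. The old code
resynced util_sum from util_avg and threw that remainder away; the new
code subtracts the removed contribution from util_sum directly and
only clamps it to the lower bound. A self-contained sketch of the
arithmetic follows; only LOAD_AVG_MAX and PELT_MIN_DIVIDER mirror the
kernel's values, all other numbers are made up for illustration.

	#include <stdio.h>

	#define LOAD_AVG_MAX	 47742			/* max PELT geometric series sum */
	#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)	/* divider at period_contrib == 0 */

	int main(void)
	{
		unsigned long util_avg = 100;
		unsigned int period_contrib = 512;	/* 0..1023 */
		unsigned long divider = PELT_MIN_DIVIDER + period_contrib;
		/* util_sum may carry pending, not-yet-averaged contribution. */
		unsigned long util_sum = util_avg * divider + 40000;
		unsigned long removed = 60;	/* util_avg of a detached task */

		/* Old behaviour: resync util_sum from the new util_avg,
		 * silently discarding the 40000 of pending contribution. */
		unsigned long old_sum = (util_avg - removed) * divider;

		/* New behaviour: subtract the equivalent amount and only
		 * clamp to the lowest util_sum consistent with util_avg. */
		unsigned long new_sum = util_sum - removed * divider;
		unsigned long floor = (util_avg - removed) * PELT_MIN_DIVIDER;
		if (new_sum < floor)
			new_sum = floor;

		printf("resync=%lu subtract+clamp=%lu preserved=%lu\n",
		       old_sum, new_sum, new_sum - old_sum);
		return 0;
	}
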
--- /dev/null
+From e88ab3d1eeb91c03f3d87442859ad933f44f0107 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 16:35:29 -0800
+Subject: selftests: mptcp: fix ipv6 routing setup
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 9846921dba4936d92f7608315b5d1e0a8ec3a538 ]
+
+MPJ ipv6 selftests currently lack a per-link route to the server
+net. Additionally, ipv6 subflow endpoints are created without any
+interface specified. The end result is that in the ipv6 self-tests
+all subflows are created on the same link, leading to expected delays
+and sporadic self-test failures.
+
+Fix the issue by adding the missing setup bits.
+
+Fixes: 523514ed0a99 ("selftests: mptcp: add ADD_ADDR IPv6 test cases")
+Reported-and-tested-by: Geliang Tang <geliang.tang@suse.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 0c12602fa22e8..38777d1ef766f 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -75,6 +75,7 @@ init()
+
+ # let $ns2 reach any $ns1 address from any interface
+ ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
++ ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i
+ done
+ }
+
+@@ -1386,7 +1387,7 @@ ipv6_tests()
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
++ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+ run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+ chk_join_nr "single subflow IPv6" 1 1 1
+
+@@ -1421,7 +1422,7 @@ ipv6_tests()
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
++ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+ run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
+ chk_join_nr "remove subflow and signal IPv6" 2 2 2
+ chk_add_nr 1 1
+--
+2.34.1
+
rpmsg-char-fix-race-between-the-release-of-rpmsg_eptdev-and-cdev.patch
scsi-elx-efct-don-t-use-gfp_kernel-under-spin-lock.patch
scsi-bnx2fc-flush-destroy_work-queue-before-calling-bnx2fc_interface_put.patch
+nfs-ensure-the-server-has-an-up-to-date-ctime-before.patch
+nfs-ensure-the-server-has-an-up-to-date-ctime-before.patch-14723
+nfsv4-handle-case-where-the-lookup-of-a-directory-fa.patch
+nfsv4-nfs_atomic_open-can-race-when-looking-up-a-non.patch
+kvm-arm64-pkvm-use-the-mm_ops-indirection-for-cache-.patch
+sunrpc-use-bit-macro-in-rpc_show_xprt_state.patch
+sunrpc-don-t-dereference-xprt-snd_task-if-it-s-a-coo.patch
+powerpc64-bpf-limit-ldbrx-to-processors-compliant-wi.patch
+netfilter-conntrack-don-t-increment-invalid-counter-.patch
+powerpc-64s-mask-srr0-before-checking-against-the-ma.patch
+perf-fix-perf_event_read_local-time.patch
+sched-pelt-relax-the-sync-of-util_sum-with-util_avg.patch
+arm-9170-1-fix-panic-when-kasan-and-kprobe-are-enabl.patch
+net-fix-information-leakage-in-proc-net-ptype.patch
+net-phy-broadcom-hook-up-soft_reset-for-bcm54616s.patch
+ipv6_tunnel-rate-limit-warning-messages.patch
+net-stmmac-dwmac-visconti-fix-bit-definitions-for-et.patch
+net-stmmac-dwmac-visconti-fix-clock-configuration-fo.patch
+phylib-fix-potential-use-after-free.patch
+ipv6-annotate-accesses-to-fn-fn_sernum.patch
+mptcp-allow-changing-the-backup-bit-by-endpoint-id.patch
+mptcp-clean-up-harmless-false-expressions.patch
+mptcp-keep-track-of-local-endpoint-still-available-f.patch
+mptcp-fix-msk-traversal-in-mptcp_nl_cmd_set_flags.patch
+mptcp-fix-removing-ids-bitmap-setting.patch
+selftests-mptcp-fix-ipv6-routing-setup.patch
+octeontx2-af-do-not-fixup-all-vf-action-entries.patch
+octeontx2-af-fix-lbk-backpressure-id-count.patch
+octeontx2-af-retry-until-rvu-block-reset-complete.patch
+octeontx2-pf-cn10k-ensure-valid-pointers-are-freed-t.patch
+octeontx2-af-verify-cq-context-updates.patch
+octeontx2-af-increase-link-credit-restore-polling-ti.patch
+octeontx2-af-cn10k-do-not-enable-rpm-loopback-for-lp.patch
+octeontx2-pf-forward-error-codes-to-vf.patch
+rxrpc-adjust-retransmission-backoff.patch
+efi-libstub-arm64-fix-image-check-alignment-at-entry.patch
+io_uring-fix-bug-in-slow-unregistering-of-nodes.patch
--- /dev/null
+From 163d8c8237eecaebd61404f0f4f6b7ae73e969bd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jan 2022 12:20:36 -0500
+Subject: SUNRPC: Don't dereference xprt->snd_task if it's a cookie
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit aed28b7a2d620cb5cd0c554cb889075c02e25e8e ]
+
+Fixes: e26d9972720e ("SUNRPC: Clean up scheduling of autoclose")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/sunrpc.h | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index 312507cb341f4..daaf407e9e494 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -936,7 +936,8 @@ TRACE_EVENT(rpc_socket_nospace,
+ { BIT(XPRT_REMOVE), "REMOVE" }, \
+ { BIT(XPRT_CONGESTED), "CONGESTED" }, \
+ { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \
+- { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" })
++ { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }, \
++ { BIT(XPRT_SND_IS_COOKIE), "SND_IS_COOKIE" })
+
+ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class,
+ TP_PROTO(
+@@ -1133,8 +1134,11 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,
+ __entry->task_id = -1;
+ __entry->client_id = -1;
+ }
+- __entry->snd_task_id = xprt->snd_task ?
+- xprt->snd_task->tk_pid : -1;
++ if (xprt->snd_task &&
++ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
++ __entry->snd_task_id = xprt->snd_task->tk_pid;
++ else
++ __entry->snd_task_id = -1;
+ ),
+
+ TP_printk("task:%u@%u snd_task:%u",
+@@ -1178,8 +1182,12 @@ DECLARE_EVENT_CLASS(xprt_cong_event,
+ __entry->task_id = -1;
+ __entry->client_id = -1;
+ }
+- __entry->snd_task_id = xprt->snd_task ?
+- xprt->snd_task->tk_pid : -1;
++ if (xprt->snd_task &&
++ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
++ __entry->snd_task_id = xprt->snd_task->tk_pid;
++ else
++ __entry->snd_task_id = -1;
++
+ __entry->cong = xprt->cong;
+ __entry->cwnd = xprt->cwnd;
+ __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
+--
+2.34.1
+
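The bug class this guards against is worth spelling out: xprt->snd_task
normally points at a struct rpc_task, but when XPRT_SND_IS_COOKIE is
set in xprt->state the same field holds an opaque cookie, so chasing
the pointer would read garbage or fault. Below is a minimal standalone
sketch of the guard pattern using stand-in types (fake_xprt, fake_task,
test_bit_); the real SUNRPC types and test_bit() differ.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define SND_IS_COOKIE 0	/* bit number within state */

	struct fake_task { int tk_pid; };

	struct fake_xprt {
		unsigned long state;
		struct fake_task *snd_task;	/* real pointer OR cookie */
	};

	static bool test_bit_(int nr, const unsigned long *addr)
	{
		return (*addr >> nr) & 1UL;
	}

	static int snd_task_id(const struct fake_xprt *xprt)
	{
		/* Dereference only when snd_task is a genuine pointer. */
		if (xprt->snd_task && !test_bit_(SND_IS_COOKIE, &xprt->state))
			return xprt->snd_task->tk_pid;
		return -1;
	}

	int main(void)
	{
		struct fake_task task = { .tk_pid = 42 };
		struct fake_xprt real = { .state = 0, .snd_task = &task };
		struct fake_xprt cookie = {
			.state = 1UL << SND_IS_COOKIE,
			.snd_task = (struct fake_task *)(uintptr_t)0xdeadbeef,
		};

		printf("real:   %d\n", snd_task_id(&real));   /* 42 */
		printf("cookie: %d\n", snd_task_id(&cookie)); /* -1, no deref */
		return 0;
	}
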
--- /dev/null
+From e4e0ab17ef8c18fafa1713b3aeb1163368e612dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Oct 2021 18:02:38 -0400
+Subject: SUNRPC: Use BIT() macro in rpc_show_xprt_state()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 76497b1adb89175eee85afc437f08a68247314b3 ]
+
+Clean up: BIT() is preferred over open-coding the shift.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/sunrpc.h | 24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index 2d04eb96d4183..312507cb341f4 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -925,18 +925,18 @@ TRACE_EVENT(rpc_socket_nospace,
+
+ #define rpc_show_xprt_state(x) \
+ __print_flags(x, "|", \
+- { (1UL << XPRT_LOCKED), "LOCKED"}, \
+- { (1UL << XPRT_CONNECTED), "CONNECTED"}, \
+- { (1UL << XPRT_CONNECTING), "CONNECTING"}, \
+- { (1UL << XPRT_CLOSE_WAIT), "CLOSE_WAIT"}, \
+- { (1UL << XPRT_BOUND), "BOUND"}, \
+- { (1UL << XPRT_BINDING), "BINDING"}, \
+- { (1UL << XPRT_CLOSING), "CLOSING"}, \
+- { (1UL << XPRT_OFFLINE), "OFFLINE"}, \
+- { (1UL << XPRT_REMOVE), "REMOVE"}, \
+- { (1UL << XPRT_CONGESTED), "CONGESTED"}, \
+- { (1UL << XPRT_CWND_WAIT), "CWND_WAIT"}, \
+- { (1UL << XPRT_WRITE_SPACE), "WRITE_SPACE"})
++ { BIT(XPRT_LOCKED), "LOCKED" }, \
++ { BIT(XPRT_CONNECTED), "CONNECTED" }, \
++ { BIT(XPRT_CONNECTING), "CONNECTING" }, \
++ { BIT(XPRT_CLOSE_WAIT), "CLOSE_WAIT" }, \
++ { BIT(XPRT_BOUND), "BOUND" }, \
++ { BIT(XPRT_BINDING), "BINDING" }, \
++ { BIT(XPRT_CLOSING), "CLOSING" }, \
++ { BIT(XPRT_OFFLINE), "OFFLINE" }, \
++ { BIT(XPRT_REMOVE), "REMOVE" }, \
++ { BIT(XPRT_CONGESTED), "CONGESTED" }, \
++ { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \
++ { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" })
+
+ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class,
+ TP_PROTO(
+--
+2.34.1
+
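BIT(nr) in the kernel is just (1UL << (nr)) (include/vdso/bits.h), so
this patch changes no behaviour; it is carried in this queue so that
the XPRT_SND_IS_COOKIE patch above, whose context lines already use
BIT(), applies cleanly. A trivial standalone check that the two
spellings are interchangeable:

	#include <stdio.h>

	#define BIT(nr) (1UL << (nr))

	enum { XPRT_LOCKED = 0, XPRT_CONNECTED = 1, XPRT_CONNECTING = 2 };

	int main(void)
	{
		unsigned long state = BIT(XPRT_LOCKED) | BIT(XPRT_CONNECTING);

		/* Identical to the open-coded (1UL << x) being replaced. */
		printf("state=%#lx locked=%d connected=%d\n", state,
		       !!(state & BIT(XPRT_LOCKED)),
		       !!(state & BIT(XPRT_CONNECTED)));
		return 0;
	}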