--- /dev/null
+From 169c8ffd95e10e13013b1e2e8f51fbb0e387b99d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Dec 2021 10:08:23 +0100
+Subject: ARM: 9170/1: fix panic when kasan and kprobe are enabled
+
+From: sparkhuang <huangshaobo6@huawei.com>
+
+[ Upstream commit 8b59b0a53c840921b625378f137e88adfa87647e ]
+
+arm32 uses software to simulate the instruction replaced by a
+kprobe. Some instructions may be simulated by constructing
+assembly functions, so before executing the instruction
+simulation, the C code must set up the assembly functions'
+execution environment by binding registers. Once KASAN is
+enabled, its instrumentation clobbers that register binding,
+resulting in instruction simulation errors and a kernel panic.
+
+The kprobe instruction-emulation functions are spread across three
+files - actions-common.c, actions-arm.c and actions-thumb.c - so
+disable KASAN when compiling these files.
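+
+For illustration only (not from the commit): KASAN instruments every
+memory access with a check call, so a 4-byte load conceptually becomes:
+
+  __asan_load4((unsigned long)&p->field); /* call clobbers r0-r3 (AAPCS) */
+  val = p->field;
+
+Those compiler-inserted calls are what destroy the hand-maintained
+register bindings in the emulation helpers.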
+
+For example, after inserting a kprobe at cap_capable+20 with KASAN
+enabled, the cap_capable assembly code is as follows:
+<cap_capable>:
+e92d47f0 push {r4, r5, r6, r7, r8, r9, sl, lr}
+e1a05000 mov r5, r0
+e280006c add r0, r0, #108 ; 0x6c
+e1a04001 mov r4, r1
+e1a06002 mov r6, r2
+e59fa090 ldr sl, [pc, #144] ;
+ebfc7bf8 bl c03aa4b4 <__asan_load4>
+e595706c ldr r7, [r5, #108] ; 0x6c
+e2859014 add r9, r5, #20
+......
+The emulate_ldr assembly code after enabling KASAN is as follows:
+c06f1384 <emulate_ldr>:
+e92d47f0 push {r4, r5, r6, r7, r8, r9, sl, lr}
+e282803c add r8, r2, #60 ; 0x3c
+e1a05000 mov r5, r0
+e7e37855 ubfx r7, r5, #16, #4
+e1a00008 mov r0, r8
+e1a09001 mov r9, r1
+e1a04002 mov r4, r2
+ebf35462 bl c03c6530 <__asan_load4>
+e357000f cmp r7, #15
+e7e36655 ubfx r6, r5, #12, #4
+e205a00f and sl, r5, #15
+0a000001 beq c06f13bc <emulate_ldr+0x38>
+e0840107 add r0, r4, r7, lsl #2
+ebf3545c bl c03c6530 <__asan_load4>
+e084010a add r0, r4, sl, lsl #2
+ebf3545a bl c03c6530 <__asan_load4>
+e2890010 add r0, r9, #16
+ebf35458 bl c03c6530 <__asan_load4>
+e5990010 ldr r0, [r9, #16]
+e12fff30 blx r0
+e356000f cmp r6, #15
+1a000014 bne c06f1430 <emulate_ldr+0xac>
+e1a06000 mov r6, r0
+e2840040 add r0, r4, #64 ; 0x40
+......
+
+When emulate_ldr runs to simulate the ldr instruction, a panic
+occurs, and the log is as follows:
+Unable to handle kernel NULL pointer dereference at virtual address
+00000090
+pgd = ecb46400
+[00000090] *pgd=2e0fa003, *pmd=00000000
+Internal error: Oops: 206 [#1] SMP ARM
+PC is at cap_capable+0x14/0xb0
+LR is at emulate_ldr+0x50/0xc0
+psr: 600d0293 sp : ecd63af8 ip : 00000004 fp : c0a7c30c
+r10: 00000000 r9 : c30897f4 r8 : ecd63cd4
+r7 : 0000000f r6 : 0000000a r5 : e59fa090 r4 : ecd63c98
+r3 : c06ae294 r2 : 00000000 r1 : b7611300 r0 : bf4ec008
+Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user
+Control: 32c5387d Table: 2d546400 DAC: 55555555
+Process bash (pid: 1643, stack limit = 0xecd60190)
+(cap_capable) from (kprobe_handler+0x218/0x340)
+(kprobe_handler) from (kprobe_trap_handler+0x24/0x48)
+(kprobe_trap_handler) from (do_undefinstr+0x13c/0x364)
+(do_undefinstr) from (__und_svc_finish+0x0/0x30)
+(__und_svc_finish) from (cap_capable+0x18/0xb0)
+(cap_capable) from (cap_vm_enough_memory+0x38/0x48)
+(cap_vm_enough_memory) from
+(security_vm_enough_memory_mm+0x48/0x6c)
+(security_vm_enough_memory_mm) from
+(copy_process.constprop.5+0x16b4/0x25c8)
+(copy_process.constprop.5) from (_do_fork+0xe8/0x55c)
+(_do_fork) from (SyS_clone+0x1c/0x24)
+(SyS_clone) from (__sys_trace_return+0x0/0x10)
+Code: 0050a0e1 6c0080e2 0140a0e1 0260a0e1 (f801f0e7)
+
+Fixes: 35aa1df43283 ("ARM kprobes: instruction single-stepping support")
+Fixes: 421015713b30 ("ARM: 9017/2: Enable KASan for ARM")
+Signed-off-by: huangshaobo <huangshaobo6@huawei.com>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/probes/kprobes/Makefile | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/arm/probes/kprobes/Makefile b/arch/arm/probes/kprobes/Makefile
+index 14db56f49f0a3..6159010dac4a6 100644
+--- a/arch/arm/probes/kprobes/Makefile
++++ b/arch/arm/probes/kprobes/Makefile
+@@ -1,4 +1,7 @@
+ # SPDX-License-Identifier: GPL-2.0
++KASAN_SANITIZE_actions-common.o := n
++KASAN_SANITIZE_actions-arm.o := n
++KASAN_SANITIZE_actions-thumb.o := n
+ obj-$(CONFIG_KPROBES) += core.o actions-common.o checkers-common.o
+ obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o
+ test-kprobes-objs := test-core.o
+--
+2.34.1
+
--- /dev/null
+From ef3193384de640c25668a8dde90b6a4397e864b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jan 2022 18:14:27 +0200
+Subject: efi/libstub: arm64: Fix image check alignment at entry
+
+From: Mihai Carabas <mihai.carabas@oracle.com>
+
+[ Upstream commit e9b7c3a4263bdcfd31bc3d03d48ce0ded7a94635 ]
+
+The kernel is aligned at SEGMENT_ALIGN, and this is the alignment populated
+in the PE headers:
+
+arch/arm64/kernel/efi-header.S: .long SEGMENT_ALIGN // SectionAlignment
+
+EFI_KIMG_ALIGN is defined as: (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN :
+THREAD_ALIGN)
+
+So it depends on THREAD_ALIGN. On newer builds the "kernel image not
+aligned" warning started to appear even though the loader takes the PE
+header (which states SEGMENT_ALIGN) into account.
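+
+A sketch of why the old check could misfire (configuration values are an
+assumption, not from the commit): a UEFI loader honouring the PE
+SectionAlignment places the image on a SEGMENT_ALIGN boundary, but
+EFI_KIMG_ALIGN can be stricter:
+
+  #define EFI_KIMG_ALIGN \
+          (SEGMENT_ALIGN > THREAD_ALIGN ? SEGMENT_ALIGN : THREAD_ALIGN)
+  /* e.g. with VMAP_STACK, THREAD_ALIGN = 2 * THREAD_SIZE and may exceed
+   * SEGMENT_ALIGN, so IS_ALIGNED(_text, EFI_KIMG_ALIGN) can fail even
+   * for a correctly loaded image. */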
+
+Fixes: c32ac11da3f8 ("efi/libstub: arm64: Double check image alignment at entry")
+Signed-off-by: Mihai Carabas <mihai.carabas@oracle.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/libstub/arm64-stub.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c
+index 2363fee9211c9..9cc556013d085 100644
+--- a/drivers/firmware/efi/libstub/arm64-stub.c
++++ b/drivers/firmware/efi/libstub/arm64-stub.c
+@@ -119,9 +119,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
+ if (image->image_base != _text)
+ efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n");
+
+- if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN))
+- efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n",
+- EFI_KIMG_ALIGN >> 10);
++ if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN))
++ efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n",
++ SEGMENT_ALIGN >> 10);
+
+ kernel_size = _edata - _text;
+ kernel_memsize = kernel_size + (_end - _edata);
+--
+2.34.1
+
--- /dev/null
+From 2adb2fe8cd76a1617463803e1c7ed2bc1f2f0768 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 09:41:12 -0800
+Subject: ipv6: annotate accesses to fn->fn_sernum
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit aafc2e3285c2d7a79b7ee15221c19fbeca7b1509 ]
+
+struct fib6_node's fn_sernum field can be
+read while other threads change it.
+
+Add READ_ONCE()/WRITE_ONCE() annotations.
+
+Do not change the existing smp barriers in fib6_get_cookie_safe()
+and __fib6_update_sernum_upto_root().
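+
+A sketch of the resulting pattern (simplified from the diff below):
+
+  /* writer, tb6_lock held; pairs with smp_rmb() in the reader */
+  smp_wmb();
+  WRITE_ONCE(fn->fn_sernum, sernum);
+
+  /* lockless reader; pairs with smp_wmb() in the writer */
+  *cookie = READ_ONCE(fn->fn_sernum);
+  smp_rmb();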
+
+syzbot reported:
+
+BUG: KCSAN: data-race in fib6_clean_node / inet6_csk_route_socket
+
+write to 0xffff88813df62e2c of 4 bytes by task 1920 on cpu 1:
+ fib6_clean_node+0xc2/0x260 net/ipv6/ip6_fib.c:2178
+ fib6_walk_continue+0x38e/0x430 net/ipv6/ip6_fib.c:2112
+ fib6_walk net/ipv6/ip6_fib.c:2160 [inline]
+ fib6_clean_tree net/ipv6/ip6_fib.c:2240 [inline]
+ __fib6_clean_all+0x1a9/0x2e0 net/ipv6/ip6_fib.c:2256
+ fib6_flush_trees+0x6c/0x80 net/ipv6/ip6_fib.c:2281
+ rt_genid_bump_ipv6 include/net/net_namespace.h:488 [inline]
+ addrconf_dad_completed+0x57f/0x870 net/ipv6/addrconf.c:4230
+ addrconf_dad_work+0x908/0x1170
+ process_one_work+0x3f6/0x960 kernel/workqueue.c:2307
+ worker_thread+0x616/0xa70 kernel/workqueue.c:2454
+ kthread+0x1bf/0x1e0 kernel/kthread.c:359
+ ret_from_fork+0x1f/0x30
+
+read to 0xffff88813df62e2c of 4 bytes by task 15701 on cpu 0:
+ fib6_get_cookie_safe include/net/ip6_fib.h:285 [inline]
+ rt6_get_cookie include/net/ip6_fib.h:306 [inline]
+ ip6_dst_store include/net/ip6_route.h:234 [inline]
+ inet6_csk_route_socket+0x352/0x3c0 net/ipv6/inet6_connection_sock.c:109
+ inet6_csk_xmit+0x91/0x1e0 net/ipv6/inet6_connection_sock.c:121
+ __tcp_transmit_skb+0x1323/0x1840 net/ipv4/tcp_output.c:1402
+ tcp_transmit_skb net/ipv4/tcp_output.c:1420 [inline]
+ tcp_write_xmit+0x1450/0x4460 net/ipv4/tcp_output.c:2680
+ __tcp_push_pending_frames+0x68/0x1c0 net/ipv4/tcp_output.c:2864
+ tcp_push+0x2d9/0x2f0 net/ipv4/tcp.c:725
+ mptcp_push_release net/mptcp/protocol.c:1491 [inline]
+ __mptcp_push_pending+0x46c/0x490 net/mptcp/protocol.c:1578
+ mptcp_sendmsg+0x9ec/0xa50 net/mptcp/protocol.c:1764
+ inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:643
+ sock_sendmsg_nosec net/socket.c:705 [inline]
+ sock_sendmsg net/socket.c:725 [inline]
+ kernel_sendmsg+0x97/0xd0 net/socket.c:745
+ sock_no_sendpage+0x84/0xb0 net/core/sock.c:3086
+ inet_sendpage+0x9d/0xc0 net/ipv4/af_inet.c:834
+ kernel_sendpage+0x187/0x200 net/socket.c:3492
+ sock_sendpage+0x5a/0x70 net/socket.c:1007
+ pipe_to_sendpage+0x128/0x160 fs/splice.c:364
+ splice_from_pipe_feed fs/splice.c:418 [inline]
+ __splice_from_pipe+0x207/0x500 fs/splice.c:562
+ splice_from_pipe fs/splice.c:597 [inline]
+ generic_splice_sendpage+0x94/0xd0 fs/splice.c:746
+ do_splice_from fs/splice.c:767 [inline]
+ direct_splice_actor+0x80/0xa0 fs/splice.c:936
+ splice_direct_to_actor+0x345/0x650 fs/splice.c:891
+ do_splice_direct+0x106/0x190 fs/splice.c:979
+ do_sendfile+0x675/0xc40 fs/read_write.c:1245
+ __do_sys_sendfile64 fs/read_write.c:1310 [inline]
+ __se_sys_sendfile64 fs/read_write.c:1296 [inline]
+ __x64_sys_sendfile64+0x102/0x140 fs/read_write.c:1296
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+value changed: 0x0000026f -> 0x00000271
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 15701 Comm: syz-executor.2 Not tainted 5.16.0-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+
+The Fixes tag I chose is probably arbitrary, I do not think
+we need to backport this patch to older kernels.
+
+Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Link: https://lore.kernel.org/r/20220120174112.1126644-1-eric.dumazet@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip6_fib.h | 2 +-
+ net/ipv6/ip6_fib.c | 23 +++++++++++++----------
+ net/ipv6/route.c | 2 +-
+ 3 files changed, 15 insertions(+), 12 deletions(-)
+
+diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
+index 83b8070d1cc93..c85b040728d7e 100644
+--- a/include/net/ip6_fib.h
++++ b/include/net/ip6_fib.h
+@@ -281,7 +281,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i,
+ fn = rcu_dereference(f6i->fib6_node);
+
+ if (fn) {
+- *cookie = fn->fn_sernum;
++ *cookie = READ_ONCE(fn->fn_sernum);
+ /* pairs with smp_wmb() in __fib6_update_sernum_upto_root() */
+ smp_rmb();
+ status = true;
+diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
+index 0371d2c141455..a506e57c4032a 100644
+--- a/net/ipv6/ip6_fib.c
++++ b/net/ipv6/ip6_fib.c
+@@ -111,7 +111,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i)
+ fn = rcu_dereference_protected(f6i->fib6_node,
+ lockdep_is_held(&f6i->fib6_table->tb6_lock));
+ if (fn)
+- fn->fn_sernum = fib6_new_sernum(net);
++ WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net));
+ }
+
+ /*
+@@ -589,12 +589,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
+ spin_unlock_bh(&table->tb6_lock);
+ if (res > 0) {
+ cb->args[4] = 1;
+- cb->args[5] = w->root->fn_sernum;
++ cb->args[5] = READ_ONCE(w->root->fn_sernum);
+ }
+ } else {
+- if (cb->args[5] != w->root->fn_sernum) {
++ int sernum = READ_ONCE(w->root->fn_sernum);
++ if (cb->args[5] != sernum) {
+ /* Begin at the root if the tree changed */
+- cb->args[5] = w->root->fn_sernum;
++ cb->args[5] = sernum;
+ w->state = FWS_INIT;
+ w->node = w->root;
+ w->skip = w->count;
+@@ -1344,7 +1345,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt,
+ /* paired with smp_rmb() in fib6_get_cookie_safe() */
+ smp_wmb();
+ while (fn) {
+- fn->fn_sernum = sernum;
++ WRITE_ONCE(fn->fn_sernum, sernum);
+ fn = rcu_dereference_protected(fn->parent,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ }
+@@ -2173,8 +2174,8 @@ static int fib6_clean_node(struct fib6_walker *w)
+ };
+
+ if (c->sernum != FIB6_NO_SERNUM_CHANGE &&
+- w->node->fn_sernum != c->sernum)
+- w->node->fn_sernum = c->sernum;
++ READ_ONCE(w->node->fn_sernum) != c->sernum)
++ WRITE_ONCE(w->node->fn_sernum, c->sernum);
+
+ if (!c->func) {
+ WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE);
+@@ -2542,7 +2543,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ iter->w.args = iter;
+- iter->sernum = iter->w.root->fn_sernum;
++ iter->sernum = READ_ONCE(iter->w.root->fn_sernum);
+ INIT_LIST_HEAD(&iter->w.lh);
+ fib6_walker_link(net, &iter->w);
+ }
+@@ -2570,8 +2571,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
+
+ static void ipv6_route_check_sernum(struct ipv6_route_iter *iter)
+ {
+- if (iter->sernum != iter->w.root->fn_sernum) {
+- iter->sernum = iter->w.root->fn_sernum;
++ int sernum = READ_ONCE(iter->w.root->fn_sernum);
++
++ if (iter->sernum != sernum) {
++ iter->sernum = sernum;
+ iter->w.state = FWS_INIT;
+ iter->w.node = iter->w.root;
+ WARN_ON(iter->w.skip);
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 0632382a5427b..3c5bb49692206 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -2802,7 +2802,7 @@ static void ip6_link_failure(struct sk_buff *skb)
+ if (from) {
+ fn = rcu_dereference(from->fib6_node);
+ if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+- fn->fn_sernum = -1;
++ WRITE_ONCE(fn->fn_sernum, -1);
+ }
+ }
+ rcu_read_unlock();
+--
+2.34.1
+
--- /dev/null
+From 006c8ca2dd19aa9e6a73cb7e89e4f4a7af8f271b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 10:05:46 +0200
+Subject: ipv6_tunnel: Rate limit warning messages
+
+From: Ido Schimmel <idosch@nvidia.com>
+
+[ Upstream commit 6cee105e7f2ced596373951d9ea08dacc3883c68 ]
+
+The warning messages can be invoked from the data path for every packet
+transmitted through an ip6gre netdev, leading to high CPU utilization.
+
+Fix that by rate limiting the messages.
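+
+A minimal sketch of the pattern ('misconfigured' is an illustrative
+name): pr_warn_ratelimited() wraps pr_warn() in a per-callsite ratelimit
+state, so a per-packet error path can no longer flood the log:
+
+  if (unlikely(misconfigured))  /* may be hit for every packet sent */
+          pr_warn_ratelimited("%s: dropping packet\n", name);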
+
+Fixes: 09c6bbf090ec ("[IPV6]: Do mandatory IPv6 tunnel endpoint checks in realtime")
+Reported-by: Maksym Yaremchuk <maksymy@nvidia.com>
+Tested-by: Maksym Yaremchuk <maksymy@nvidia.com>
+Signed-off-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Amit Cohen <amcohen@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6_tunnel.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
+index 20a67efda47f5..fa8da8ff35b42 100644
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1036,14 +1036,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
+
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
+- pr_warn("%s xmit: Local address not yet configured!\n",
+- p->name);
++ pr_warn_ratelimited("%s xmit: Local address not yet configured!\n",
++ p->name);
+ else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
+ !ipv6_addr_is_multicast(raddr) &&
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
+- pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
+- p->name);
++ pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n",
++ p->name);
+ else
+ ret = 1;
+ rcu_read_unlock();
+--
+2.34.1
+
--- /dev/null
+From faa9223e29cc788ae6234c967529d1978f9d9e27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jan 2022 08:57:58 +0000
+Subject: KVM: arm64: pkvm: Use the mm_ops indirection for cache maintenance
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 094d00f8ca58c5d29b25e23b4daaed1ff1f13b41 ]
+
+CMOs issued from EL2 cannot directly use the kernel helpers,
+as EL2 doesn't have a mapping of the guest pages. Oops.
+
+Instead, use the mm_ops indirection to use helpers that will
+perform a mapping at EL2 and allow the CMO to be effective.
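+
+Sketch of the indirection (condensed from the diff below): the CMO is
+only issued through the optional mm_ops callback, which the pKVM backend
+can implement with a helper that maps the page at EL2 first:
+
+  if (mm_ops->dcache_clean_inval_poc)
+          mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+                                         kvm_granule_size(level));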
+
+Fixes: 25aa28691bb9 ("KVM: arm64: Move guest CMOs to the fault handlers")
+Reviewed-by: Quentin Perret <qperret@google.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20220114125038.1336965-1-maz@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/kvm/hyp/pgtable.c | 18 ++++++------------
+ 1 file changed, 6 insertions(+), 12 deletions(-)
+
+diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
+index f8ceebe4982eb..4c77ff556f0ae 100644
+--- a/arch/arm64/kvm/hyp/pgtable.c
++++ b/arch/arm64/kvm/hyp/pgtable.c
+@@ -921,13 +921,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ */
+ stage2_put_pte(ptep, mmu, addr, level, mm_ops);
+
+- if (need_flush) {
+- kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops);
+-
+- dcache_clean_inval_poc((unsigned long)pte_follow,
+- (unsigned long)pte_follow +
+- kvm_granule_size(level));
+- }
++ if (need_flush && mm_ops->dcache_clean_inval_poc)
++ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
++ kvm_granule_size(level));
+
+ if (childp)
+ mm_ops->put_page(childp);
+@@ -1089,15 +1085,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+ struct kvm_pgtable *pgt = arg;
+ struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
+ kvm_pte_t pte = *ptep;
+- kvm_pte_t *pte_follow;
+
+ if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
+ return 0;
+
+- pte_follow = kvm_pte_follow(pte, mm_ops);
+- dcache_clean_inval_poc((unsigned long)pte_follow,
+- (unsigned long)pte_follow +
+- kvm_granule_size(level));
++ if (mm_ops->dcache_clean_inval_poc)
++ mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
++ kvm_granule_size(level));
+ return 0;
+ }
+
+--
+2.34.1
+
--- /dev/null
+From 68a8ab696c5cbc469b8d85f8fcb5da8df8bb4f90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Dec 2021 14:35:36 -0800
+Subject: mptcp: allow changing the "backup" bit by endpoint id
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+[ Upstream commit 602837e8479d20d49559b4b97b79d34c0efe7ecb ]
+
+A non-zero 'id' is sufficient to identify MPTCP endpoints: allow changing
+the value of the 'backup' bit by simply specifying the endpoint id.
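+
+Sketch of the new fallback (condensed from the diff below): when no
+address is supplied, match by endpoint id instead, rejecting the
+reserved id 0:
+
+  if (addr.addr.family == AF_UNSPEC) {
+          lookup_by_id = 1;
+          if (!addr.addr.id)
+                  return -EOPNOTSUPP;
+  }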
+
+Link: https://github.com/multipath-tcp/mptcp_net-next/issues/158
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index d96860053816a..3be10bf22cf7c 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -1711,22 +1711,28 @@ next:
+
+ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
+ {
++ struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
+ struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
+ struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
+- struct mptcp_pm_addr_entry addr, *entry;
+ struct net *net = sock_net(skb->sk);
+- u8 bkup = 0;
++ u8 bkup = 0, lookup_by_id = 0;
+ int ret;
+
+- ret = mptcp_pm_parse_addr(attr, info, true, &addr);
++ ret = mptcp_pm_parse_addr(attr, info, false, &addr);
+ if (ret < 0)
+ return ret;
+
+ if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
+ bkup = 1;
++ if (addr.addr.family == AF_UNSPEC) {
++ lookup_by_id = 1;
++ if (!addr.addr.id)
++ return -EOPNOTSUPP;
++ }
+
+ list_for_each_entry(entry, &pernet->local_addr_list, list) {
+- if (addresses_equal(&entry->addr, &addr.addr, true)) {
++ if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
++ (lookup_by_id && entry->addr.id == addr.addr.id)) {
+ mptcp_nl_addr_backup(net, &entry->addr, bkup);
+
+ if (bkup)
+--
+2.34.1
+
--- /dev/null
+From ba9ade4de4e8f2fc4cbf8320e093ebf8f059014f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Dec 2021 15:37:02 -0800
+Subject: mptcp: clean up harmless false expressions
+
+From: Jean Sacren <sakiwit@gmail.com>
+
+[ Upstream commit 59060a47ca50bbdb1d863b73667a1065873ecc06 ]
+
+entry->addr.id is a u8 with a range from 0 to 255, and MAX_ADDR_ID is 255,
+so (entry->addr.id > MAX_ADDR_ID) is always false. We should drop both of
+these always-false expressions.
+
+We should also remove the obsolete parentheses in the first if branch.
+
+Use U8_MAX for MAX_ADDR_ID and add a comment to show the link to
+mptcp_addr_info.id as suggested by Mr. Matthieu Baerts.
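+
+A worked illustration (not from the commit) of why the tests are dead
+code:
+
+  u8 id = entry->addr.id;  /* by type, 0 <= id <= 255 */
+  /* MAX_ADDR_ID == U8_MAX == 255, so (id > MAX_ADDR_ID) can never be
+   * true and the compiler may drop the branch entirely. */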
+
+Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
+Signed-off-by: Jean Sacren <sakiwit@gmail.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 3be10bf22cf7c..15c89d4fea4d2 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -38,7 +38,8 @@ struct mptcp_pm_add_entry {
+ u8 retrans_times;
+ };
+
+-#define MAX_ADDR_ID 255
++/* max value of mptcp_addr_info.id */
++#define MAX_ADDR_ID U8_MAX
+ #define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
+
+ struct pm_nl_pernet {
+@@ -831,14 +832,13 @@ find_next:
+ entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
+ MAX_ADDR_ID + 1,
+ pernet->next_id);
+- if ((!entry->addr.id || entry->addr.id > MAX_ADDR_ID) &&
+- pernet->next_id != 1) {
++ if (!entry->addr.id && pernet->next_id != 1) {
+ pernet->next_id = 1;
+ goto find_next;
+ }
+ }
+
+- if (!entry->addr.id || entry->addr.id > MAX_ADDR_ID)
++ if (!entry->addr.id)
+ goto out;
+
+ __set_bit(entry->addr.id, pernet->id_bitmap);
+--
+2.34.1
+
--- /dev/null
+From 1eefa603bbf4fca0a4f4208e8f95f139588869e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 16:35:27 -0800
+Subject: mptcp: fix msk traversal in mptcp_nl_cmd_set_flags()
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 8e9eacad7ec7a9cbf262649ebf1fa6e6f6cc7d82 ]
+
+The MPTCP endpoint list is under RCU protection, guarded by the
+pernet spinlock. mptcp_nl_cmd_set_flags() traverses the list
+without acquiring the spin-lock nor under the RCU critical section.
+
+This change addresses the issue by performing the lookup and the
+endpoint update under the pernet spinlock.
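+
+Sketch of the locking scheme (condensed from the diff below): look up
+and update the entry under the pernet spinlock, take a copy, and only
+call the socket-touching helper after dropping the lock:
+
+  spin_lock_bh(&pernet->lock);
+  entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
+  if (!entry) {
+          spin_unlock_bh(&pernet->lock);
+          return -EINVAL;
+  }
+  /* ... update entry->flags ... */
+  addr = *entry;
+  spin_unlock_bh(&pernet->lock);
+  mptcp_nl_addr_backup(net, &addr.addr, bkup);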
+
+Fixes: 0f9f696a502e ("mptcp: add set_flags command in PM netlink")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 37 +++++++++++++++++++++++++++----------
+ 1 file changed, 27 insertions(+), 10 deletions(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index bba166ddacc78..7f11eb3e35137 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -469,6 +469,20 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
+ return NULL;
+ }
+
++static struct mptcp_pm_addr_entry *
++__lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info,
++ bool lookup_by_id)
++{
++ struct mptcp_pm_addr_entry *entry;
++
++ list_for_each_entry(entry, &pernet->local_addr_list, list) {
++ if ((!lookup_by_id && addresses_equal(&entry->addr, info, true)) ||
++ (lookup_by_id && entry->addr.id == info->id))
++ return entry;
++ }
++ return NULL;
++}
++
+ static int
+ lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
+ {
+@@ -1753,18 +1767,21 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
+ return -EOPNOTSUPP;
+ }
+
+- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+- if ((!lookup_by_id && addresses_equal(&entry->addr, &addr.addr, true)) ||
+- (lookup_by_id && entry->addr.id == addr.addr.id)) {
+- mptcp_nl_addr_backup(net, &entry->addr, bkup);
+-
+- if (bkup)
+- entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+- else
+- entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+- }
++ spin_lock_bh(&pernet->lock);
++ entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
++ if (!entry) {
++ spin_unlock_bh(&pernet->lock);
++ return -EINVAL;
+ }
+
++ if (bkup)
++ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
++ else
++ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
++ addr = *entry;
++ spin_unlock_bh(&pernet->lock);
++
++ mptcp_nl_addr_backup(net, &addr.addr, bkup);
+ return 0;
+ }
+
+--
+2.34.1
+
--- /dev/null
+From b607a1d248f0c837a4a59bd0f54b35c871916765 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 16:35:28 -0800
+Subject: mptcp: fix removing ids bitmap setting
+
+From: Geliang Tang <geliang.tang@suse.com>
+
+[ Upstream commit a4c0214fbee97c46e3f41fee37931d66c0fc3cb1 ]
+
+In mptcp_pm_nl_rm_addr_or_subflow(), the bit for rm_list->ids[i] in the
+id_avail_bitmap should be set, not the one for rm_list->ids[1]. This
+patch fixes it.
+
+Fixes: 86e39e04482b ("mptcp: keep track of local endpoint still available for each msk")
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Geliang Tang <geliang.tang@suse.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm_netlink.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 7f11eb3e35137..84e6b55375e1d 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -781,7 +781,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+ msk->pm.subflows--;
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
+ }
+- __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
++ __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
+ if (!removed)
+ continue;
+
+--
+2.34.1
+
--- /dev/null
+From 96327b679e618f5e286c988cdfa012750e660571 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jan 2022 16:20:22 -0800
+Subject: mptcp: keep track of local endpoint still available for each msk
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 86e39e04482b0aadf3ee3ed5fcf2d63816559d36 ]
+
+Include into the path manager status a bitmap tracking the list
+of local endpoints still available - not yet used - for the
+relevant mptcp socket.
+
+Keep this map updated at endpoint creation/deletion time, so
+that we can easily skip already-used endpoints at local address
+selection time.
+
+The endpoint used by the initial subflow is lazily accounted at
+subflow creation time: the usage bitmap is up to date before
+endpoint selection, and we avoid this unneeded work in some relevant
+scenarios - e.g. busy servers accepting incoming subflows but
+not creating any additional ones nor announcing additional addresses.
+
+Overall this allows for fair local endpoints usage in case of
+subflow failure.
+
+As a side effect, this patch also enforces that each endpoint
+is used at most once for each mptcp connection.
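+
+Sketch of the accounting (condensed from the diff below): each msk
+starts with every endpoint id available, address selection skips
+consumed ids, and removal returns an id to the pool:
+
+  bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+
+  if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
+          continue;                      /* endpoint already in use */
+  __clear_bit(entry->addr.id, msk->pm.id_avail_bitmap); /* consume it */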
+
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/pm.c | 1 +
+ net/mptcp/pm_netlink.c | 125 +++++++++++-------
+ net/mptcp/protocol.c | 3 +-
+ net/mptcp/protocol.h | 12 +-
+ .../testing/selftests/net/mptcp/mptcp_join.sh | 5 +-
+ 5 files changed, 91 insertions(+), 55 deletions(-)
+
+diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
+index 6ab386ff32944..332ac6eda3ba4 100644
+--- a/net/mptcp/pm.c
++++ b/net/mptcp/pm.c
+@@ -370,6 +370,7 @@ void mptcp_pm_data_init(struct mptcp_sock *msk)
+ WRITE_ONCE(msk->pm.accept_subflow, false);
+ WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
+ msk->pm.status = 0;
++ bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+
+ spin_lock_init(&msk->pm.lock);
+ INIT_LIST_HEAD(&msk->pm.anno_list);
+diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
+index 15c89d4fea4d2..bba166ddacc78 100644
+--- a/net/mptcp/pm_netlink.c
++++ b/net/mptcp/pm_netlink.c
+@@ -38,10 +38,6 @@ struct mptcp_pm_add_entry {
+ u8 retrans_times;
+ };
+
+-/* max value of mptcp_addr_info.id */
+-#define MAX_ADDR_ID U8_MAX
+-#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
+-
+ struct pm_nl_pernet {
+ /* protects pernet updates */
+ spinlock_t lock;
+@@ -53,14 +49,14 @@ struct pm_nl_pernet {
+ unsigned int local_addr_max;
+ unsigned int subflows_max;
+ unsigned int next_id;
+- unsigned long id_bitmap[BITMAP_SZ];
++ DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ };
+
+ #define MPTCP_PM_ADDR_MAX 8
+ #define ADD_ADDR_RETRANS_MAX 3
+
+ static bool addresses_equal(const struct mptcp_addr_info *a,
+- struct mptcp_addr_info *b, bool use_port)
++ const struct mptcp_addr_info *b, bool use_port)
+ {
+ bool addr_equals = false;
+
+@@ -174,6 +170,9 @@ select_local_address(const struct pm_nl_pernet *pernet,
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
+ continue;
+
++ if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
++ continue;
++
+ if (entry->addr.family != sk->sk_family) {
+ #if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if ((entry->addr.family == AF_INET &&
+@@ -184,23 +183,17 @@ select_local_address(const struct pm_nl_pernet *pernet,
+ continue;
+ }
+
+- /* avoid any address already in use by subflows and
+- * pending join
+- */
+- if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
+- ret = entry;
+- break;
+- }
++ ret = entry;
++ break;
+ }
+ rcu_read_unlock();
+ return ret;
+ }
+
+ static struct mptcp_pm_addr_entry *
+-select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
++select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
+ {
+ struct mptcp_pm_addr_entry *entry, *ret = NULL;
+- int i = 0;
+
+ rcu_read_lock();
+ /* do not keep any additional per socket state, just signal
+@@ -209,12 +202,14 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
+ * can lead to additional addresses not being announced.
+ */
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
++ if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
++ continue;
++
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
+ continue;
+- if (i++ == pos) {
+- ret = entry;
+- break;
+- }
++
++ ret = entry;
++ break;
+ }
+ rcu_read_unlock();
+ return ret;
+@@ -258,9 +253,11 @@ EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
+
+ static void check_work_pending(struct mptcp_sock *msk)
+ {
+- if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) &&
+- (msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) ||
+- msk->pm.subflows == mptcp_pm_get_subflows_max(msk)))
++ struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
++
++ if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
++ (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
++ MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1))
+ WRITE_ONCE(msk->pm.work_pending, false);
+ }
+
+@@ -460,6 +457,35 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
+ return i;
+ }
+
++static struct mptcp_pm_addr_entry *
++__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
++{
++ struct mptcp_pm_addr_entry *entry;
++
++ list_for_each_entry(entry, &pernet->local_addr_list, list) {
++ if (entry->addr.id == id)
++ return entry;
++ }
++ return NULL;
++}
++
++static int
++lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
++{
++ struct mptcp_pm_addr_entry *entry;
++ int ret = -1;
++
++ rcu_read_lock();
++ list_for_each_entry(entry, &pernet->local_addr_list, list) {
++ if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
++ ret = entry->addr.id;
++ break;
++ }
++ }
++ rcu_read_unlock();
++ return ret;
++}
++
+ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+ {
+ struct sock *sk = (struct sock *)msk;
+@@ -475,6 +501,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+ local_addr_max = mptcp_pm_get_local_addr_max(msk);
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
++ /* do lazy endpoint usage accounting for the MPC subflows */
++ if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
++ struct mptcp_addr_info local;
++ int mpc_id;
++
++ local_address((struct sock_common *)msk->first, &local);
++ mpc_id = lookup_id_by_addr(pernet, &local);
++ if (mpc_id < 0)
++ __clear_bit(mpc_id, msk->pm.id_avail_bitmap);
++
++ msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
++ }
++
+ pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
+ msk->pm.local_addr_used, local_addr_max,
+ msk->pm.add_addr_signaled, add_addr_signal_max,
+@@ -482,21 +521,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+
+ /* check first for announce */
+ if (msk->pm.add_addr_signaled < add_addr_signal_max) {
+- local = select_signal_address(pernet,
+- msk->pm.add_addr_signaled);
++ local = select_signal_address(pernet, msk);
+
+ if (local) {
+ if (mptcp_pm_alloc_anno_list(msk, local)) {
++ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ msk->pm.add_addr_signaled++;
+ mptcp_pm_announce_addr(msk, &local->addr, false);
+ mptcp_pm_nl_addr_send_ack(msk);
+ }
+- } else {
+- /* pick failed, avoid fourther attempts later */
+- msk->pm.local_addr_used = add_addr_signal_max;
+ }
+-
+- check_work_pending(msk);
+ }
+
+ /* check if should create a new subflow */
+@@ -510,19 +544,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
+ int i, nr;
+
+ msk->pm.local_addr_used++;
+- check_work_pending(msk);
+ nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
++ if (nr)
++ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ spin_unlock_bh(&msk->pm.lock);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
+ spin_lock_bh(&msk->pm.lock);
+- return;
+ }
+-
+- /* lookup failed, avoid fourther attempts later */
+- msk->pm.local_addr_used = local_addr_max;
+- check_work_pending(msk);
+ }
++ check_work_pending(msk);
+ }
+
+ static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
+@@ -736,6 +767,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
+ msk->pm.subflows--;
+ __MPTCP_INC_STATS(sock_net(sk), rm_type);
+ }
++ __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
+ if (!removed)
+ continue;
+
+@@ -765,6 +797,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
+
+ msk_owned_by_me(msk);
+
++ if (!(pm->status & MPTCP_PM_WORK_MASK))
++ return;
++
+ spin_lock_bh(&msk->pm.lock);
+
+ pr_debug("msk=%p status=%x", msk, pm->status);
+@@ -810,7 +845,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
+ /* to keep the code simple, don't do IDR-like allocation for address ID,
+ * just bail when we exceed limits
+ */
+- if (pernet->next_id == MAX_ADDR_ID)
++ if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
+ pernet->next_id = 1;
+ if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
+ goto out;
+@@ -830,7 +865,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
+ if (!entry->addr.id) {
+ find_next:
+ entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
+- MAX_ADDR_ID + 1,
++ MPTCP_PM_MAX_ADDR_ID + 1,
+ pernet->next_id);
+ if (!entry->addr.id && pernet->next_id != 1) {
+ pernet->next_id = 1;
+@@ -1197,18 +1232,6 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
+ return 0;
+ }
+
+-static struct mptcp_pm_addr_entry *
+-__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
+-{
+- struct mptcp_pm_addr_entry *entry;
+-
+- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+- if (entry->addr.id == id)
+- return entry;
+- }
+- return NULL;
+-}
+-
+ int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+ u8 *flags, int *ifindex)
+ {
+@@ -1467,7 +1490,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
+ list_splice_init(&pernet->local_addr_list, &free_list);
+ __reset_counters(pernet);
+ pernet->next_id = 1;
+- bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
++ bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ spin_unlock_bh(&pernet->lock);
+ mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
+ synchronize_rcu();
+@@ -1577,7 +1600,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
+ pernet = net_generic(net, pm_nl_pernet_id);
+
+ spin_lock_bh(&pernet->lock);
+- for (i = id; i < MAX_ADDR_ID + 1; i++) {
++ for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
+ if (test_bit(i, pernet->id_bitmap)) {
+ entry = __lookup_addr_by_id(pernet, i);
+ if (!entry)
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index 4c889552cde77..354f169ca120a 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2435,8 +2435,7 @@ static void mptcp_worker(struct work_struct *work)
+
+ mptcp_check_fastclose(msk);
+
+- if (msk->pm.status)
+- mptcp_pm_nl_work(msk);
++ mptcp_pm_nl_work(msk);
+
+ if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+ mptcp_check_for_eof(msk);
+diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
+index 82c5dc4d6b49d..9fc6f494075fa 100644
+--- a/net/mptcp/protocol.h
++++ b/net/mptcp/protocol.h
+@@ -174,16 +174,25 @@ enum mptcp_pm_status {
+ MPTCP_PM_ADD_ADDR_SEND_ACK,
+ MPTCP_PM_RM_ADDR_RECEIVED,
+ MPTCP_PM_ESTABLISHED,
+- MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
+ MPTCP_PM_SUBFLOW_ESTABLISHED,
++ MPTCP_PM_ALREADY_ESTABLISHED, /* persistent status, set after ESTABLISHED event */
++ MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
++ * accounted int id_avail_bitmap
++ */
+ };
+
++/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
++#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
++
+ enum mptcp_addr_signal_status {
+ MPTCP_ADD_ADDR_SIGNAL,
+ MPTCP_ADD_ADDR_ECHO,
+ MPTCP_RM_ADDR_SIGNAL,
+ };
+
++/* max value of mptcp_addr_info.id */
++#define MPTCP_PM_MAX_ADDR_ID U8_MAX
++
+ struct mptcp_pm_data {
+ struct mptcp_addr_info local;
+ struct mptcp_addr_info remote;
+@@ -202,6 +211,7 @@ struct mptcp_pm_data {
+ u8 local_addr_used;
+ u8 subflows;
+ u8 status;
++ DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
+ struct mptcp_rm_list rm_list_tx;
+ struct mptcp_rm_list rm_list_rx;
+ };
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 586af88194e56..0c12602fa22e8 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -1068,7 +1068,10 @@ signal_address_tests()
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+- chk_add_nr 4 4
++
++ # the server will not signal the address terminating
++ # the MPC subflow
++ chk_add_nr 3 3
+ }
+
+ link_failure_tests()
+--
+2.34.1
+
--- /dev/null
+From 62cf11a278e6b862b0ad7d05a518dbd832f6935c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jan 2022 14:20:13 -0500
+Subject: net: fix information leakage in /proc/net/ptype
+
+From: Congyu Liu <liu3101@purdue.edu>
+
+[ Upstream commit 47934e06b65637c88a762d9c98329ae6e3238888 ]
+
+In one net namespace, after creating a packet socket without binding
+it to a device, users in other net namespaces can observe the new
+`packet_type` added by this packet socket by reading the
+`/proc/net/ptype` file. This is minor information leakage, as packet
+sockets are namespace aware.
+
+Add a net pointer in `packet_type` to keep the net namespace of the
+corresponding packet socket. In `ptype_seq_show`, this net pointer
+must be checked when it is not NULL.
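+
+Sketch of the check added to ptype_seq_show() (from the diff below): an
+entry is shown only when both its bound device (if any) and its owning
+packet socket's namespace (if recorded) match the namespace of the seq
+file:
+
+  if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
+      (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq))))
+          show_entry(seq, pt);  /* illustrative helper */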
+
+Fixes: 2feb27dbe00c ("[NETNS]: Minor information leak via /proc/net/ptype file.")
+Signed-off-by: Congyu Liu <liu3101@purdue.edu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 1 +
+ net/core/net-procfs.c | 3 ++-
+ net/packet/af_packet.c | 2 ++
+ 3 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index ce81cc96a98d9..fba54624191a2 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2636,6 +2636,7 @@ struct packet_type {
+ struct net_device *);
+ bool (*id_match)(struct packet_type *ptype,
+ struct sock *sk);
++ struct net *af_packet_net;
+ void *af_packet_priv;
+ struct list_head list;
+ };
+diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c
+index d8b9dbabd4a43..5b8016335acaf 100644
+--- a/net/core/net-procfs.c
++++ b/net/core/net-procfs.c
+@@ -260,7 +260,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v)
+
+ if (v == SEQ_START_TOKEN)
+ seq_puts(seq, "Type Device Function\n");
+- else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
++ else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) &&
++ (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) {
+ if (pt->type == htons(ETH_P_ALL))
+ seq_puts(seq, "ALL ");
+ else
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+index 1bc7ef49e1487..1a138e8d32d66 100644
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1738,6 +1738,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args)
+ match->prot_hook.dev = po->prot_hook.dev;
+ match->prot_hook.func = packet_rcv_fanout;
+ match->prot_hook.af_packet_priv = match;
++ match->prot_hook.af_packet_net = read_pnet(&match->net);
+ match->prot_hook.id_match = match_fanout_group;
+ match->max_num_members = args->max_num_members;
+ list_add(&match->list, &fanout_list);
+@@ -3323,6 +3324,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
+ po->prot_hook.func = packet_rcv_spkt;
+
+ po->prot_hook.af_packet_priv = sk;
++ po->prot_hook.af_packet_net = sock_net(sk);
+
+ if (proto) {
+ po->prot_hook.type = proto;
+--
+2.34.1
+
--- /dev/null
+From 340b01a2aa30d880662fbccca87b15467d53c257 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jan 2022 15:52:43 -0600
+Subject: net: phy: broadcom: hook up soft_reset for BCM54616S
+
+From: Robert Hancock <robert.hancock@calian.com>
+
+[ Upstream commit d15c7e875d44367005370e6a82e8f3a382a04f9b ]
+
+A problem was encountered with the Bel-Fuse 1GBT-SFP05 SFP module (which
+is a 1 Gbps copper module operating in SGMII mode with an internal
+BCM54616S PHY device) using the Xilinx AXI Ethernet MAC core, where the
+module would work properly on the initial insertion or boot of the
+device, but after the device was rebooted, the link would either only
+come up at 100 Mbps speeds or go up and down erratically.
+
+I found no meaningful changes in the PHY configuration registers between
+the working and non-working boots, but the status registers seemed to
+have a lot of error indications set on the SERDES side of the device on
+the non-working boot. I suspect the problem is that whatever happens on
+the SGMII link when the device is rebooted and the FPGA logic gets
+reloaded ends up putting the module's onboard PHY into a bad state.
+
+Since commit 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
+the genphy_soft_reset call is not made automatically by the PHY core
+unless the callback is explicitly specified in the driver structure. For
+most of these Broadcom devices, there is probably a hardware reset that
+gets asserted to reset the PHY during boot, however for SFP modules
+(where the BCM54616S is commonly found) no such reset line exists, so if
+the board keeps the SFP cage powered up across a reboot, it will end up
+with no reset occurring during reboots.
+
+Hook up the genphy_soft_reset callback for BCM54616S to ensure that a
+PHY reset is performed before the device is initialized. This appears to
+fix the issue with erratic operation after a reboot with this SFP
+module.
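+
+The change itself is a one-line opt-in in the phy_driver entry (sketch;
+fields abbreviated):
+
+  static struct phy_driver broadcom_drivers[] = { {
+          .name       = "Broadcom BCM54616S",
+          .soft_reset = genphy_soft_reset, /* reset before config_init */
+          /* ... */
+  }, };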
+
+Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
+Signed-off-by: Robert Hancock <robert.hancock@calian.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/broadcom.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
+index 83aea5c5cd03c..db26ff8ce7dbb 100644
+--- a/drivers/net/phy/broadcom.c
++++ b/drivers/net/phy/broadcom.c
+@@ -768,6 +768,7 @@ static struct phy_driver broadcom_drivers[] = {
+ .phy_id_mask = 0xfffffff0,
+ .name = "Broadcom BCM54616S",
+ /* PHY_GBIT_FEATURES */
++ .soft_reset = genphy_soft_reset,
+ .config_init = bcm54xx_config_init,
+ .config_aneg = bcm54616s_config_aneg,
+ .config_intr = bcm_phy_config_intr,
+--
+2.34.1
+
--- /dev/null
+From 2838c988be07cfa57a51a5eb22da09d8f039fad9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jan 2022 13:46:47 +0900
+Subject: net: stmmac: dwmac-visconti: Fix bit definitions for ETHER_CLK_SEL
+
+From: Yuji Ishikawa <yuji2.ishikawa@toshiba.co.jp>
+
+[ Upstream commit 1ba1a4a90fa416a6f389206416c5f488cf8b1543 ]
+
+Just 0 should be used to represent cleared bits for the following macros:
+
+* ETHER_CLK_SEL_DIV_SEL_20
+* ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN
+* ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN
+* ETHER_CLK_SEL_TX_CLK_O_TX_I
+* ETHER_CLK_SEL_RMII_CLK_SEL_IN
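+
+Illustration (not from the commit) of why BIT(0) was wrong: these *_IN
+values name the all-bits-clear state of a mux field, so defining them as
+BIT(0) spuriously set bit 0 whenever they were OR-ed into the register:
+
+  val = ETHER_CLK_SEL_FREQ_SEL_50M | ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN;
+  /* old definition: bit 0 set by accident; new: field left cleared */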
+
+Fixes: b38dd98ff8d0 ("net: stmmac: Add Toshiba Visconti SoCs glue driver")
+Signed-off-by: Yuji Ishikawa <yuji2.ishikawa@toshiba.co.jp>
+Reviewed-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+index fac788718c045..1c599a005aab6 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+@@ -22,21 +22,21 @@
+ #define ETHER_CLK_SEL_RMII_CLK_EN BIT(2)
+ #define ETHER_CLK_SEL_RMII_CLK_RST BIT(3)
+ #define ETHER_CLK_SEL_DIV_SEL_2 BIT(4)
+-#define ETHER_CLK_SEL_DIV_SEL_20 BIT(0)
++#define ETHER_CLK_SEL_DIV_SEL_20 0
+ #define ETHER_CLK_SEL_FREQ_SEL_125M (BIT(9) | BIT(8))
+ #define ETHER_CLK_SEL_FREQ_SEL_50M BIT(9)
+ #define ETHER_CLK_SEL_FREQ_SEL_25M BIT(8)
+ #define ETHER_CLK_SEL_FREQ_SEL_2P5M 0
+-#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN BIT(0)
++#define ETHER_CLK_SEL_TX_CLK_EXT_SEL_IN 0
+ #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC BIT(10)
+ #define ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV BIT(11)
+-#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN BIT(0)
++#define ETHER_CLK_SEL_RX_CLK_EXT_SEL_IN 0
+ #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC BIT(12)
+ #define ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV BIT(13)
+-#define ETHER_CLK_SEL_TX_CLK_O_TX_I BIT(0)
++#define ETHER_CLK_SEL_TX_CLK_O_TX_I 0
+ #define ETHER_CLK_SEL_TX_CLK_O_RMII_I BIT(14)
+ #define ETHER_CLK_SEL_TX_O_E_N_IN BIT(15)
+-#define ETHER_CLK_SEL_RMII_CLK_SEL_IN BIT(0)
++#define ETHER_CLK_SEL_RMII_CLK_SEL_IN 0
+ #define ETHER_CLK_SEL_RMII_CLK_SEL_RX_C BIT(16)
+
+ #define ETHER_CLK_SEL_RX_TX_CLK_EN (ETHER_CLK_SEL_RX_CLK_EN | ETHER_CLK_SEL_TX_CLK_EN)
+--
+2.34.1
+
--- /dev/null
+From 9a25d5f55f530a13abb63b04f35997acfb20c983 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jan 2022 13:46:48 +0900
+Subject: net: stmmac: dwmac-visconti: Fix clock configuration for RMII mode
+
+From: Yuji Ishikawa <yuji2.ishikawa@toshiba.co.jp>
+
+[ Upstream commit 0959bc4bd4206433ed101a1332a23e93ad16ec77 ]
+
+The bit pattern of the ETHER_CLOCK_SEL register for RMII/MII mode should be
+fixed. Also, some control bits must be modified in a specific sequence.
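+
+Sketch of the ordering the fix enforces (condensed from the RGMII branch
+of the diff below): select the clock mux first, then start the clocks,
+then flip the TX_O direction, as separate register writes:
+
+  writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);  /* 1. clock mux    */
+  val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
+  writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);  /* 2. start clocks */
+  val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
+  writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);  /* 3. TX_O output  */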
+
+Fixes: b38dd98ff8d0 ("net: stmmac: Add Toshiba Visconti SoCs glue driver")
+Signed-off-by: Yuji Ishikawa <yuji2.ishikawa@toshiba.co.jp>
+Reviewed-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/stmicro/stmmac/dwmac-visconti.c | 32 ++++++++++++-------
+ 1 file changed, 21 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+index 1c599a005aab6..4578c64953eac 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
+@@ -96,31 +96,41 @@ static void visconti_eth_fix_mac_speed(void *priv, unsigned int speed)
+ val |= ETHER_CLK_SEL_TX_O_E_N_IN;
+ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+
++ /* Set Clock-Mux, Start clock, Set TX_O direction */
+ switch (dwmac->phy_intf_sel) {
+ case ETHER_CONFIG_INTF_RGMII:
+ val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ break;
+ case ETHER_CONFIG_INTF_RMII:
+ val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_DIV |
+- ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN |
++ ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
+ ETHER_CLK_SEL_RMII_CLK_SEL_RX_C;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RMII_CLK_RST;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RMII_CLK_EN | ETHER_CLK_SEL_RX_TX_CLK_EN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ break;
+ case ETHER_CONFIG_INTF_MII:
+ default:
+ val = clk_sel_val | ETHER_CLK_SEL_RX_CLK_EXT_SEL_RXC |
+- ETHER_CLK_SEL_TX_CLK_EXT_SEL_DIV | ETHER_CLK_SEL_TX_O_E_N_IN |
+- ETHER_CLK_SEL_RMII_CLK_EN;
++ ETHER_CLK_SEL_TX_CLK_EXT_SEL_TXC | ETHER_CLK_SEL_TX_O_E_N_IN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
++
++ val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
++ writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+ break;
+ }
+
+- /* Start clock */
+- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+- val |= ETHER_CLK_SEL_RX_TX_CLK_EN;
+- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+-
+- val &= ~ETHER_CLK_SEL_TX_O_E_N_IN;
+- writel(val, dwmac->reg + REG_ETHER_CLOCK_SEL);
+-
+ spin_unlock_irqrestore(&dwmac->lock, flags);
+ }
+
+--
+2.34.1
+
--- /dev/null
+From 27e186efa8e0b0f6f604bc4c41dc716cdf6d770f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jan 2022 21:37:58 +0100
+Subject: netfilter: conntrack: don't increment invalid counter on NF_REPEAT
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 830af2eba40327abec64325a5b08b1e85c37a2e0 ]
+
+The packet isn't invalid; NF_REPEAT means we're trying again after
+cleaning out a stale connection, e.g. via the TCP tracker.
+
+This caused the invalid stat counter to increase in a test case involving
+frequent connection reuse, even though no packet is actually invalid.
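+
+Sketch of the reordering (condensed from the diff below): take the
+NF_REPEAT retry path before touching the stats, so only genuine
+failures are counted:
+
+  if (ret == -NF_REPEAT)
+          goto repeat;               /* retry, not an invalid packet */
+
+  NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+  if (ret == -NF_DROP)
+          NF_CT_STAT_INC_ATOMIC(state->net, drop);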
+
+Fixes: 56a62e2218f5 ("netfilter: conntrack: fix NF_REPEAT handling")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_conntrack_core.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
+index 4712a90a1820c..7f79974607643 100644
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -1922,15 +1922,17 @@ repeat:
+ pr_debug("nf_conntrack_in: Can't track with proto module\n");
+ nf_conntrack_put(&ct->ct_general);
+ skb->_nfct = 0;
+- NF_CT_STAT_INC_ATOMIC(state->net, invalid);
+- if (ret == -NF_DROP)
+- NF_CT_STAT_INC_ATOMIC(state->net, drop);
+ /* Special case: TCP tracker reports an attempt to reopen a
+ * closed/aborted connection. We have to go back and create a
+ * fresh conntrack.
+ */
+ if (ret == -NF_REPEAT)
+ goto repeat;
++
++ NF_CT_STAT_INC_ATOMIC(state->net, invalid);
++ if (ret == -NF_DROP)
++ NF_CT_STAT_INC_ATOMIC(state->net, drop);
++
+ ret = -ret;
+ goto out;
+ }
+--
+2.34.1
+
--- /dev/null
+From 819a1638d01ceef0d04387333898e7e819560df2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Dec 2021 16:38:15 -0500
+Subject: NFS: Ensure the server has an up to date ctime before hardlinking
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 204975036b34f55237bc44c8a302a88468ef21b5 ]
+
+Creating a hard link is required by POSIX to update the file ctime, so
+ensure that the file data is synced to disk so that we don't clobber the
+updated ctime by writing back after creating the hard link.
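+
+The fix (see the diff below) flushes dirty data for regular files before
+issuing the LINK operation:
+
+  if (S_ISREG(inode->i_mode))
+          nfs_sync_inode(inode);
+  error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);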
+
+Fixes: 9f7682728728 ("NFS: Move the delegation return down into nfs4_proc_link()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/dir.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 5b68c44848caf..fdea6ff824cf8 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -2382,6 +2382,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
+
+ trace_nfs_link_enter(inode, dir, dentry);
+ d_drop(dentry);
++ if (S_ISREG(inode->i_mode))
++ nfs_sync_inode(inode);
+ error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
+ if (error == 0) {
+ ihold(inode);
+--
+2.34.1
+
--- /dev/null
+From f255aff22cfe3d57206c276db792e6b8c7619b60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Dec 2021 16:38:16 -0500
+Subject: NFS: Ensure the server has an up to date ctime before renaming
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 6ff9d99bb88faebf134ca668842349d9718e5464 ]
+
+Renaming a file is required by POSIX to update the file ctime, so
+ensure that the file data is synced to disk so that we don't clobber the
+updated ctime by writing back after the rename.
+
+Fixes: f2c2c552f119 ("NFS: Move delegation recall into the NFSv4 callback for rename_setup()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/dir.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index fdea6ff824cf8..ac0e1ab1b71ba 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -2472,6 +2472,8 @@ int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+ }
+ }
+
++ if (S_ISREG(old_inode->i_mode))
++ nfs_sync_inode(old_inode);
+ task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL);
+ if (IS_ERR(task)) {
+ error = PTR_ERR(task);
+--
+2.34.1
+
--- /dev/null
+From c407b6e1c8205cf17ffa2218dbc9bd5fb00b7000 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jan 2022 18:24:02 -0500
+Subject: NFSv4: Handle case where the lookup of a directory fails
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit ac795161c93699d600db16c1a8cc23a65a1eceaf ]
+
+If the application sets the O_DIRECTORY flag, and tries to open a
+regular file, nfs_atomic_open() will punt to doing a regular lookup.
+If the server then returns a regular file, we will happily return a
+file descriptor with uninitialised open state.
+
+The fix is to return the expected ENOTDIR error in these cases.
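+
+Sketch of the added check (condensed from the diff below): after the
+fallback lookup, reject a non-directory result when the caller passed
+O_DIRECTORY:
+
+  if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+      !S_ISDIR(inode->i_mode))
+          res = ERR_PTR(-ENOTDIR);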
+
+Reported-by: Lyu Tao <tao.lyu@epfl.ch>
+Fixes: 0dd2b474d0b6 ("nfs: implement i_op->atomic_open()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/dir.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index ac0e1ab1b71ba..2d156311c374d 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -1982,6 +1982,19 @@ out:
+
+ no_open:
+ res = nfs_lookup(dir, dentry, lookup_flags);
++ if (!res) {
++ inode = d_inode(dentry);
++ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
++ !S_ISDIR(inode->i_mode))
++ res = ERR_PTR(-ENOTDIR);
++ } else if (!IS_ERR(res)) {
++ inode = d_inode(res);
++ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
++ !S_ISDIR(inode->i_mode)) {
++ dput(res);
++ res = ERR_PTR(-ENOTDIR);
++ }
++ }
+ if (switched) {
+ d_lookup_done(dentry);
+ if (!res)
+--
+2.34.1
+
--- /dev/null
+From 46db83ff5fbfc5cb89d9903bad6b205eab622c9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jan 2022 18:24:03 -0500
+Subject: NFSv4: nfs_atomic_open() can race when looking up a non-regular file
+
+From: Trond Myklebust <trond.myklebust@hammerspace.com>
+
+[ Upstream commit 1751fc1db36f6f411709e143d5393f92d12137a9 ]
+
+If the file type changes back to being a regular file on the server
+between the failed OPEN and our LOOKUP, then we need to re-run the OPEN.
+
+Fixes: 0dd2b474d0b6 ("nfs: implement i_op->atomic_open()")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfs/dir.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
+index 2d156311c374d..ed79c1bd84a29 100644
+--- a/fs/nfs/dir.c
++++ b/fs/nfs/dir.c
+@@ -1987,12 +1987,17 @@ no_open:
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !S_ISDIR(inode->i_mode))
+ res = ERR_PTR(-ENOTDIR);
++ else if (inode && S_ISREG(inode->i_mode))
++ res = ERR_PTR(-EOPENSTALE);
+ } else if (!IS_ERR(res)) {
+ inode = d_inode(res);
+ if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
+ !S_ISDIR(inode->i_mode)) {
+ dput(res);
+ res = ERR_PTR(-ENOTDIR);
++ } else if (inode && S_ISREG(inode->i_mode)) {
++ dput(res);
++ res = ERR_PTR(-EOPENSTALE);
+ }
+ }
+ if (switched) {
+--
+2.34.1
+
--- /dev/null
+From 069df600c33a90fe62f8eddf777e4da0a31795a9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:45 +0530
+Subject: octeontx2-af: cn10k: Do not enable RPM loopback for LPC interfaces
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit df66b6ebc5dcf7253e35a640b9ec4add54195c25 ]
+
+Internal loopback is not supported on low-rate LPCS interfaces such as
+SGMII/QSGMII, so do not allow enabling it for such interfaces.
+
+Fixes: 3ad3f8f93c81 ("octeontx2-af: cn10k: MAC internal loopback support")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/af/rpm.c | 27 +++++++++----------
+ 1 file changed, 12 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+index 07b0eafccad87..b3803577324e6 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rpm.c
+@@ -251,22 +251,19 @@ int rpm_lmac_internal_loopback(void *rpmd, int lmac_id, bool enable)
+ if (!rpm || lmac_id >= rpm->lmac_count)
+ return -ENODEV;
+ lmac_type = rpm->mac_ops->get_lmac_type(rpm, lmac_id);
+- if (lmac_type == LMAC_MODE_100G_R) {
+- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
+-
+- if (enable)
+- cfg |= RPMX_MTI_PCS_LBK;
+- else
+- cfg &= ~RPMX_MTI_PCS_LBK;
+- rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
+- } else {
+- cfg = rpm_read(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1);
+- if (enable)
+- cfg |= RPMX_MTI_PCS_LBK;
+- else
+- cfg &= ~RPMX_MTI_PCS_LBK;
+- rpm_write(rpm, lmac_id, RPMX_MTI_LPCSX_CONTROL1, cfg);
++
++ if (lmac_type == LMAC_MODE_QSGMII || lmac_type == LMAC_MODE_SGMII) {
++ dev_err(&rpm->pdev->dev, "loopback not supported for LPC mode\n");
++ return 0;
+ }
+
++ cfg = rpm_read(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1);
++
++ if (enable)
++ cfg |= RPMX_MTI_PCS_LBK;
++ else
++ cfg &= ~RPMX_MTI_PCS_LBK;
++ rpm_write(rpm, lmac_id, RPMX_MTI_PCS100X_CONTROL1, cfg);
++
+ return 0;
+ }
+--
+2.34.1
+
--- /dev/null
+From 89fd81ca0a9542b5236f7e29affc9a072b18deed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:39 +0530
+Subject: octeontx2-af: Do not fixup all VF action entries
+
+From: Subbaraya Sundeep <sbhatta@marvell.com>
+
+[ Upstream commit d225c449ab2be25273a3674f476c6c0b57c50254 ]
+
+AF modifies all rules destined for a VF to use the same action
+as the VF's default RSS action. This fixup was needed because
+AF only installs default rules with the RSS action. But the
+action in rules installed by a PF for its VFs should not be
+changed by this fixup, because the action can be a drop or
+direct-to-queue action as specified by the user (ntuple filters).
+This patch fixes that problem.
+
+Fixes: 967db3529eca ("octeontx2-af: add support for multicast/promisc packet")
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Naveen Mamindlapalli <naveenm@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/marvell/octeontx2/af/rvu_npc.c | 22 ++++++++++++++++---
+ .../marvell/octeontx2/af/rvu_npc_fs.c | 20 ++++++++++-------
+ 2 files changed, 31 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+index 5efb4174e82df..87f18e32b4634 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
+@@ -402,6 +402,7 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
+ int blkaddr, int index, struct mcam_entry *entry,
+ bool *enable)
+ {
++ struct rvu_npc_mcam_rule *rule;
+ u16 owner, target_func;
+ struct rvu_pfvf *pfvf;
+ u64 rx_action;
+@@ -423,6 +424,12 @@ static void npc_fixup_vf_rule(struct rvu *rvu, struct npc_mcam *mcam,
+ test_bit(NIXLF_INITIALIZED, &pfvf->flags)))
+ *enable = false;
+
++ /* fix up not needed for the rules added by the user (ntuple filters) */
++ list_for_each_entry(rule, &mcam->mcam_rules, list) {
++ if (rule->entry == index)
++ return;
++ }
++
+ /* copy VF default entry action to the VF mcam entry */
+ rx_action = npc_get_default_entry_action(rvu, mcam, blkaddr,
+ target_func);
+@@ -489,8 +496,8 @@ static void npc_config_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ }
+
+ /* PF installing VF rule */
+- if (intf == NIX_INTF_RX && actindex < mcam->bmap_entries)
+- npc_fixup_vf_rule(rvu, mcam, blkaddr, index, entry, &enable);
++ if (is_npc_intf_rx(intf) && actindex < mcam->bmap_entries)
++ npc_fixup_vf_rule(rvu, mcam, blkaddr, actindex, entry, &enable);
+
+ /* Set 'action' */
+ rvu_write64(rvu, blkaddr,
+@@ -916,7 +923,8 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ int blkaddr, u16 pcifunc, u64 rx_action)
+ {
+ int actindex, index, bank, entry;
+- bool enable;
++ struct rvu_npc_mcam_rule *rule;
++ bool enable, update;
+
+ if (!(pcifunc & RVU_PFVF_FUNC_MASK))
+ return;
+@@ -924,6 +932,14 @@ static void npc_update_vf_flow_entry(struct rvu *rvu, struct npc_mcam *mcam,
+ mutex_lock(&mcam->lock);
+ for (index = 0; index < mcam->bmap_entries; index++) {
+ if (mcam->entry2target_pffunc[index] == pcifunc) {
++ update = true;
++ /* update not needed for the rules added via ntuple filters */
++ list_for_each_entry(rule, &mcam->mcam_rules, list) {
++ if (rule->entry == index)
++ update = false;
++ }
++ if (!update)
++ continue;
+ bank = npc_get_bank(mcam, index);
+ actindex = index;
+ entry = index & (mcam->banksize - 1);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+index 51ddc7b81d0bd..ca404d51d9f56 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+@@ -1098,14 +1098,6 @@ find_rule:
+ write_req.cntr = rule->cntr;
+ }
+
+- err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
+- &write_rsp);
+- if (err) {
+- rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
+- if (new)
+- kfree(rule);
+- return err;
+- }
+ /* update rule */
+ memcpy(&rule->packet, &dummy.packet, sizeof(rule->packet));
+ memcpy(&rule->mask, &dummy.mask, sizeof(rule->mask));
+@@ -1129,6 +1121,18 @@ find_rule:
+ if (req->default_rule)
+ pfvf->def_ucast_rule = rule;
+
++ /* write to mcam entry registers */
++ err = rvu_mbox_handler_npc_mcam_write_entry(rvu, &write_req,
++ &write_rsp);
++ if (err) {
++ rvu_mcam_remove_counter_from_rule(rvu, owner, rule);
++ if (new) {
++ list_del(&rule->list);
++ kfree(rule);
++ }
++ return err;
++ }
++
+ /* VF's MAC address is being changed via PF */
+ if (pf_set_vfs_mac) {
+ ether_addr_copy(pfvf->default_mac, req->packet.dmac);
+--
+2.34.1
+
--- /dev/null
+From 9917c059291ea7f257714fd0f3dc222912189ebf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:40 +0530
+Subject: octeontx2-af: Fix LBK backpressure id count
+
+From: Sunil Goutham <sgoutham@marvell.com>
+
+[ Upstream commit 00bfe94e388fe12bfd0d4f6361b1b1343374ff5b ]
+
+In rvu_nix_get_bpid() lbk_bpid_cnt is being read from the
+wrong register, due to which enabling backpressure fails
+for LBK VF32 onwards. This patch fixes that.
+
+Fixes: fe1939bb2340 ("octeontx2-af: Add SDP interface support")
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 6970540dc4709..8ee324aabf2d6 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -511,11 +511,11 @@ static int rvu_nix_get_bpid(struct rvu *rvu, struct nix_bp_cfg_req *req,
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST);
+ lmac_chan_cnt = cfg & 0xFF;
+
+- cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
+- sdp_chan_cnt = cfg & 0xFFF;
+-
+ cgx_bpid_cnt = hw->cgx_links * lmac_chan_cnt;
+ lbk_bpid_cnt = hw->lbk_links * ((cfg >> 16) & 0xFF);
++
++ cfg = rvu_read64(rvu, blkaddr, NIX_AF_CONST1);
++ sdp_chan_cnt = cfg & 0xFFF;
+ sdp_bpid_cnt = hw->sdp_links * sdp_chan_cnt;
+
+ pfvf = rvu_get_pfvf(rvu, req->hdr.pcifunc);
+--
+2.34.1
+
--- /dev/null
+From f8fedc91133ecf56b1538eddb4b5f59cdd64f27e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:44 +0530
+Subject: octeontx2-af: Increase link credit restore polling timeout
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit 1581d61b42d985cefe7b71eea67ab3bfcbf34d0f ]
+
+It's been observed that link credit restore can sometimes take
+much longer than the current timeout. This patch increases the
+default timeout value and returns a proper error value on
+failure.
+
+Fixes: 1c74b89171c3 ("octeontx2-af: Wait for TX link idle for credits change")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/mbox.h | 1 +
+ drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c | 4 ++--
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+index a8618259de943..26ad71842b3b2 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+@@ -700,6 +700,7 @@ enum nix_af_status {
+ NIX_AF_ERR_BANDPROF_INVAL_REQ = -428,
+ NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429,
+ NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430,
++ NIX_AF_ERR_LINK_CREDITS = -431,
+ };
+
+ /* For NIX RX vtag action */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 9d4cc0ae61474..959266894cf15 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -3891,8 +3891,8 @@ nix_config_link_credits(struct rvu *rvu, int blkaddr, int link,
+ NIX_AF_TL1X_SW_XOFF(schq), BIT_ULL(0));
+ }
+
+- rc = -EBUSY;
+- poll_tmo = jiffies + usecs_to_jiffies(10000);
++ rc = NIX_AF_ERR_LINK_CREDITS;
++ poll_tmo = jiffies + usecs_to_jiffies(200000);
+ /* Wait for credits to return */
+ do {
+ if (time_after(jiffies, poll_tmo))
+--
+2.34.1
+
--- /dev/null
+From b111e1c10fad5cb45c85b2d7ec080e8c37cabf31 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:41 +0530
+Subject: octeontx2-af: Retry until RVU block reset complete
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit 03ffbc9914bd1130fba464f0a41c01372e5fc359 ]
+
+A few RVU blocks like SSO require more time to reset on some
+silicons, so retry the block reset until it succeeds.
+
+Fixes: c0fa2cff8822c ("octeontx2-af: Handle return value in block reset")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+index 90dc5343827f0..11ef46e72ddd9 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c
+@@ -520,8 +520,11 @@ static void rvu_block_reset(struct rvu *rvu, int blkaddr, u64 rst_reg)
+
+ rvu_write64(rvu, blkaddr, rst_reg, BIT_ULL(0));
+ err = rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true);
+- if (err)
+- dev_err(rvu->dev, "HW block:%d reset failed\n", blkaddr);
++ if (err) {
++ dev_err(rvu->dev, "HW block:%d reset timeout retrying again\n", blkaddr);
++ while (rvu_poll_reg(rvu, blkaddr, rst_reg, BIT_ULL(63), true) == -EBUSY)
++ ;
++ }
+ }
+
+ static void rvu_reset_all_blocks(struct rvu *rvu)
+--
+2.34.1
+
--- /dev/null
+From 9f6506cd4e6651d3395b59182ee00509f84a9d66 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Sep 2021 18:40:24 +0530
+Subject: octeontx2-af: verify CQ context updates
+
+From: Hariprasad Kelam <hkelam@marvell.com>
+
+[ Upstream commit 14e94f9445a9e91d460f5d4b519f8892c3fb14bb ]
+
+As per a HW erratum, an AQ modification to a CQ could be discarded
+under heavy traffic. This patch implements a workaround: after each
+CQ write via the AQ, check whether the requested fields (except those
+which HW can update, e.g. avg_level) were properly updated or not.
+
+If the CQ context was not updated, perform the AQ write again.
+
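+A standalone sketch of the masked comparison this workaround performs
+(a simplified model, not the driver code itself):
+
+  #include <stddef.h>
+  #include <stdint.h>
+
+  /* Compare only the fields selected by @mask, 64 bits at a time. */
+  static int ctx_matches(const uint64_t *want, const uint64_t *got,
+                         const uint64_t *mask, size_t words)
+  {
+          for (size_t i = 0; i < words; i++)
+                  if ((want[i] & mask[i]) != (got[i] & mask[i]))
+                          return 0;
+          return 1;
+  }
+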
+Signed-off-by: Hariprasad Kelam <hkelam@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/marvell/octeontx2/af/mbox.h | 2 +
+ .../ethernet/marvell/octeontx2/af/rvu_nix.c | 78 ++++++++++++++++++-
+ 2 files changed, 79 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+index 154877706a0e1..a8618259de943 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
++++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+@@ -698,6 +698,8 @@ enum nix_af_status {
+ NIX_AF_ERR_INVALID_BANDPROF = -426,
+ NIX_AF_ERR_IPOLICER_NOTSUPP = -427,
+ NIX_AF_ERR_BANDPROF_INVAL_REQ = -428,
++ NIX_AF_ERR_CQ_CTX_WRITE_ERR = -429,
++ NIX_AF_ERR_AQ_CTX_RETRY_WRITE = -430,
+ };
+
+ /* For NIX RX vtag action */
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+index 8ee324aabf2d6..9d4cc0ae61474 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+@@ -28,6 +28,7 @@ static int nix_verify_bandprof(struct nix_cn10k_aq_enq_req *req,
+ static int nix_free_all_bandprof(struct rvu *rvu, u16 pcifunc);
+ static void nix_clear_ratelimit_aggr(struct rvu *rvu, struct nix_hw *nix_hw,
+ u32 leaf_prof);
++static const char *nix_get_ctx_name(int ctype);
+
+ enum mc_tbl_sz {
+ MC_TBL_SZ_256,
+@@ -1061,10 +1062,68 @@ static int rvu_nix_blk_aq_enq_inst(struct rvu *rvu, struct nix_hw *nix_hw,
+ return 0;
+ }
+
++static int rvu_nix_verify_aq_ctx(struct rvu *rvu, struct nix_hw *nix_hw,
++ struct nix_aq_enq_req *req, u8 ctype)
++{
++ struct nix_cn10k_aq_enq_req aq_req;
++ struct nix_cn10k_aq_enq_rsp aq_rsp;
++ int rc, word;
++
++ if (req->ctype != NIX_AQ_CTYPE_CQ)
++ return 0;
++
++ rc = nix_aq_context_read(rvu, nix_hw, &aq_req, &aq_rsp,
++ req->hdr.pcifunc, ctype, req->qidx);
++ if (rc) {
++ dev_err(rvu->dev,
++ "%s: Failed to fetch %s%d context of PFFUNC 0x%x\n",
++ __func__, nix_get_ctx_name(ctype), req->qidx,
++ req->hdr.pcifunc);
++ return rc;
++ }
++
++ /* Make copy of original context & mask which are required
++ * for resubmission
++ */
++ memcpy(&aq_req.cq_mask, &req->cq_mask, sizeof(struct nix_cq_ctx_s));
++ memcpy(&aq_req.cq, &req->cq, sizeof(struct nix_cq_ctx_s));
++
++ /* exclude fields which HW can update */
++ aq_req.cq_mask.cq_err = 0;
++ aq_req.cq_mask.wrptr = 0;
++ aq_req.cq_mask.tail = 0;
++ aq_req.cq_mask.head = 0;
++ aq_req.cq_mask.avg_level = 0;
++ aq_req.cq_mask.update_time = 0;
++ aq_req.cq_mask.substream = 0;
++
++ /* Context mask (cq_mask) holds mask value of fields which
++ * are changed in AQ WRITE operation.
++ * for example cq.drop = 0xa;
++ * cq_mask.drop = 0xff;
++ * Below logic performs '&' between cq and cq_mask so that non
++ * updated fields are masked out for request and response
++ * comparison
++ */
++ for (word = 0; word < sizeof(struct nix_cq_ctx_s) / sizeof(u64);
++ word++) {
++ *(u64 *)((u8 *)&aq_rsp.cq + word * 8) &=
++ (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8));
++ *(u64 *)((u8 *)&aq_req.cq + word * 8) &=
++ (*(u64 *)((u8 *)&aq_req.cq_mask + word * 8));
++ }
++
++ if (memcmp(&aq_req.cq, &aq_rsp.cq, sizeof(struct nix_cq_ctx_s)))
++ return NIX_AF_ERR_AQ_CTX_RETRY_WRITE;
++
++ return 0;
++}
++
+ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
+ struct nix_aq_enq_rsp *rsp)
+ {
+ struct nix_hw *nix_hw;
++ int err, retries = 5;
+ int blkaddr;
+
+ blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, req->hdr.pcifunc);
+@@ -1075,7 +1134,24 @@ static int rvu_nix_aq_enq_inst(struct rvu *rvu, struct nix_aq_enq_req *req,
+ if (!nix_hw)
+ return NIX_AF_ERR_INVALID_NIXBLK;
+
+- return rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp);
++retry:
++ err = rvu_nix_blk_aq_enq_inst(rvu, nix_hw, req, rsp);
++
++ /* HW errata 'AQ Modification to CQ could be discarded on heavy traffic'
++ * As a workaround, perform a CQ context read after each AQ write. If the
++ * read shows the AQ write was not applied, perform the AQ write again.
++ */
++ if (!err && req->op == NIX_AQ_INSTOP_WRITE) {
++ err = rvu_nix_verify_aq_ctx(rvu, nix_hw, req, NIX_AQ_CTYPE_CQ);
++ if (err == NIX_AF_ERR_AQ_CTX_RETRY_WRITE) {
++ if (retries--)
++ goto retry;
++ else
++ return NIX_AF_ERR_CQ_CTX_WRITE_ERR;
++ }
++ }
++
++ return err;
+ }
+
+ static const char *nix_get_ctx_name(int ctype)
+--
+2.34.1
+
--- /dev/null
+From 3ce849716925d71e73715c2cee9972c3643202cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:43 +0530
+Subject: octeontx2-pf: cn10k: Ensure valid pointers are freed to aura
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit c5d731c54a17677939bd59ee8be4ed74d7485ba4 ]
+
+While freeing SQB pointers to the aura, the driver first memcpys them
+to the target address and then triggers the LMTST operation that frees
+the pointers to the aura. We need to ensure (by adding a dmb barrier)
+that the memcpy has finished before the pointers are freed to the aura.
+This patch also adds the missing SQ context structure entries in
+debugfs.
+
+Fixes: ef6c8da71eaf ("octeontx2-pf: cn10K: Reserve LMTST lines per core")
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c | 2 ++
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 1 +
+ 2 files changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+index 49d822a98adab..f001579569a2b 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
++++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+@@ -1131,6 +1131,8 @@ static void print_nix_cn10k_sq_ctx(struct seq_file *m,
+ seq_printf(m, "W3: head_offset\t\t\t%d\nW3: smenq_next_sqb_vld\t\t%d\n\n",
+ sq_ctx->head_offset, sq_ctx->smenq_next_sqb_vld);
+
++ seq_printf(m, "W3: smq_next_sq_vld\t\t%d\nW3: smq_pend\t\t\t%d\n",
++ sq_ctx->smq_next_sq_vld, sq_ctx->smq_pend);
+ seq_printf(m, "W4: next_sqb \t\t\t%llx\n\n", sq_ctx->next_sqb);
+ seq_printf(m, "W5: tail_sqb \t\t\t%llx\n\n", sq_ctx->tail_sqb);
+ seq_printf(m, "W6: smenq_sqb \t\t\t%llx\n\n", sq_ctx->smenq_sqb);
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+index a51ecd771d075..637450de189c8 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+@@ -591,6 +591,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura,
+ size++;
+ tar_addr |= ((size - 1) & 0x7) << 4;
+ }
++ dma_wmb();
+ memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs);
+ /* Perform LMTST flush */
+ cn10k_lmt_flush(val, tar_addr);
+--
+2.34.1
+
--- /dev/null
+From 447beaed92f63363ca25914e7f0202ba09659a38 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 12:04:46 +0530
+Subject: octeontx2-pf: Forward error codes to VF
+
+From: Subbaraya Sundeep <sbhatta@marvell.com>
+
+[ Upstream commit a8db854be28622a2477cb21cdf7f829adbb2c42d ]
+
+The PF forwards its VFs' messages to the AF, and the
+corresponding replies from the AF back to the VFs. The AF sets
+a proper error code in the replies after processing the message
+requests. Currently the PF checks the error codes in the replies
+and sends an invalid-message response to the VF, so the VF lacks
+the error code set by the AF for its messages. This patch changes
+that so that the PF simply forwards the AF replies and the VF can
+handle the error codes itself.
+
+Fixes: d424b6c02415 ("octeontx2-pf: Enable SRIOV and added VF mbox handling")
+Signed-off-by: Subbaraya Sundeep <sbhatta@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+index 53a3e8de1a51e..b1894d4045b8d 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -386,7 +386,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf,
+ dst_mdev->msg_size = mbox_hdr->msg_size;
+ dst_mdev->num_msgs = num_msgs;
+ err = otx2_sync_mbox_msg(dst_mbox);
+- if (err) {
++ /* Error code -EIO indicate there is a communication failure
++ * to the AF. Rest of the error codes indicate that AF processed
++ * VF messages and set the error codes in response messages
++ * (if any) so simply forward responses to VF.
++ */
++ if (err == -EIO) {
+ dev_warn(pf->dev,
+ "AF not responding to VF%d messages\n", vf);
+ /* restore PF mbase and exit */
+--
+2.34.1
+
--- /dev/null
+From 4059b9d5d31f6330507d7534fc11bb01670677b4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Dec 2021 13:19:52 +0100
+Subject: perf: Fix perf_event_read_local() time
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit 09f5e7dc7ad705289e1b1ec065439aa3c42951c4 ]
+
+Time readers that cannot take locks (due to NMI etc.) currently make
+use of perf_event::shadow_ctx_time, which, for that event gives:
+
+ time' = now + (time - timestamp)
+
+or, alternatively arranged:
+
+ time' = time + (now - timestamp)
+
+IOW, the progression of time since the last time the shadow_ctx_time
+was updated.
+
+There are problems with this:
+
+ A) the shadow_ctx_time is per-event, even though the ctx_time it
+ reflects is obviously per context. The direct consequence of this
+ is that the context needs to iterate all events all the time to
+ keep the shadow_ctx_time in sync.
+
+ B) even with the prior point, the context itself might not be active
+ meaning its time should not advance to begin with.
+
+ C) shadow_ctx_time isn't consistently updated when ctx_time is
+
+There are 3 users of this stuff that suffer differently from this:
+
+ - calc_timer_values()
+ - perf_output_read()
+ - perf_event_update_userpage() /* A */
+
+ - perf_event_read_local() /* A,B */
+
+In particular, perf_output_read() doesn't suffer at all, because it's
+sample driven and hence only relevant when the event is actually
+running.
+
+The same was supposed to be true for perf_event_update_userpage();
+after all, self-monitoring implies the context is active. *HOWEVER*, as
+per commit f79256532682 ("perf/core: fix userpage->time_enabled of
+inactive events") this goes wrong when combined with counter
+overcommit: in that case those events that do not get scheduled when
+the context becomes active (task events typically) miss out on the
+EVENT_TIME update and ENABLED time is inflated (for a little while)
+with the time the context was inactive. Once the event gets rotated
+in, this gets corrected, leading to a non-monotonic timeflow.
+
+perf_event_read_local() made things even worse: it can request time at
+any point, suffering all the problems perf_event_update_userpage()
+does and more. Because while perf_event_update_userpage() is limited
+by the context being active, perf_event_read_local() users have no
+such constraint.
+
+Therefore, completely overhaul things and do away with
+perf_event::shadow_ctx_time. Instead have regular context time updates
+keep track of this offset directly and provide perf_event_time_now()
+to complement perf_event_time().
+
+perf_event_time_now() will, in addition to being context-wide, also
+take into account whether the context is active. For an inactive context, it
+will not advance time.
+
+This latter property means the cgroup perf_cgroup_info context needs
+to grow additional state to track this.
+
+Additionally, since all this is strictly per-cpu, we can use barrier()
+to order context activity vs context time.
+
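+A standalone model of the resulting lockless read (a simplified
+sketch; the memory ordering and the active check are elided here but
+present in the real code):
+
+  #include <stdint.h>
+
+  struct ctx_clock {
+          uint64_t time;       /* accumulated context time */
+          uint64_t timestamp;  /* last update, in perf_clock() units */
+          uint64_t timeoffset; /* time - timestamp, published for readers */
+  };
+
+  /* Writer side, with the context lock held. */
+  static void ctx_update_time(struct ctx_clock *c, uint64_t now)
+  {
+          c->time += now - c->timestamp;
+          c->timestamp = now;
+          c->timeoffset = c->time - c->timestamp;
+  }
+
+  /* Lock-free reader: time' = now + (time - timestamp). */
+  static uint64_t ctx_read_time(const struct ctx_clock *c, uint64_t now)
+  {
+          return now + c->timeoffset;
+  }
+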
+Fixes: 7d9285e82db5 ("perf/bpf: Extend the perf_event_read_local() interface, a.k.a. "bpf: perf event change needed for subsequent bpf helpers"")
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Song Liu <song@kernel.org>
+Tested-by: Namhyung Kim <namhyung@kernel.org>
+Link: https://lkml.kernel.org/r/YcB06DasOBtU0b00@hirez.programming.kicks-ass.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/perf_event.h | 15 +--
+ kernel/events/core.c | 246 ++++++++++++++++++++++---------------
+ 2 files changed, 149 insertions(+), 112 deletions(-)
+
+diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
+index ae1f0c8b75623..6cce33e7e7acc 100644
+--- a/include/linux/perf_event.h
++++ b/include/linux/perf_event.h
+@@ -680,18 +680,6 @@ struct perf_event {
+ u64 total_time_running;
+ u64 tstamp;
+
+- /*
+- * timestamp shadows the actual context timing but it can
+- * be safely used in NMI interrupt context. It reflects the
+- * context time as it was when the event was last scheduled in,
+- * or when ctx_sched_in failed to schedule the event because we
+- * run out of PMC.
+- *
+- * ctx_time already accounts for ctx->timestamp. Therefore to
+- * compute ctx_time for a sample, simply add perf_clock().
+- */
+- u64 shadow_ctx_time;
+-
+ struct perf_event_attr attr;
+ u16 header_size;
+ u16 id_header_size;
+@@ -838,6 +826,7 @@ struct perf_event_context {
+ */
+ u64 time;
+ u64 timestamp;
++ u64 timeoffset;
+
+ /*
+ * These fields let us detect when two contexts have both
+@@ -920,6 +909,8 @@ struct bpf_perf_event_data_kern {
+ struct perf_cgroup_info {
+ u64 time;
+ u64 timestamp;
++ u64 timeoffset;
++ int active;
+ };
+
+ struct perf_cgroup {
+diff --git a/kernel/events/core.c b/kernel/events/core.c
+index 0fe6a65bbd58f..0153f8f972834 100644
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -674,6 +674,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state)
+ WRITE_ONCE(event->state, state);
+ }
+
++/*
++ * UP store-release, load-acquire
++ */
++
++#define __store_release(ptr, val) \
++do { \
++ barrier(); \
++ WRITE_ONCE(*(ptr), (val)); \
++} while (0)
++
++#define __load_acquire(ptr) \
++({ \
++ __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \
++ barrier(); \
++ ___p; \
++})
++
+ #ifdef CONFIG_CGROUP_PERF
+
+ static inline bool
+@@ -719,34 +736,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event)
+ return t->time;
+ }
+
+-static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
++static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
+ {
+- struct perf_cgroup_info *info;
+- u64 now;
+-
+- now = perf_clock();
++ struct perf_cgroup_info *t;
+
+- info = this_cpu_ptr(cgrp->info);
++ t = per_cpu_ptr(event->cgrp->info, event->cpu);
++ if (!__load_acquire(&t->active))
++ return t->time;
++ now += READ_ONCE(t->timeoffset);
++ return now;
++}
+
+- info->time += now - info->timestamp;
++static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv)
++{
++ if (adv)
++ info->time += now - info->timestamp;
+ info->timestamp = now;
++ /*
++ * see update_context_time()
++ */
++ WRITE_ONCE(info->timeoffset, info->time - info->timestamp);
+ }
+
+-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
++static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final)
+ {
+ struct perf_cgroup *cgrp = cpuctx->cgrp;
+ struct cgroup_subsys_state *css;
++ struct perf_cgroup_info *info;
+
+ if (cgrp) {
++ u64 now = perf_clock();
++
+ for (css = &cgrp->css; css; css = css->parent) {
+ cgrp = container_of(css, struct perf_cgroup, css);
+- __update_cgrp_time(cgrp);
++ info = this_cpu_ptr(cgrp->info);
++
++ __update_cgrp_time(info, now, true);
++ if (final)
++ __store_release(&info->active, 0);
+ }
+ }
+ }
+
+ static inline void update_cgrp_time_from_event(struct perf_event *event)
+ {
++ struct perf_cgroup_info *info;
+ struct perf_cgroup *cgrp;
+
+ /*
+@@ -760,8 +794,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
+ /*
+ * Do not update time when cgroup is not active
+ */
+- if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup))
+- __update_cgrp_time(event->cgrp);
++ if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) {
++ info = this_cpu_ptr(event->cgrp->info);
++ __update_cgrp_time(info, perf_clock(), true);
++ }
+ }
+
+ static inline void
+@@ -785,7 +821,8 @@ perf_cgroup_set_timestamp(struct task_struct *task,
+ for (css = &cgrp->css; css; css = css->parent) {
+ cgrp = container_of(css, struct perf_cgroup, css);
+ info = this_cpu_ptr(cgrp->info);
+- info->timestamp = ctx->timestamp;
++ __update_cgrp_time(info, ctx->timestamp, false);
++ __store_release(&info->active, 1);
+ }
+ }
+
+@@ -981,14 +1018,6 @@ out:
+ return ret;
+ }
+
+-static inline void
+-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
+-{
+- struct perf_cgroup_info *t;
+- t = per_cpu_ptr(event->cgrp->info, event->cpu);
+- event->shadow_ctx_time = now - t->timestamp;
+-}
+-
+ static inline void
+ perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx)
+ {
+@@ -1066,7 +1095,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event)
+ {
+ }
+
+-static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
++static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx,
++ bool final)
+ {
+ }
+
+@@ -1098,12 +1128,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next)
+ {
+ }
+
+-static inline void
+-perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
++static inline u64 perf_cgroup_event_time(struct perf_event *event)
+ {
++ return 0;
+ }
+
+-static inline u64 perf_cgroup_event_time(struct perf_event *event)
++static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now)
+ {
+ return 0;
+ }
+@@ -1525,22 +1555,59 @@ static void perf_unpin_context(struct perf_event_context *ctx)
+ /*
+ * Update the record of the current time in a context.
+ */
+-static void update_context_time(struct perf_event_context *ctx)
++static void __update_context_time(struct perf_event_context *ctx, bool adv)
+ {
+ u64 now = perf_clock();
+
+- ctx->time += now - ctx->timestamp;
++ if (adv)
++ ctx->time += now - ctx->timestamp;
+ ctx->timestamp = now;
++
++ /*
++ * The above: time' = time + (now - timestamp), can be re-arranged
++ * into: time` = now + (time - timestamp), which gives a single value
++ * offset to compute future time without locks on.
++ *
++ * See perf_event_time_now(), which can be used from NMI context where
++ * it's (obviously) not possible to acquire ctx->lock in order to read
++ * both the above values in a consistent manner.
++ */
++ WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp);
++}
++
++static void update_context_time(struct perf_event_context *ctx)
++{
++ __update_context_time(ctx, true);
+ }
+
+ static u64 perf_event_time(struct perf_event *event)
+ {
+ struct perf_event_context *ctx = event->ctx;
+
++ if (unlikely(!ctx))
++ return 0;
++
+ if (is_cgroup_event(event))
+ return perf_cgroup_event_time(event);
+
+- return ctx ? ctx->time : 0;
++ return ctx->time;
++}
++
++static u64 perf_event_time_now(struct perf_event *event, u64 now)
++{
++ struct perf_event_context *ctx = event->ctx;
++
++ if (unlikely(!ctx))
++ return 0;
++
++ if (is_cgroup_event(event))
++ return perf_cgroup_event_time_now(event, now);
++
++ if (!(__load_acquire(&ctx->is_active) & EVENT_TIME))
++ return ctx->time;
++
++ now += READ_ONCE(ctx->timeoffset);
++ return now;
+ }
+
+ static enum event_type_t get_event_type(struct perf_event *event)
+@@ -2346,7 +2413,7 @@ __perf_remove_from_context(struct perf_event *event,
+
+ if (ctx->is_active & EVENT_TIME) {
+ update_context_time(ctx);
+- update_cgrp_time_from_cpuctx(cpuctx);
++ update_cgrp_time_from_cpuctx(cpuctx, false);
+ }
+
+ event_sched_out(event, cpuctx, ctx);
+@@ -2357,6 +2424,9 @@ __perf_remove_from_context(struct perf_event *event,
+ list_del_event(event, ctx);
+
+ if (!ctx->nr_events && ctx->is_active) {
++ if (ctx == &cpuctx->ctx)
++ update_cgrp_time_from_cpuctx(cpuctx, true);
++
+ ctx->is_active = 0;
+ ctx->rotate_necessary = 0;
+ if (ctx->task) {
+@@ -2478,40 +2548,6 @@ void perf_event_disable_inatomic(struct perf_event *event)
+ irq_work_queue(&event->pending);
+ }
+
+-static void perf_set_shadow_time(struct perf_event *event,
+- struct perf_event_context *ctx)
+-{
+- /*
+- * use the correct time source for the time snapshot
+- *
+- * We could get by without this by leveraging the
+- * fact that to get to this function, the caller
+- * has most likely already called update_context_time()
+- * and update_cgrp_time_xx() and thus both timestamp
+- * are identical (or very close). Given that tstamp is,
+- * already adjusted for cgroup, we could say that:
+- * tstamp - ctx->timestamp
+- * is equivalent to
+- * tstamp - cgrp->timestamp.
+- *
+- * Then, in perf_output_read(), the calculation would
+- * work with no changes because:
+- * - event is guaranteed scheduled in
+- * - no scheduled out in between
+- * - thus the timestamp would be the same
+- *
+- * But this is a bit hairy.
+- *
+- * So instead, we have an explicit cgroup call to remain
+- * within the time source all along. We believe it
+- * is cleaner and simpler to understand.
+- */
+- if (is_cgroup_event(event))
+- perf_cgroup_set_shadow_time(event, event->tstamp);
+- else
+- event->shadow_ctx_time = event->tstamp - ctx->timestamp;
+-}
+-
+ #define MAX_INTERRUPTS (~0ULL)
+
+ static void perf_log_throttle(struct perf_event *event, int enable);
+@@ -2552,8 +2588,6 @@ event_sched_in(struct perf_event *event,
+
+ perf_pmu_disable(event->pmu);
+
+- perf_set_shadow_time(event, ctx);
+-
+ perf_log_itrace_start(event);
+
+ if (event->pmu->add(event, PERF_EF_START)) {
+@@ -3247,16 +3281,6 @@ static void ctx_sched_out(struct perf_event_context *ctx,
+ return;
+ }
+
+- ctx->is_active &= ~event_type;
+- if (!(ctx->is_active & EVENT_ALL))
+- ctx->is_active = 0;
+-
+- if (ctx->task) {
+- WARN_ON_ONCE(cpuctx->task_ctx != ctx);
+- if (!ctx->is_active)
+- cpuctx->task_ctx = NULL;
+- }
+-
+ /*
+ * Always update time if it was set; not only when it changes.
+ * Otherwise we can 'forget' to update time for any but the last
+@@ -3270,7 +3294,22 @@ static void ctx_sched_out(struct perf_event_context *ctx,
+ if (is_active & EVENT_TIME) {
+ /* update (and stop) ctx time */
+ update_context_time(ctx);
+- update_cgrp_time_from_cpuctx(cpuctx);
++ update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx);
++ /*
++ * CPU-release for the below ->is_active store,
++ * see __load_acquire() in perf_event_time_now()
++ */
++ barrier();
++ }
++
++ ctx->is_active &= ~event_type;
++ if (!(ctx->is_active & EVENT_ALL))
++ ctx->is_active = 0;
++
++ if (ctx->task) {
++ WARN_ON_ONCE(cpuctx->task_ctx != ctx);
++ if (!ctx->is_active)
++ cpuctx->task_ctx = NULL;
+ }
+
+ is_active ^= ctx->is_active; /* changed bits */
+@@ -3707,13 +3746,19 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx,
+ return 0;
+ }
+
++/*
++ * Because the userpage is strictly per-event (there is no concept of context,
++ * so there cannot be a context indirection), every userpage must be updated
++ * when context time starts :-(
++ *
++ * IOW, we must not miss EVENT_TIME edges.
++ */
+ static inline bool event_update_userpage(struct perf_event *event)
+ {
+ if (likely(!atomic_read(&event->mmap_count)))
+ return false;
+
+ perf_event_update_time(event);
+- perf_set_shadow_time(event, event->ctx);
+ perf_event_update_userpage(event);
+
+ return true;
+@@ -3797,13 +3842,23 @@ ctx_sched_in(struct perf_event_context *ctx,
+ struct task_struct *task)
+ {
+ int is_active = ctx->is_active;
+- u64 now;
+
+ lockdep_assert_held(&ctx->lock);
+
+ if (likely(!ctx->nr_events))
+ return;
+
++ if (is_active ^ EVENT_TIME) {
++ /* start ctx time */
++ __update_context_time(ctx, false);
++ perf_cgroup_set_timestamp(task, ctx);
++ /*
++ * CPU-release for the below ->is_active store,
++ * see __load_acquire() in perf_event_time_now()
++ */
++ barrier();
++ }
++
+ ctx->is_active |= (event_type | EVENT_TIME);
+ if (ctx->task) {
+ if (!is_active)
+@@ -3814,13 +3869,6 @@ ctx_sched_in(struct perf_event_context *ctx,
+
+ is_active ^= ctx->is_active; /* changed bits */
+
+- if (is_active & EVENT_TIME) {
+- /* start ctx time */
+- now = perf_clock();
+- ctx->timestamp = now;
+- perf_cgroup_set_timestamp(task, ctx);
+- }
+-
+ /*
+ * First go through the list and put on any pinned groups
+ * in order to give them the best chance of going on.
+@@ -4414,6 +4462,18 @@ static inline u64 perf_event_count(struct perf_event *event)
+ return local64_read(&event->count) + atomic64_read(&event->child_count);
+ }
+
++static void calc_timer_values(struct perf_event *event,
++ u64 *now,
++ u64 *enabled,
++ u64 *running)
++{
++ u64 ctx_time;
++
++ *now = perf_clock();
++ ctx_time = perf_event_time_now(event, *now);
++ __perf_update_times(event, ctx_time, enabled, running);
++}
++
+ /*
+ * NMI-safe method to read a local event, that is an event that
+ * is:
+@@ -4473,10 +4533,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
+
+ *value = local64_read(&event->count);
+ if (enabled || running) {
+- u64 now = event->shadow_ctx_time + perf_clock();
+- u64 __enabled, __running;
++ u64 __enabled, __running, __now;
+
+- __perf_update_times(event, now, &__enabled, &__running);
++ calc_timer_values(event, &__now, &__enabled, &__running);
+ if (enabled)
+ *enabled = __enabled;
+ if (running)
+@@ -5798,18 +5857,6 @@ static int perf_event_index(struct perf_event *event)
+ return event->pmu->event_idx(event);
+ }
+
+-static void calc_timer_values(struct perf_event *event,
+- u64 *now,
+- u64 *enabled,
+- u64 *running)
+-{
+- u64 ctx_time;
+-
+- *now = perf_clock();
+- ctx_time = event->shadow_ctx_time + *now;
+- __perf_update_times(event, ctx_time, enabled, running);
+-}
+-
+ static void perf_event_init_userpage(struct perf_event *event)
+ {
+ struct perf_event_mmap_page *userpg;
+@@ -6349,7 +6396,6 @@ accounting:
+ ring_buffer_attach(event, rb);
+
+ perf_event_update_time(event);
+- perf_set_shadow_time(event, event->ctx);
+ perf_event_init_userpage(event);
+ perf_event_update_userpage(event);
+ } else {
+--
+2.34.1
+
--- /dev/null
+From c7a186d60eebdacf2d9454601c5929562826e865 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jan 2022 17:27:48 +0100
+Subject: phylib: fix potential use-after-free
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Marek Behún <kabel@kernel.org>
+
+[ Upstream commit cbda1b16687580d5beee38273f6241ae3725960c ]
+
+Commit bafbdd527d56 ("phylib: Add device reset GPIO support") added a
+call to phy_device_reset(phydev) after the put_device() call in
+phy_detach().
+
+The comment before the put_device() call says that the phydev might go
+away with put_device().
+
+Fix potential use-after-free by calling phy_device_reset() before
+put_device().
+
+Fixes: bafbdd527d56 ("phylib: Add device reset GPIO support")
+Signed-off-by: Marek Behún <kabel@kernel.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20220119162748.32418-1-kabel@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_device.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 4f9990b47a377..28f4a383aba72 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -1746,6 +1746,9 @@ void phy_detach(struct phy_device *phydev)
+ phy_driver_is_genphy_10g(phydev))
+ device_release_driver(&phydev->mdio.dev);
+
++ /* Assert the reset signal */
++ phy_device_reset(phydev, 1);
++
+ /*
+ * The phydev might go away on the put_device() below, so avoid
+ * a use-after-free bug by reading the underlying bus first.
+@@ -1757,9 +1760,6 @@ void phy_detach(struct phy_device *phydev)
+ ndev_owner = dev->dev.parent->driver->owner;
+ if (ndev_owner != bus->owner)
+ module_put(bus->owner);
+-
+- /* Assert the reset signal */
+- phy_device_reset(phydev, 1);
+ }
+ EXPORT_SYMBOL(phy_detach);
+
+--
+2.34.1
+
--- /dev/null
+From 7129d076879177c945633bc47433edbb47d7bcd6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jan 2022 23:44:03 +1000
+Subject: powerpc/64s: Mask SRR0 before checking against the masked NIP
+
+From: Nicholas Piggin <npiggin@gmail.com>
+
+[ Upstream commit aee101d7b95a03078945681dd7f7ea5e4a1e7686 ]
+
+Commit 314f6c23dd8d ("powerpc/64s: Mask NIP before checking against
+SRR0") masked off the low 2 bits of the NIP value in the interrupt
+stack frame in case they are non-zero and mis-compare against a SRR0
+register value of a CPU which always reads back 0 from the 2 low bits
+which are reserved.
+
+This now causes the opposite problem that an implementation which does
+implement those bits in SRR0 will mis-compare against the masked NIP
+value in which they have been cleared. QEMU is one such implementation,
+and this is allowed by the architecture.
+
+This can be triggered by sigfuz by setting low bits of PT_NIP in the
+signal context.
+
+Fix this for now by masking the SRR0 bits as well. It would probably be
+cleaner to sanitise these values before putting them in registers or on
+the stack, but this is the quick and backportable fix.
+
+Fixes: 314f6c23dd8d ("powerpc/64s: Mask NIP before checking against SRR0")
+Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220117134403.2995059-1-npiggin@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/kernel/interrupt_64.S | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
+index 4b1ff94e67eb4..4c6d1a8dcefed 100644
+--- a/arch/powerpc/kernel/interrupt_64.S
++++ b/arch/powerpc/kernel/interrupt_64.S
+@@ -30,6 +30,7 @@ COMPAT_SYS_CALL_TABLE:
+ .ifc \srr,srr
+ mfspr r11,SPRN_SRR0
+ ld r12,_NIP(r1)
++ clrrdi r11,r11,2
+ clrrdi r12,r12,2
+ 100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+@@ -40,6 +41,7 @@ COMPAT_SYS_CALL_TABLE:
+ .else
+ mfspr r11,SPRN_HSRR0
+ ld r12,_NIP(r1)
++ clrrdi r11,r11,2
+ clrrdi r12,r12,2
+ 100: tdne r11,r12
+ EMIT_WARN_ENTRY 100b,__FILE__,__LINE__,(BUGFLAG_WARNING | BUGFLAG_ONCE)
+--
+2.34.1
+
--- /dev/null
+From c94c276e869eb8084d695854f48475aa0df1e018 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Jan 2022 17:15:12 +0530
+Subject: powerpc64/bpf: Limit 'ldbrx' to processors compliant with ISA v2.06
+
+From: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+
+[ Upstream commit 3f5f766d5f7f95a69a630da3544a1a0cee1cdddf ]
+
+Johan reported the below crash with test_bpf on ppc64 e5500:
+
+ test_bpf: #296 ALU_END_FROM_LE 64: 0x0123456789abcdef -> 0x67452301 jited:1
+ Oops: Exception in kernel mode, sig: 4 [#1]
+ BE PAGE_SIZE=4K SMP NR_CPUS=24 QEMU e500
+ Modules linked in: test_bpf(+)
+ CPU: 0 PID: 76 Comm: insmod Not tainted 5.14.0-03771-g98c2059e008a-dirty #1
+ NIP: 8000000000061c3c LR: 80000000006dea64 CTR: 8000000000061c18
+ REGS: c0000000032d3420 TRAP: 0700 Not tainted (5.14.0-03771-g98c2059e008a-dirty)
+ MSR: 0000000080089000 <EE,ME> CR: 88002822 XER: 20000000 IRQMASK: 0
+ <...>
+ NIP [8000000000061c3c] 0x8000000000061c3c
+ LR [80000000006dea64] .__run_one+0x104/0x17c [test_bpf]
+ Call Trace:
+ .__run_one+0x60/0x17c [test_bpf] (unreliable)
+ .test_bpf_init+0x6a8/0xdc8 [test_bpf]
+ .do_one_initcall+0x6c/0x28c
+ .do_init_module+0x68/0x28c
+ .load_module+0x2460/0x2abc
+ .__do_sys_init_module+0x120/0x18c
+ .system_call_exception+0x110/0x1b8
+ system_call_common+0xf0/0x210
+ --- interrupt: c00 at 0x101d0acc
+ <...>
+ ---[ end trace 47b2bf19090bb3d0 ]---
+
+ Illegal instruction
+
+The illegal instruction turned out to be 'ldbrx' emitted for
+BPF_FROM_[L|B]E, which was only introduced in ISA v2.06. Guard use of
+the same and implement an alternative approach for older processors.
+
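+The fallback computes a 64-bit byte reverse from two 32-bit byte
+reverses. A C model of what the emitted instruction sequence computes
+(a sketch, not the JIT code itself):
+
+  #include <stdint.h>
+
+  /* bswap64(v) == (bswap32(low 32 bits) << 32) | bswap32(high 32 bits) */
+  static uint64_t bswap64_via_32(uint64_t v)
+  {
+          uint32_t lo = (uint32_t)v;
+          uint32_t hi = (uint32_t)(v >> 32);
+
+          return ((uint64_t)__builtin_bswap32(lo) << 32) |
+                 __builtin_bswap32(hi);
+  }
+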
+Fixes: 156d0e290e969c ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF")
+Reported-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Tested-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
+Acked-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/d1e51c6fdf572062cf3009a751c3406bda01b832.1641468127.git.naveen.n.rao@linux.vnet.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/ppc-opcode.h | 1 +
+ arch/powerpc/net/bpf_jit_comp64.c | 22 +++++++++++++---------
+ 2 files changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
+index baea657bc8687..bca31a61e57f8 100644
+--- a/arch/powerpc/include/asm/ppc-opcode.h
++++ b/arch/powerpc/include/asm/ppc-opcode.h
+@@ -498,6 +498,7 @@
+ #define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+ #define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+ #define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
++#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+ #define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+ #define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+ #define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i))
+diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
+index 95a337b5dc2b4..57e1b6680365c 100644
+--- a/arch/powerpc/net/bpf_jit_comp64.c
++++ b/arch/powerpc/net/bpf_jit_comp64.c
+@@ -633,17 +633,21 @@ bpf_alu32_trunc:
+ EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
+ break;
+ case 64:
+- /*
+- * Way easier and faster(?) to store the value
+- * into stack and then use ldbrx
+- *
+- * ctx->seen will be reliable in pass2, but
+- * the instructions generated will remain the
+- * same across all passes
+- */
++ /* Store the value to stack and then use byte-reverse loads */
+ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
+ EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
+- EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
++ if (cpu_has_feature(CPU_FTR_ARCH_206)) {
++ EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
++ } else {
++ EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1]));
++ if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
++ EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32));
++ EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4));
++ EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1]));
++ if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
++ EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32));
++ EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2]));
++ }
+ break;
+ }
+ break;
+--
+2.34.1
+
--- /dev/null
+From 1cbb4005d9ae8dbb89b79eff4f521438ac714bf6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Jan 2022 23:12:58 +0000
+Subject: rxrpc: Adjust retransmission backoff
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 2c13c05c5ff4b9fc907b07f7311821910ebaaf8a ]
+
+Improve retransmission backoff by only backing off when we retransmit data
+packets rather than when we set the lost ack timer.
+
+To this end:
+
+ (1) In rxrpc_resend(), use rxrpc_get_rto_backoff() when setting the
+ retransmission timer and only tell it that we are retransmitting if we
+ actually have things to retransmit.
+
+ Note that it's possible for the retransmission algorithm to race with
+ the processing of a received ACK, so we may see no packets needing
+ retransmission.
+
+ (2) In rxrpc_send_data_packet(), don't bump the backoff when setting the
+ ack_lost_at timer, as it may then get bumped twice.
+
+With this, when looking at one particular packet, the retransmission
+intervals were seen to be 1.5ms, 2ms, 3ms, 5ms, 9ms, 17ms, 33ms, 71ms,
+136ms, 264ms, 544ms, 1.088s, 2.1s, 4.2s and 8.3s.
+
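+A toy model of the intended behaviour, where the backoff shift only
+grows on an actual retransmission (a hypothetical helper, not the
+rxrpc one):
+
+  /* Return the current timeout, bumping the backoff only if we
+   * actually retransmitted something. */
+  static unsigned long rto_backoff(unsigned long rto_j,
+                                   unsigned int *backoff, int retrans)
+  {
+          unsigned long t = rto_j << *backoff;
+
+          if (retrans && *backoff < 8)
+                  (*backoff)++;
+          return t;
+  }
+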
+Fixes: c410bf01933e ("rxrpc: Fix the excessive initial retransmission timeout")
+Suggested-by: Marc Dionne <marc.dionne@auristor.com>
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
+Tested-by: Marc Dionne <marc.dionne@auristor.com>
+cc: linux-afs@lists.infradead.org
+Link: https://lore.kernel.org/r/164138117069.2023386.17446904856843997127.stgit@warthog.procyon.org.uk/
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/rxrpc/call_event.c | 8 +++-----
+ net/rxrpc/output.c | 2 +-
+ 2 files changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
+index 6be2672a65eab..df864e6922679 100644
+--- a/net/rxrpc/call_event.c
++++ b/net/rxrpc/call_event.c
+@@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
+ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+ {
+ struct sk_buff *skb;
+- unsigned long resend_at, rto_j;
++ unsigned long resend_at;
+ rxrpc_seq_t cursor, seq, top;
+ ktime_t now, max_age, oldest, ack_ts;
+ int ix;
+@@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+
+ _enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
+
+- rto_j = call->peer->rto_j;
+-
+ now = ktime_get_real();
+- max_age = ktime_sub(now, jiffies_to_usecs(rto_j));
++ max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j));
+
+ spin_lock_bh(&call->lock);
+
+@@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j)
+ }
+
+ resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest)));
+- resend_at += jiffies + rto_j;
++ resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans);
+ WRITE_ONCE(call->resend_at, resend_at);
+
+ if (unacked)
+diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
+index 10f2bf2e9068a..a45c83f22236e 100644
+--- a/net/rxrpc/output.c
++++ b/net/rxrpc/output.c
+@@ -468,7 +468,7 @@ done:
+ if (call->peer->rtt_count > 1) {
+ unsigned long nowj = jiffies, ack_lost_at;
+
+- ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans);
++ ack_lost_at = rxrpc_get_rto_backoff(call->peer, false);
+ ack_lost_at += nowj;
+ WRITE_ONCE(call->ack_lost_at, ack_lost_at);
+ rxrpc_reduce_call_timer(call, ack_lost_at, nowj,
+--
+2.34.1
+
--- /dev/null
+From b19759818555424de7d26fde6024f936d388f140 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jan 2022 14:46:56 +0100
+Subject: sched/pelt: Relax the sync of util_sum with util_avg
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit 98b0d890220d45418cfbc5157b3382e6da5a12ab ]
+
+Rick reported performance regressions in bugzilla because of cpu frequency
+being lower than before:
+ https://bugzilla.kernel.org/show_bug.cgi?id=215045
+
+He bisected the problem to:
+commit 1c35b07e6d39 ("sched/fair: Ensure _sum and _avg values stay consistent")
+
+This commit forces util_sum to be synced with the new util_avg after
+removing the contribution of a task and before the next periodic sync. By
+doing so util_sum is rounded to its lower bound and might lose up to
+LOAD_AVG_MAX-1 of accumulated contribution which has not yet been
+reflected in util_avg.
+
+Instead of always setting util_sum to the low bound of util_avg, which can
+significantly lower the utilization of root cfs_rq after propagating the
+change down into the hierarchy, we revert the change of util_sum and
+propagate the difference.
+
+In addition, we also check that cfs's util_sum always stays above the
+lower bound for a given util_avg as it has been observed that
+sched_entity's util_sum is sometimes above the cfs_rq one.
+
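+A simplified model of the removal path after this change (a standalone
+sketch; LOAD_AVG_MAX and the divider handling follow the PELT code):
+
+  #include <stdint.h>
+
+  #define LOAD_AVG_MAX     47742
+  #define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)
+
+  /* Subtract without underflowing, like the kernel's sub_positive(). */
+  static void sub_positive(uint64_t *v, uint64_t d)
+  {
+          *v = (*v > d) ? *v - d : 0;
+  }
+
+  static void remove_util(uint64_t *util_avg, uint64_t *util_sum,
+                          uint64_t removed, uint32_t divider)
+  {
+          sub_positive(util_avg, removed);
+          sub_positive(util_sum, removed * divider);
+          /* Keep util_sum above the minimum consistent with util_avg. */
+          if (*util_sum < *util_avg * PELT_MIN_DIVIDER)
+                  *util_sum = *util_avg * PELT_MIN_DIVIDER;
+  }
+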
+Fixes: 1c35b07e6d39 ("sched/fair: Ensure _sum and _avg values stay consistent")
+Reported-by: Rick Yiu <rickyiu@google.com>
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
+Tested-by: Sachin Sant <sachinp@linux.ibm.com>
+Link: https://lkml.kernel.org/r/20220111134659.24961-2-vincent.guittot@linaro.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 16 +++++++++++++---
+ kernel/sched/pelt.h | 4 +++-
+ 2 files changed, 16 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index d41f966f5866a..6420580f2730b 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3422,7 +3422,6 @@ void set_task_rq_fair(struct sched_entity *se,
+ se->avg.last_update_time = n_last_update_time;
+ }
+
+-
+ /*
+ * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
+ * propagate its contribution. The key to this propagation is the invariant
+@@ -3490,7 +3489,6 @@ void set_task_rq_fair(struct sched_entity *se,
+ * XXX: only do this for the part of runnable > running ?
+ *
+ */
+-
+ static inline void
+ update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
+ {
+@@ -3722,7 +3720,19 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
+
+ r = removed_util;
+ sub_positive(&sa->util_avg, r);
+- sa->util_sum = sa->util_avg * divider;
++ sub_positive(&sa->util_sum, r * divider);
++ /*
++ * Because of rounding, se->util_sum might end up being +1 more than
++ * cfs->util_sum. Although this is not a problem by itself, detaching
++ * a lot of tasks with the rounding problem between 2 updates of
++ * util_avg (~1ms) can make cfs->util_sum become zero whereas
++ * cfs->util_avg is not.
++ * Check that util_sum is still above its lower bound for the new
++ * util_avg. Given that period_contrib might have moved since the last
++ * sync, we are only sure that util_sum must be above or equal to
++ * util_avg * the minimum possible divider.
++ */
++ sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
+
+ r = removed_runnable;
+ sub_positive(&sa->runnable_avg, r);
+diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h
+index e06071bf3472c..c336f5f481bca 100644
+--- a/kernel/sched/pelt.h
++++ b/kernel/sched/pelt.h
+@@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running)
+ }
+ #endif
+
++#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)
++
+ static inline u32 get_pelt_divider(struct sched_avg *avg)
+ {
+- return LOAD_AVG_MAX - 1024 + avg->period_contrib;
++ return PELT_MIN_DIVIDER + avg->period_contrib;
+ }
+
+ static inline void cfs_se_util_change(struct sched_avg *avg)
+--
+2.34.1
+
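To make the arithmetic above concrete: in PELT, util_avg is
approximately util_sum / divider, where divider is PELT_MIN_DIVIDER
plus the current period_contrib, so util_sum legitimately runs ahead
of util_avg * divider by a not-yet-averaged remainder. The old code
resynced util_sum from util_avg and threw that remainder away; the new
code subtracts the removed contribution from util_sum directly and
only clamps it to the lower bound. A self-contained sketch of the
arithmetic follows; only LOAD_AVG_MAX and PELT_MIN_DIVIDER mirror the
kernel's values, all other numbers are made up for illustration.

	#include <stdio.h>

	#define LOAD_AVG_MAX	 47742			/* max PELT geometric series sum */
	#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024)	/* divider at period_contrib == 0 */

	int main(void)
	{
		unsigned long util_avg = 100;
		unsigned int period_contrib = 512;	/* 0..1023 */
		unsigned long divider = PELT_MIN_DIVIDER + period_contrib;
		/* util_sum may carry pending, not-yet-averaged contribution. */
		unsigned long util_sum = util_avg * divider + 40000;
		unsigned long removed = 60;	/* util_avg of a detached task */

		/* Old behaviour: resync util_sum from the new util_avg,
		 * silently discarding the 40000 of pending contribution. */
		unsigned long old_sum = (util_avg - removed) * divider;

		/* New behaviour: subtract the equivalent amount and only
		 * clamp to the lowest util_sum consistent with util_avg. */
		unsigned long new_sum = util_sum - removed * divider;
		unsigned long floor = (util_avg - removed) * PELT_MIN_DIVIDER;
		if (new_sum < floor)
			new_sum = floor;

		printf("resync=%lu subtract+clamp=%lu preserved=%lu\n",
		       old_sum, new_sum, new_sum - old_sum);
		return 0;
	}
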
--- /dev/null
+From e88ab3d1eeb91c03f3d87442859ad933f44f0107 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jan 2022 16:35:29 -0800
+Subject: selftests: mptcp: fix ipv6 routing setup
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 9846921dba4936d92f7608315b5d1e0a8ec3a538 ]
+
+MPJ ipv6 selftests currently lack a per-link route to the server
+net. Additionally, ipv6 subflow endpoints are created without any
+interface specified. The end result is that in the ipv6 self-tests
+all subflows are created on the same link, leading to expected delays
+and sporadic self-test failures.
+
+Fix the issue by adding the missing setup bits.
+
+Fixes: 523514ed0a99 ("selftests: mptcp: add ADD_ADDR IPv6 test cases")
+Reported-and-tested-by: Geliang Tang <geliang.tang@suse.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/mptcp/mptcp_join.sh | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+index 0c12602fa22e8..38777d1ef766f 100755
+--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
++++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
+@@ -75,6 +75,7 @@ init()
+
+ # let $ns2 reach any $ns1 address from any interface
+ ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
++ ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i
+ done
+ }
+
+@@ -1386,7 +1387,7 @@ ipv6_tests()
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
++ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+ run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
+ chk_join_nr "single subflow IPv6" 1 1 1
+
+@@ -1421,7 +1422,7 @@ ipv6_tests()
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ ip netns exec $ns1 ./pm_nl_ctl add dead:beef:2::1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 flags subflow
++ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:3::2 dev ns2eth3 flags subflow
+ run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow
+ chk_join_nr "remove subflow and signal IPv6" 2 2 2
+ chk_add_nr 1 1
+--
+2.34.1
+
rpmsg-char-fix-race-between-the-release-of-rpmsg_eptdev-and-cdev.patch
scsi-elx-efct-don-t-use-gfp_kernel-under-spin-lock.patch
scsi-bnx2fc-flush-destroy_work-queue-before-calling-bnx2fc_interface_put.patch
+nfs-ensure-the-server-has-an-up-to-date-ctime-before.patch
+nfs-ensure-the-server-has-an-up-to-date-ctime-before.patch-14723
+nfsv4-handle-case-where-the-lookup-of-a-directory-fa.patch
+nfsv4-nfs_atomic_open-can-race-when-looking-up-a-non.patch
+kvm-arm64-pkvm-use-the-mm_ops-indirection-for-cache-.patch
+sunrpc-use-bit-macro-in-rpc_show_xprt_state.patch
+sunrpc-don-t-dereference-xprt-snd_task-if-it-s-a-coo.patch
+powerpc64-bpf-limit-ldbrx-to-processors-compliant-wi.patch
+netfilter-conntrack-don-t-increment-invalid-counter-.patch
+powerpc-64s-mask-srr0-before-checking-against-the-ma.patch
+perf-fix-perf_event_read_local-time.patch
+sched-pelt-relax-the-sync-of-util_sum-with-util_avg.patch
+arm-9170-1-fix-panic-when-kasan-and-kprobe-are-enabl.patch
+net-fix-information-leakage-in-proc-net-ptype.patch
+net-phy-broadcom-hook-up-soft_reset-for-bcm54616s.patch
+ipv6_tunnel-rate-limit-warning-messages.patch
+net-stmmac-dwmac-visconti-fix-bit-definitions-for-et.patch
+net-stmmac-dwmac-visconti-fix-clock-configuration-fo.patch
+phylib-fix-potential-use-after-free.patch
+ipv6-annotate-accesses-to-fn-fn_sernum.patch
+mptcp-allow-changing-the-backup-bit-by-endpoint-id.patch
+mptcp-clean-up-harmless-false-expressions.patch
+mptcp-keep-track-of-local-endpoint-still-available-f.patch
+mptcp-fix-msk-traversal-in-mptcp_nl_cmd_set_flags.patch
+mptcp-fix-removing-ids-bitmap-setting.patch
+selftests-mptcp-fix-ipv6-routing-setup.patch
+octeontx2-af-do-not-fixup-all-vf-action-entries.patch
+octeontx2-af-fix-lbk-backpressure-id-count.patch
+octeontx2-af-retry-until-rvu-block-reset-complete.patch
+octeontx2-pf-cn10k-ensure-valid-pointers-are-freed-t.patch
+octeontx2-af-verify-cq-context-updates.patch
+octeontx2-af-increase-link-credit-restore-polling-ti.patch
+octeontx2-af-cn10k-do-not-enable-rpm-loopback-for-lp.patch
+octeontx2-pf-forward-error-codes-to-vf.patch
+rxrpc-adjust-retransmission-backoff.patch
+efi-libstub-arm64-fix-image-check-alignment-at-entry.patch
+io_uring-fix-bug-in-slow-unregistering-of-nodes.patch
--- /dev/null
+From 163d8c8237eecaebd61404f0f4f6b7ae73e969bd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jan 2022 12:20:36 -0500
+Subject: SUNRPC: Don't dereference xprt->snd_task if it's a cookie
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit aed28b7a2d620cb5cd0c554cb889075c02e25e8e ]
+
+Fixes: e26d9972720e ("SUNRPC: Clean up scheduling of autoclose")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/sunrpc.h | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index 312507cb341f4..daaf407e9e494 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -936,7 +936,8 @@ TRACE_EVENT(rpc_socket_nospace,
+ { BIT(XPRT_REMOVE), "REMOVE" }, \
+ { BIT(XPRT_CONGESTED), "CONGESTED" }, \
+ { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \
+- { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" })
++ { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" }, \
++ { BIT(XPRT_SND_IS_COOKIE), "SND_IS_COOKIE" })
+
+ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class,
+ TP_PROTO(
+@@ -1133,8 +1134,11 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,
+ __entry->task_id = -1;
+ __entry->client_id = -1;
+ }
+- __entry->snd_task_id = xprt->snd_task ?
+- xprt->snd_task->tk_pid : -1;
++ if (xprt->snd_task &&
++ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
++ __entry->snd_task_id = xprt->snd_task->tk_pid;
++ else
++ __entry->snd_task_id = -1;
+ ),
+
+ TP_printk("task:%u@%u snd_task:%u",
+@@ -1178,8 +1182,12 @@ DECLARE_EVENT_CLASS(xprt_cong_event,
+ __entry->task_id = -1;
+ __entry->client_id = -1;
+ }
+- __entry->snd_task_id = xprt->snd_task ?
+- xprt->snd_task->tk_pid : -1;
++ if (xprt->snd_task &&
++ !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
++ __entry->snd_task_id = xprt->snd_task->tk_pid;
++ else
++ __entry->snd_task_id = -1;
++
+ __entry->cong = xprt->cong;
+ __entry->cwnd = xprt->cwnd;
+ __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state);
+--
+2.34.1
+
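The bug class this guards against is worth spelling out: xprt->snd_task
normally points at a struct rpc_task, but when XPRT_SND_IS_COOKIE is
set in xprt->state the same field holds an opaque cookie, so chasing
the pointer would read garbage or fault. Below is a minimal standalone
sketch of the guard pattern using stand-in types (fake_xprt, fake_task,
test_bit_); the real SUNRPC types and test_bit() differ.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define SND_IS_COOKIE 0	/* bit number within state */

	struct fake_task { int tk_pid; };

	struct fake_xprt {
		unsigned long state;
		struct fake_task *snd_task;	/* real pointer OR cookie */
	};

	static bool test_bit_(int nr, const unsigned long *addr)
	{
		return (*addr >> nr) & 1UL;
	}

	static int snd_task_id(const struct fake_xprt *xprt)
	{
		/* Dereference only when snd_task is a genuine pointer. */
		if (xprt->snd_task && !test_bit_(SND_IS_COOKIE, &xprt->state))
			return xprt->snd_task->tk_pid;
		return -1;
	}

	int main(void)
	{
		struct fake_task task = { .tk_pid = 42 };
		struct fake_xprt real = { .state = 0, .snd_task = &task };
		struct fake_xprt cookie = {
			.state = 1UL << SND_IS_COOKIE,
			.snd_task = (struct fake_task *)(uintptr_t)0xdeadbeef,
		};

		printf("real:   %d\n", snd_task_id(&real));   /* 42 */
		printf("cookie: %d\n", snd_task_id(&cookie)); /* -1, no deref */
		return 0;
	}
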
--- /dev/null
+From e4e0ab17ef8c18fafa1713b3aeb1163368e612dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Oct 2021 18:02:38 -0400
+Subject: SUNRPC: Use BIT() macro in rpc_show_xprt_state()
+
+From: Chuck Lever <chuck.lever@oracle.com>
+
+[ Upstream commit 76497b1adb89175eee85afc437f08a68247314b3 ]
+
+Clean up: BIT() is preferred over open-coding the shift.
+
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/sunrpc.h | 24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
+index 2d04eb96d4183..312507cb341f4 100644
+--- a/include/trace/events/sunrpc.h
++++ b/include/trace/events/sunrpc.h
+@@ -925,18 +925,18 @@ TRACE_EVENT(rpc_socket_nospace,
+
+ #define rpc_show_xprt_state(x) \
+ __print_flags(x, "|", \
+- { (1UL << XPRT_LOCKED), "LOCKED"}, \
+- { (1UL << XPRT_CONNECTED), "CONNECTED"}, \
+- { (1UL << XPRT_CONNECTING), "CONNECTING"}, \
+- { (1UL << XPRT_CLOSE_WAIT), "CLOSE_WAIT"}, \
+- { (1UL << XPRT_BOUND), "BOUND"}, \
+- { (1UL << XPRT_BINDING), "BINDING"}, \
+- { (1UL << XPRT_CLOSING), "CLOSING"}, \
+- { (1UL << XPRT_OFFLINE), "OFFLINE"}, \
+- { (1UL << XPRT_REMOVE), "REMOVE"}, \
+- { (1UL << XPRT_CONGESTED), "CONGESTED"}, \
+- { (1UL << XPRT_CWND_WAIT), "CWND_WAIT"}, \
+- { (1UL << XPRT_WRITE_SPACE), "WRITE_SPACE"})
++ { BIT(XPRT_LOCKED), "LOCKED" }, \
++ { BIT(XPRT_CONNECTED), "CONNECTED" }, \
++ { BIT(XPRT_CONNECTING), "CONNECTING" }, \
++ { BIT(XPRT_CLOSE_WAIT), "CLOSE_WAIT" }, \
++ { BIT(XPRT_BOUND), "BOUND" }, \
++ { BIT(XPRT_BINDING), "BINDING" }, \
++ { BIT(XPRT_CLOSING), "CLOSING" }, \
++ { BIT(XPRT_OFFLINE), "OFFLINE" }, \
++ { BIT(XPRT_REMOVE), "REMOVE" }, \
++ { BIT(XPRT_CONGESTED), "CONGESTED" }, \
++ { BIT(XPRT_CWND_WAIT), "CWND_WAIT" }, \
++ { BIT(XPRT_WRITE_SPACE), "WRITE_SPACE" })
+
+ DECLARE_EVENT_CLASS(rpc_xprt_lifetime_class,
+ TP_PROTO(
+--
+2.34.1
+
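BIT(nr) in the kernel is just (1UL << (nr)) (include/vdso/bits.h), so
this patch changes no behaviour; it is carried in this queue so that
the XPRT_SND_IS_COOKIE patch above, whose context lines already use
BIT(), applies cleanly. A trivial standalone check that the two
spellings are interchangeable:

	#include <stdio.h>

	#define BIT(nr) (1UL << (nr))

	enum { XPRT_LOCKED = 0, XPRT_CONNECTED = 1, XPRT_CONNECTING = 2 };

	int main(void)
	{
		unsigned long state = BIT(XPRT_LOCKED) | BIT(XPRT_CONNECTING);

		/* Identical to the open-coded (1UL << x) being replaced. */
		printf("state=%#lx locked=%d connected=%d\n", state,
		       !!(state & BIT(XPRT_LOCKED)),
		       !!(state & BIT(XPRT_CONNECTED)));
		return 0;
	}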