From: Sasha Levin Date: Mon, 4 Oct 2021 03:18:11 +0000 (-0400) Subject: Fixes for 5.14 X-Git-Tag: v4.4.286~48 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=797f8d4351ae2fac3f1ef1f6829d142b1250b1fa;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.14 Signed-off-by: Sasha Levin --- diff --git a/queue-5.14/af_unix-fix-races-in-sk_peer_pid-and-sk_peer_cred-ac.patch b/queue-5.14/af_unix-fix-races-in-sk_peer_pid-and-sk_peer_cred-ac.patch new file mode 100644 index 00000000000..54bb51b5150 --- /dev/null +++ b/queue-5.14/af_unix-fix-races-in-sk_peer_pid-and-sk_peer_cred-ac.patch @@ -0,0 +1,190 @@ +From 393f5055e6f1c122e58c0443c48de13ed5a845f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 15:57:50 -0700 +Subject: af_unix: fix races in sk_peer_pid and sk_peer_cred accesses + +From: Eric Dumazet + +[ Upstream commit 35306eb23814444bd4021f8a1c3047d3cb0c8b2b ] + +Jann Horn reported that SO_PEERCRED and SO_PEERGROUPS implementations +are racy, as af_unix can concurrently change sk_peer_pid and sk_peer_cred. + +In order to fix this issue, this patch adds a new spinlock that needs +to be used whenever these fields are read or written. + +Jann also pointed out that l2cap_sock_get_peer_pid_cb() is currently +reading sk->sk_peer_pid which makes no sense, as this field +is only possibly set by AF_UNIX sockets. +We will have to clean this in a separate patch. +This could be done by reverting b48596d1dc25 "Bluetooth: L2CAP: Add get_peer_pid callback" +or implementing what was truly expected. + +Fixes: 109f6e39fa07 ("af_unix: Allow SO_PEERCRED to work across namespaces.") +Signed-off-by: Eric Dumazet +Reported-by: Jann Horn +Cc: Eric W. Biederman +Cc: Luiz Augusto von Dentz +Cc: Marcel Holtmann +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + include/net/sock.h | 2 ++ + net/core/sock.c | 32 ++++++++++++++++++++++++++------ + net/unix/af_unix.c | 34 ++++++++++++++++++++++++++++------ + 3 files changed, 56 insertions(+), 12 deletions(-) + +diff --git a/include/net/sock.h b/include/net/sock.h +index 980b471b569d..db0cb8aa591f 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -487,8 +487,10 @@ struct sock { + u8 sk_prefer_busy_poll; + u16 sk_busy_poll_budget; + #endif ++ spinlock_t sk_peer_lock; + struct pid *sk_peer_pid; + const struct cred *sk_peer_cred; ++ + long sk_rcvtimeo; + ktime_t sk_stamp; + #if BITS_PER_LONG==32 +diff --git a/net/core/sock.c b/net/core/sock.c +index 1cf0edc79f37..bd1b34b3b778 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1366,6 +1366,16 @@ int sock_setsockopt(struct socket *sock, int level, int optname, + } + EXPORT_SYMBOL(sock_setsockopt); + ++static const struct cred *sk_get_peer_cred(struct sock *sk) ++{ ++ const struct cred *cred; ++ ++ spin_lock(&sk->sk_peer_lock); ++ cred = get_cred(sk->sk_peer_cred); ++ spin_unlock(&sk->sk_peer_lock); ++ ++ return cred; ++} + + static void cred_to_ucred(struct pid *pid, const struct cred *cred, + struct ucred *ucred) +@@ -1542,7 +1552,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + struct ucred peercred; + if (len > sizeof(peercred)) + len = sizeof(peercred); ++ ++ spin_lock(&sk->sk_peer_lock); + cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); ++ spin_unlock(&sk->sk_peer_lock); ++ + if (copy_to_user(optval, &peercred, len)) + return -EFAULT; + goto lenout; +@@ -1550,20 +1564,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname, + + case SO_PEERGROUPS: + { ++ const struct cred *cred; + int ret, n; + +- if 
(!sk->sk_peer_cred) ++ cred = sk_get_peer_cred(sk); ++ if (!cred) + return -ENODATA; + +- n = sk->sk_peer_cred->group_info->ngroups; ++ n = cred->group_info->ngroups; + if (len < n * sizeof(gid_t)) { + len = n * sizeof(gid_t); ++ put_cred(cred); + return put_user(len, optlen) ? -EFAULT : -ERANGE; + } + len = n * sizeof(gid_t); + +- ret = groups_to_user((gid_t __user *)optval, +- sk->sk_peer_cred->group_info); ++ ret = groups_to_user((gid_t __user *)optval, cred->group_info); ++ put_cred(cred); + if (ret) + return ret; + goto lenout; +@@ -1921,9 +1938,10 @@ static void __sk_destruct(struct rcu_head *head) + sk->sk_frag.page = NULL; + } + +- if (sk->sk_peer_cred) +- put_cred(sk->sk_peer_cred); ++ /* We do not need to acquire sk->sk_peer_lock, we are the last user. */ ++ put_cred(sk->sk_peer_cred); + put_pid(sk->sk_peer_pid); ++ + if (likely(sk->sk_net_refcnt)) + put_net(sock_net(sk)); + sk_prot_free(sk->sk_prot_creator, sk); +@@ -3124,6 +3142,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) + + sk->sk_peer_pid = NULL; + sk->sk_peer_cred = NULL; ++ spin_lock_init(&sk->sk_peer_lock); ++ + sk->sk_write_pending = 0; + sk->sk_rcvlowat = 1; + sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; +diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c +index 91ff09d833e8..f96ee27d9ff2 100644 +--- a/net/unix/af_unix.c ++++ b/net/unix/af_unix.c +@@ -600,20 +600,42 @@ static void unix_release_sock(struct sock *sk, int embrion) + + static void init_peercred(struct sock *sk) + { +- put_pid(sk->sk_peer_pid); +- if (sk->sk_peer_cred) +- put_cred(sk->sk_peer_cred); ++ const struct cred *old_cred; ++ struct pid *old_pid; ++ ++ spin_lock(&sk->sk_peer_lock); ++ old_pid = sk->sk_peer_pid; ++ old_cred = sk->sk_peer_cred; + sk->sk_peer_pid = get_pid(task_tgid(current)); + sk->sk_peer_cred = get_current_cred(); ++ spin_unlock(&sk->sk_peer_lock); ++ ++ put_pid(old_pid); ++ put_cred(old_cred); + } + + static void copy_peercred(struct sock *sk, struct sock *peersk) + { +- put_pid(sk->sk_peer_pid); +- if (sk->sk_peer_cred) +- put_cred(sk->sk_peer_cred); ++ const struct cred *old_cred; ++ struct pid *old_pid; ++ ++ if (sk < peersk) { ++ spin_lock(&sk->sk_peer_lock); ++ spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); ++ } else { ++ spin_lock(&peersk->sk_peer_lock); ++ spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); ++ } ++ old_pid = sk->sk_peer_pid; ++ old_cred = sk->sk_peer_cred; + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); + sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); ++ ++ spin_unlock(&sk->sk_peer_lock); ++ spin_unlock(&peersk->sk_peer_lock); ++ ++ put_pid(old_pid); ++ put_cred(old_cred); + } + + static int unix_listen(struct socket *sock, int backlog) +-- +2.33.0 + diff --git a/queue-5.14/bpf-exempt-cap_bpf-from-checks-against-bpf_jit_limit.patch b/queue-5.14/bpf-exempt-cap_bpf-from-checks-against-bpf_jit_limit.patch new file mode 100644 index 00000000000..72c36466cfc --- /dev/null +++ b/queue-5.14/bpf-exempt-cap_bpf-from-checks-against-bpf_jit_limit.patch @@ -0,0 +1,41 @@ +From 3dbae7cae7027a5cc097a50a898cc8de38625b48 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 Sep 2021 12:11:52 +0100 +Subject: bpf: Exempt CAP_BPF from checks against bpf_jit_limit + +From: Lorenz Bauer + +[ Upstream commit 8a98ae12fbefdb583a7696de719a1d57e5e940a2 ] + +When introducing CAP_BPF, bpf_jit_charge_modmem() was not changed to treat +programs with CAP_BPF as privileged for the purpose of JIT memory allocation. 
+This means that a program without CAP_BPF can block a program with CAP_BPF +from loading a program. + +Fix this by checking bpf_capable() in bpf_jit_charge_modmem(). + +Fixes: 2c78ee898d8f ("bpf: Implement CAP_BPF") +Signed-off-by: Lorenz Bauer +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/20210922111153.19843-1-lmb@cloudflare.com +Signed-off-by: Sasha Levin +--- + kernel/bpf/core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c +index 0a28a8095d3e..c019611fbc8f 100644 +--- a/kernel/bpf/core.c ++++ b/kernel/bpf/core.c +@@ -827,7 +827,7 @@ int bpf_jit_charge_modmem(u32 pages) + { + if (atomic_long_add_return(pages, &bpf_jit_current) > + (bpf_jit_limit >> PAGE_SHIFT)) { +- if (!capable(CAP_SYS_ADMIN)) { ++ if (!bpf_capable()) { + atomic_long_sub(pages, &bpf_jit_current); + return -EPERM; + } +-- +2.33.0 + diff --git a/queue-5.14/bpf-handle-return-value-of-bpf_prog_type_struct_ops-.patch b/queue-5.14/bpf-handle-return-value-of-bpf_prog_type_struct_ops-.patch new file mode 100644 index 00000000000..78711daf9a4 --- /dev/null +++ b/queue-5.14/bpf-handle-return-value-of-bpf_prog_type_struct_ops-.patch @@ -0,0 +1,202 @@ +From 002cc5e32dba549339a895f110e2f935e6f1bccb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Sep 2021 10:33:51 +0800 +Subject: bpf: Handle return value of BPF_PROG_TYPE_STRUCT_OPS prog + +From: Hou Tao + +[ Upstream commit 356ed64991c6847a0c4f2e8fa3b1133f7a14f1fc ] + +Currently if a function ptr in struct_ops has a return value, its +caller will get a random return value from it, because the return +value of related BPF_PROG_TYPE_STRUCT_OPS prog is just dropped. + +So adding a new flag BPF_TRAMP_F_RET_FENTRY_RET to tell bpf trampoline +to save and return the return value of struct_ops prog if ret_size of +the function ptr is greater than 0. Also restricting the flag to be +used alone. + +Fixes: 85d33df357b6 ("bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS") +Signed-off-by: Hou Tao +Signed-off-by: Alexei Starovoitov +Acked-by: Martin KaFai Lau +Link: https://lore.kernel.org/bpf/20210914023351.3664499-1-houtao1@huawei.com +Signed-off-by: Sasha Levin +--- + arch/x86/net/bpf_jit_comp.c | 53 ++++++++++++++++++++++++++++--------- + include/linux/bpf.h | 2 ++ + kernel/bpf/bpf_struct_ops.c | 7 +++-- + 3 files changed, 47 insertions(+), 15 deletions(-) + +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index 16d76f814e9b..47780844598a 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -1744,7 +1744,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, + } + + static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, +- struct bpf_prog *p, int stack_size, bool mod_ret) ++ struct bpf_prog *p, int stack_size, bool save_ret) + { + u8 *prog = *pprog; + u8 *jmp_insn; +@@ -1777,11 +1777,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, + if (emit_call(&prog, p->bpf_func, prog)) + return -EINVAL; + +- /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return ++ /* ++ * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return + * of the previous call which is then passed on the stack to + * the next BPF program. ++ * ++ * BPF_TRAMP_FENTRY trampoline may need to return the return ++ * value of BPF_PROG_TYPE_STRUCT_OPS prog. 
+ */ +- if (mod_ret) ++ if (save_ret) + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + + /* replace 2 nops with JE insn, since jmp target is known */ +@@ -1828,13 +1832,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) + } + + static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, +- struct bpf_tramp_progs *tp, int stack_size) ++ struct bpf_tramp_progs *tp, int stack_size, ++ bool save_ret) + { + int i; + u8 *prog = *pprog; + + for (i = 0; i < tp->nr_progs; i++) { +- if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false)) ++ if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, ++ save_ret)) + return -EINVAL; + } + *pprog = prog; +@@ -1877,6 +1883,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, + return 0; + } + ++static bool is_valid_bpf_tramp_flags(unsigned int flags) ++{ ++ if ((flags & BPF_TRAMP_F_RESTORE_REGS) && ++ (flags & BPF_TRAMP_F_SKIP_FRAME)) ++ return false; ++ ++ /* ++ * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops, ++ * and it must be used alone. ++ */ ++ if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) && ++ (flags & ~BPF_TRAMP_F_RET_FENTRY_RET)) ++ return false; ++ ++ return true; ++} ++ + /* Example: + * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); + * its 'struct btf_func_model' will be nr_args=2 +@@ -1949,17 +1972,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + u8 **branches = NULL; + u8 *prog; ++ bool save_ret; + + /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ + if (nr_args > 6) + return -ENOTSUPP; + +- if ((flags & BPF_TRAMP_F_RESTORE_REGS) && +- (flags & BPF_TRAMP_F_SKIP_FRAME)) ++ if (!is_valid_bpf_tramp_flags(flags)) + return -EINVAL; + +- if (flags & BPF_TRAMP_F_CALL_ORIG) +- stack_size += 8; /* room for return value of orig_call */ ++ /* room for return value of orig_call or fentry prog */ ++ save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); ++ if (save_ret) ++ stack_size += 8; + + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* skip patched call instruction and point orig_call to actual +@@ -1986,7 +2011,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i + } + + if (fentry->nr_progs) +- if (invoke_bpf(m, &prog, fentry, stack_size)) ++ if (invoke_bpf(m, &prog, fentry, stack_size, ++ flags & BPF_TRAMP_F_RET_FENTRY_RET)) + return -EINVAL; + + if (fmod_ret->nr_progs) { +@@ -2033,7 +2059,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i + } + + if (fexit->nr_progs) +- if (invoke_bpf(m, &prog, fexit, stack_size)) { ++ if (invoke_bpf(m, &prog, fexit, stack_size, false)) { + ret = -EINVAL; + goto cleanup; + } +@@ -2053,9 +2079,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i + ret = -EINVAL; + goto cleanup; + } +- /* restore original return value back into RAX */ +- emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); + } ++ /* restore return value of orig_call or fentry prog back into RAX */ ++ if (save_ret) ++ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); + + EMIT1(0x5B); /* pop rbx */ + EMIT1(0xC9); /* leave */ +diff --git a/include/linux/bpf.h b/include/linux/bpf.h +index e8e2b0393ca9..11da5671d4f0 100644 +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -553,6 +553,8 @@ struct btf_func_model { + * programs only. Should not be used with normal calls and indirect calls. 
+ */ + #define BPF_TRAMP_F_SKIP_FRAME BIT(2) ++/* Return the return value of fentry prog. Only used by bpf_struct_ops. */ ++#define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) + + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 + * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 +diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c +index 70f6fd4fa305..2ce17447fb76 100644 +--- a/kernel/bpf/bpf_struct_ops.c ++++ b/kernel/bpf/bpf_struct_ops.c +@@ -367,6 +367,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, + const struct btf_type *mtype, *ptype; + struct bpf_prog *prog; + u32 moff; ++ u32 flags; + + moff = btf_member_bit_offset(t, member) / 8; + ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); +@@ -430,10 +431,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, + + tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; + tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; ++ flags = st_ops->func_models[i].ret_size > 0 ? ++ BPF_TRAMP_F_RET_FENTRY_RET : 0; + err = arch_prepare_bpf_trampoline(NULL, image, + st_map->image + PAGE_SIZE, +- &st_ops->func_models[i], 0, +- tprogs, NULL); ++ &st_ops->func_models[i], ++ flags, tprogs, NULL); + if (err < 0) + goto reset_unlock; + +-- +2.33.0 + diff --git a/queue-5.14/bpf-mips-validate-conditional-branch-offsets.patch b/queue-5.14/bpf-mips-validate-conditional-branch-offsets.patch new file mode 100644 index 00000000000..4d656020cc2 --- /dev/null +++ b/queue-5.14/bpf-mips-validate-conditional-branch-offsets.patch @@ -0,0 +1,271 @@ +From 260a959fca4dd51cd5c5f8a97cedf6a64b564b53 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Sep 2021 17:04:37 +0100 +Subject: bpf, mips: Validate conditional branch offsets + +From: Piotr Krysiuk + +[ Upstream commit 37cb28ec7d3a36a5bace7063a3dba633ab110f8b ] + +The conditional branch instructions on MIPS use 18-bit signed offsets +allowing for a branch range of 128 KBytes (backward and forward). +However, this limit is not observed by the cBPF JIT compiler, and so +the JIT compiler emits out-of-range branches when translating certain +cBPF programs. A specific example of such a cBPF program is included in +the "BPF_MAXINSNS: exec all MSH" test from lib/test_bpf.c that executes +anomalous machine code containing incorrect branch offsets under JIT. + +Furthermore, this issue can be abused to craft undesirable machine +code, where the control flow is hijacked to execute arbitrary Kernel +code. + +The following steps can be used to reproduce the issue: + + # echo 1 > /proc/sys/net/core/bpf_jit_enable + # modprobe test_bpf test_name="BPF_MAXINSNS: exec all MSH" + +This should produce multiple warnings from build_bimm() similar to: + + ------------[ cut here ]------------ + WARNING: CPU: 0 PID: 209 at arch/mips/mm/uasm-mips.c:210 build_insn+0x558/0x590 + Micro-assembler field overflow + Modules linked in: test_bpf(+) + CPU: 0 PID: 209 Comm: modprobe Not tainted 5.14.3 #1 + Stack : 00000000 807bb824 82b33c9c 801843c0 00000000 00000004 00000000 63c9b5ee + 82b33af4 80999898 80910000 80900000 82fd6030 00000001 82b33a98 82087180 + 00000000 00000000 80873b28 00000000 000000fc 82b3394c 00000000 2e34312e + 6d6d6f43 809a180f 809a1836 6f6d203a 80900000 00000001 82b33bac 80900000 + 00027f80 00000000 00000000 807bb824 00000000 804ed790 001cc317 00000001 + [...] 
+ Call Trace: + [<80108f44>] show_stack+0x38/0x118 + [<807a7aac>] dump_stack_lvl+0x5c/0x7c + [<807a4b3c>] __warn+0xcc/0x140 + [<807a4c3c>] warn_slowpath_fmt+0x8c/0xb8 + [<8011e198>] build_insn+0x558/0x590 + [<8011e358>] uasm_i_bne+0x20/0x2c + [<80127b48>] build_body+0xa58/0x2a94 + [<80129c98>] bpf_jit_compile+0x114/0x1e4 + [<80613fc4>] bpf_prepare_filter+0x2ec/0x4e4 + [<8061423c>] bpf_prog_create+0x80/0xc4 + [] test_bpf_init+0x300/0xba8 [test_bpf] + [<8010051c>] do_one_initcall+0x50/0x1d4 + [<801c5e54>] do_init_module+0x60/0x220 + [<801c8b20>] sys_finit_module+0xc4/0xfc + [<801144d0>] syscall_common+0x34/0x58 + [...] + ---[ end trace a287d9742503c645 ]--- + +Then the anomalous machine code executes: + +=> 0xc0a18000: addiu sp,sp,-16 + 0xc0a18004: sw s3,0(sp) + 0xc0a18008: sw s4,4(sp) + 0xc0a1800c: sw s5,8(sp) + 0xc0a18010: sw ra,12(sp) + 0xc0a18014: move s5,a0 + 0xc0a18018: move s4,zero + 0xc0a1801c: move s3,zero + + # __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0) + 0xc0a18020: lui t6,0x8012 + 0xc0a18024: ori t4,t6,0x9e14 + 0xc0a18028: li a1,0 + 0xc0a1802c: jalr t4 + 0xc0a18030: move a0,s5 + 0xc0a18034: bnez v0,0xc0a1ffb8 # incorrect branch offset + 0xc0a18038: move v0,zero + 0xc0a1803c: andi s4,s3,0xf + 0xc0a18040: b 0xc0a18048 + 0xc0a18044: sll s4,s4,0x2 + [...] + + # __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0) + 0xc0a1ffa0: lui t6,0x8012 + 0xc0a1ffa4: ori t4,t6,0x9e14 + 0xc0a1ffa8: li a1,0 + 0xc0a1ffac: jalr t4 + 0xc0a1ffb0: move a0,s5 + 0xc0a1ffb4: bnez v0,0xc0a1ffb8 # incorrect branch offset + 0xc0a1ffb8: move v0,zero + 0xc0a1ffbc: andi s4,s3,0xf + 0xc0a1ffc0: b 0xc0a1ffc8 + 0xc0a1ffc4: sll s4,s4,0x2 + + # __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0) + 0xc0a1ffc8: lui t6,0x8012 + 0xc0a1ffcc: ori t4,t6,0x9e14 + 0xc0a1ffd0: li a1,0 + 0xc0a1ffd4: jalr t4 + 0xc0a1ffd8: move a0,s5 + 0xc0a1ffdc: bnez v0,0xc0a3ffb8 # correct branch offset + 0xc0a1ffe0: move v0,zero + 0xc0a1ffe4: andi s4,s3,0xf + 0xc0a1ffe8: b 0xc0a1fff0 + 0xc0a1ffec: sll s4,s4,0x2 + [...] + + # epilogue + 0xc0a3ffb8: lw s3,0(sp) + 0xc0a3ffbc: lw s4,4(sp) + 0xc0a3ffc0: lw s5,8(sp) + 0xc0a3ffc4: lw ra,12(sp) + 0xc0a3ffc8: addiu sp,sp,16 + 0xc0a3ffcc: jr ra + 0xc0a3ffd0: nop + +To mitigate this issue, we assert the branch ranges for each emit call +that could generate an out-of-range branch. + +Fixes: 36366e367ee9 ("MIPS: BPF: Restore MIPS32 cBPF JIT") +Fixes: c6610de353da ("MIPS: net: Add BPF JIT") +Signed-off-by: Piotr Krysiuk +Signed-off-by: Daniel Borkmann +Tested-by: Johan Almbladh +Acked-by: Johan Almbladh +Cc: Paul Burton +Cc: Thomas Bogendoerfer +Link: https://lore.kernel.org/bpf/20210915160437.4080-1-piotras@gmail.com +Signed-off-by: Sasha Levin +--- + arch/mips/net/bpf_jit.c | 57 +++++++++++++++++++++++++++++++---------- + 1 file changed, 43 insertions(+), 14 deletions(-) + +diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c +index 0af88622c619..cb6d22439f71 100644 +--- a/arch/mips/net/bpf_jit.c ++++ b/arch/mips/net/bpf_jit.c +@@ -662,6 +662,11 @@ static void build_epilogue(struct jit_ctx *ctx) + ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? 
func##_negative : func) : \ + func##_positive) + ++static bool is_bad_offset(int b_off) ++{ ++ return b_off > 0x1ffff || b_off < -0x20000; ++} ++ + static int build_body(struct jit_ctx *ctx) + { + const struct bpf_prog *prog = ctx->skf; +@@ -728,7 +733,10 @@ static int build_body(struct jit_ctx *ctx) + /* Load return register on DS for failures */ + emit_reg_move(r_ret, r_zero, ctx); + /* Return with error */ +- emit_b(b_imm(prog->len, ctx), ctx); ++ b_off = b_imm(prog->len, ctx); ++ if (is_bad_offset(b_off)) ++ return -E2BIG; ++ emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_LD | BPF_W | BPF_IND: +@@ -775,8 +783,10 @@ static int build_body(struct jit_ctx *ctx) + emit_jalr(MIPS_R_RA, r_s0, ctx); + emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ + /* Check the error value */ +- emit_bcond(MIPS_COND_NE, r_ret, 0, +- b_imm(prog->len, ctx), ctx); ++ b_off = b_imm(prog->len, ctx); ++ if (is_bad_offset(b_off)) ++ return -E2BIG; ++ emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx); + emit_reg_move(r_ret, r_zero, ctx); + /* We are good */ + /* X <- P[1:K] & 0xf */ +@@ -855,8 +865,10 @@ static int build_body(struct jit_ctx *ctx) + /* A /= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ +- emit_bcond(MIPS_COND_EQ, r_X, r_zero, +- b_imm(prog->len, ctx), ctx); ++ b_off = b_imm(prog->len, ctx); ++ if (is_bad_offset(b_off)) ++ return -E2BIG; ++ emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_div(r_A, r_X, ctx); + break; +@@ -864,8 +876,10 @@ static int build_body(struct jit_ctx *ctx) + /* A %= X */ + ctx->flags |= SEEN_X | SEEN_A; + /* Check if r_X is zero */ +- emit_bcond(MIPS_COND_EQ, r_X, r_zero, +- b_imm(prog->len, ctx), ctx); ++ b_off = b_imm(prog->len, ctx); ++ if (is_bad_offset(b_off)) ++ return -E2BIG; ++ emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); + emit_load_imm(r_ret, 0, ctx); /* delay slot */ + emit_mod(r_A, r_X, ctx); + break; +@@ -926,7 +940,10 @@ static int build_body(struct jit_ctx *ctx) + break; + case BPF_JMP | BPF_JA: + /* pc += K */ +- emit_b(b_imm(i + k + 1, ctx), ctx); ++ b_off = b_imm(i + k + 1, ctx); ++ if (is_bad_offset(b_off)) ++ return -E2BIG; ++ emit_b(b_off, ctx); + emit_nop(ctx); + break; + case BPF_JMP | BPF_JEQ | BPF_K: +@@ -1056,12 +1073,16 @@ static int build_body(struct jit_ctx *ctx) + break; + case BPF_RET | BPF_A: + ctx->flags |= SEEN_A; +- if (i != prog->len - 1) ++ if (i != prog->len - 1) { + /* + * If this is not the last instruction + * then jump to the epilogue + */ +- emit_b(b_imm(prog->len, ctx), ctx); ++ b_off = b_imm(prog->len, ctx); ++ if (is_bad_offset(b_off)) ++ return -E2BIG; ++ emit_b(b_off, ctx); ++ } + emit_reg_move(r_ret, r_A, ctx); /* delay slot */ + break; + case BPF_RET | BPF_K: +@@ -1075,7 +1096,10 @@ static int build_body(struct jit_ctx *ctx) + * If this is not the last instruction + * then jump to the epilogue + */ +- emit_b(b_imm(prog->len, ctx), ctx); ++ b_off = b_imm(prog->len, ctx); ++ if (is_bad_offset(b_off)) ++ return -E2BIG; ++ emit_b(b_off, ctx); + emit_nop(ctx); + } + break; +@@ -1133,8 +1157,10 @@ static int build_body(struct jit_ctx *ctx) + /* Load *dev pointer */ + emit_load_ptr(r_s0, r_skb, off, ctx); + /* error (0) in the delay slot */ +- emit_bcond(MIPS_COND_EQ, r_s0, r_zero, +- b_imm(prog->len, ctx), ctx); ++ b_off = b_imm(prog->len, ctx); ++ if (is_bad_offset(b_off)) ++ return -E2BIG; ++ emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx); + emit_reg_move(r_ret, r_zero, ctx); + if (code == (BPF_ANC | SKF_AD_IFINDEX)) { + 
BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4); +@@ -1244,7 +1270,10 @@ void bpf_jit_compile(struct bpf_prog *fp) + + /* Generate the actual JIT code */ + build_prologue(&ctx); +- build_body(&ctx); ++ if (build_body(&ctx)) { ++ module_memfree(ctx.target); ++ goto out; ++ } + build_epilogue(&ctx); + + /* Update the icache */ +-- +2.33.0 + diff --git a/queue-5.14/bpf-x86-fix-bpf-mapping-of-atomic-fetch-implementati.patch b/queue-5.14/bpf-x86-fix-bpf-mapping-of-atomic-fetch-implementati.patch new file mode 100644 index 00000000000..f9dd051162e --- /dev/null +++ b/queue-5.14/bpf-x86-fix-bpf-mapping-of-atomic-fetch-implementati.patch @@ -0,0 +1,157 @@ +From b1a7c61ec2a30524dcc52b0dcbe136c3f0d0e0bb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Sep 2021 13:11:57 +0000 +Subject: bpf, x86: Fix bpf mapping of atomic fetch implementation + +From: Johan Almbladh + +[ Upstream commit ced185824c89b60e65b5a2606954c098320cdfb8 ] + +Fix the case where the dst register maps to %rax as otherwise this produces +an incorrect mapping with the implementation in 981f94c3e921 ("bpf: Add +bitwise atomic instructions") as %rax is clobbered given it's part of the +cmpxchg as operand. + +The issue is similar to b29dd96b905f ("bpf, x86: Fix BPF_FETCH atomic and/or/ +xor with r0 as src") just that the case of dst register was missed. + +Before, dst=r0 (%rax) src=r2 (%rsi): + + [...] + c5: mov %rax,%r10 + c8: mov 0x0(%rax),%rax <---+ (broken) + cc: mov %rax,%r11 | + cf: and %rsi,%r11 | + d2: lock cmpxchg %r11,0x0(%rax) <---+ + d8: jne 0x00000000000000c8 | + da: mov %rax,%rsi | + dd: mov %r10,%rax | + [...] | + | +After, dst=r0 (%rax) src=r2 (%rsi): | + | + [...] | + da: mov %rax,%r10 | + dd: mov 0x0(%r10),%rax <---+ (fixed) + e1: mov %rax,%r11 | + e4: and %rsi,%r11 | + e7: lock cmpxchg %r11,0x0(%r10) <---+ + ed: jne 0x00000000000000dd + ef: mov %rax,%rsi + f2: mov %r10,%rax + [...] + +The remaining combinations were fine as-is though: + +After, dst=r9 (%r15) src=r0 (%rax): + + [...] + dc: mov %rax,%r10 + df: mov 0x0(%r15),%rax + e3: mov %rax,%r11 + e6: and %r10,%r11 + e9: lock cmpxchg %r11,0x0(%r15) + ef: jne 0x00000000000000df _ + f1: mov %rax,%r10 | (unneeded, but + f4: mov %r10,%rax _| not a problem) + [...] + +After, dst=r9 (%r15) src=r4 (%rcx): + + [...] + de: mov %rax,%r10 + e1: mov 0x0(%r15),%rax + e5: mov %rax,%r11 + e8: and %rcx,%r11 + eb: lock cmpxchg %r11,0x0(%r15) + f1: jne 0x00000000000000e1 + f3: mov %rax,%rcx + f6: mov %r10,%rax + [...] + +The case of dst == src register is rejected by the verifier and +therefore not supported, but x86 JIT also handles this case just +fine. + +After, dst=r0 (%rax) src=r0 (%rax): + + [...] + eb: mov %rax,%r10 + ee: mov 0x0(%r10),%rax + f2: mov %rax,%r11 + f5: and %r10,%r11 + f8: lock cmpxchg %r11,0x0(%r10) + fe: jne 0x00000000000000ee + 100: mov %rax,%r10 + 103: mov %r10,%rax + [...] 
+ +Fixes: 981f94c3e921 ("bpf: Add bitwise atomic instructions") +Reported-by: Johan Almbladh +Signed-off-by: Johan Almbladh +Co-developed-by: Daniel Borkmann +Signed-off-by: Daniel Borkmann +Reviewed-by: Brendan Jackman +Acked-by: Alexei Starovoitov +Signed-off-by: Sasha Levin +--- + arch/x86/net/bpf_jit_comp.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c +index 47780844598a..ffcc4d29ad50 100644 +--- a/arch/x86/net/bpf_jit_comp.c ++++ b/arch/x86/net/bpf_jit_comp.c +@@ -1341,9 +1341,10 @@ st: if (is_imm8(insn->off)) + if (insn->imm == (BPF_AND | BPF_FETCH) || + insn->imm == (BPF_OR | BPF_FETCH) || + insn->imm == (BPF_XOR | BPF_FETCH)) { +- u8 *branch_target; + bool is64 = BPF_SIZE(insn->code) == BPF_DW; + u32 real_src_reg = src_reg; ++ u32 real_dst_reg = dst_reg; ++ u8 *branch_target; + + /* + * Can't be implemented with a single x86 insn. +@@ -1354,11 +1355,13 @@ st: if (is_imm8(insn->off)) + emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0); + if (src_reg == BPF_REG_0) + real_src_reg = BPF_REG_AX; ++ if (dst_reg == BPF_REG_0) ++ real_dst_reg = BPF_REG_AX; + + branch_target = prog; + /* Load old value */ + emit_ldx(&prog, BPF_SIZE(insn->code), +- BPF_REG_0, dst_reg, insn->off); ++ BPF_REG_0, real_dst_reg, insn->off); + /* + * Perform the (commutative) operation locally, + * put the result in the AUX_REG. +@@ -1369,7 +1372,8 @@ st: if (is_imm8(insn->off)) + add_2reg(0xC0, AUX_REG, real_src_reg)); + /* Attempt to swap in new value */ + err = emit_atomic(&prog, BPF_CMPXCHG, +- dst_reg, AUX_REG, insn->off, ++ real_dst_reg, AUX_REG, ++ insn->off, + BPF_SIZE(insn->code)); + if (WARN_ON(err)) + return err; +@@ -1383,11 +1387,10 @@ st: if (is_imm8(insn->off)) + /* Restore R0 after clobbering RAX */ + emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX); + break; +- + } + + err = emit_atomic(&prog, insn->imm, dst_reg, src_reg, +- insn->off, BPF_SIZE(insn->code)); ++ insn->off, BPF_SIZE(insn->code)); + if (err) + return err; + break; +-- +2.33.0 + diff --git a/queue-5.14/driver-core-fw_devlink-add-support-for-fwnode_flag_n.patch b/queue-5.14/driver-core-fw_devlink-add-support-for-fwnode_flag_n.patch new file mode 100644 index 00000000000..30f48befca9 --- /dev/null +++ b/queue-5.14/driver-core-fw_devlink-add-support-for-fwnode_flag_n.patch @@ -0,0 +1,90 @@ +From 605a9d2fb020f425018b82def64988659038f406 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Sep 2021 10:09:38 -0700 +Subject: driver core: fw_devlink: Add support for + FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD + +From: Saravana Kannan + +[ Upstream commit 5501765a02a6c324f78581e6bb8209d054fe13ae ] + +If a parent device is also a supplier to a child device, fw_devlink=on by +design delays the probe() of the child device until the probe() of the +parent finishes successfully. + +However, some drivers of such parent devices (where parent is also a +supplier) expect the child device to finish probing successfully as soon as +they are added using device_add() and before the probe() of the parent +device has completed successfully. One example of such a case is discussed +in the link mentioned below. + +Add a flag to make fw_devlink=on not enforce these supplier-consumer +relationships, so these drivers can continue working. 
+ +Link: https://lore.kernel.org/netdev/CAGETcx_uj0V4DChME-gy5HGKTYnxLBX=TH2rag29f_p=UcG+Tg@mail.gmail.com/ +Fixes: ea718c699055 ("Revert "Revert "driver core: Set fw_devlink=on by default""") +Signed-off-by: Saravana Kannan +Link: https://lore.kernel.org/r/20210915170940.617415-3-saravanak@google.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/base/core.c | 19 +++++++++++++++++++ + include/linux/fwnode.h | 11 ++++++++--- + 2 files changed, 27 insertions(+), 3 deletions(-) + +diff --git a/drivers/base/core.c b/drivers/base/core.c +index 8c77e14987d4..3d2fc70b9951 100644 +--- a/drivers/base/core.c ++++ b/drivers/base/core.c +@@ -1721,6 +1721,25 @@ static int fw_devlink_create_devlink(struct device *con, + struct device *sup_dev; + int ret = 0; + ++ /* ++ * In some cases, a device P might also be a supplier to its child node ++ * C. However, this would defer the probe of C until the probe of P ++ * completes successfully. This is perfectly fine in the device driver ++ * model. device_add() doesn't guarantee probe completion of the device ++ * by the time it returns. ++ * ++ * However, there are a few drivers that assume C will finish probing ++ * as soon as it's added and before P finishes probing. So, we provide ++ * a flag to let fw_devlink know not to delay the probe of C until the ++ * probe of P completes successfully. ++ * ++ * When such a flag is set, we can't create device links where P is the ++ * supplier of C as that would delay the probe of C. ++ */ ++ if (sup_handle->flags & FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD && ++ fwnode_is_ancestor_of(sup_handle, con->fwnode)) ++ return -EINVAL; ++ + sup_dev = get_dev_from_fwnode(sup_handle); + if (sup_dev) { + /* +diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h +index 59828516ebaf..9f4ad719bfe3 100644 +--- a/include/linux/fwnode.h ++++ b/include/linux/fwnode.h +@@ -22,10 +22,15 @@ struct device; + * LINKS_ADDED: The fwnode has already be parsed to add fwnode links. + * NOT_DEVICE: The fwnode will never be populated as a struct device. + * INITIALIZED: The hardware corresponding to fwnode has been initialized. ++ * NEEDS_CHILD_BOUND_ON_ADD: For this fwnode/device to probe successfully, its ++ * driver needs its child devices to be bound with ++ * their respective drivers as soon as they are ++ * added. + */ +-#define FWNODE_FLAG_LINKS_ADDED BIT(0) +-#define FWNODE_FLAG_NOT_DEVICE BIT(1) +-#define FWNODE_FLAG_INITIALIZED BIT(2) ++#define FWNODE_FLAG_LINKS_ADDED BIT(0) ++#define FWNODE_FLAG_NOT_DEVICE BIT(1) ++#define FWNODE_FLAG_INITIALIZED BIT(2) ++#define FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD BIT(3) + + struct fwnode_handle { + struct fwnode_handle *secondary; +-- +2.33.0 + diff --git a/queue-5.14/drm-i915-gvt-fix-the-usage-of-ww-lock-in-gvt-schedul.patch b/queue-5.14/drm-i915-gvt-fix-the-usage-of-ww-lock-in-gvt-schedul.patch new file mode 100644 index 00000000000..c255c0f6664 --- /dev/null +++ b/queue-5.14/drm-i915-gvt-fix-the-usage-of-ww-lock-in-gvt-schedul.patch @@ -0,0 +1,51 @@ +From a8606a48630f274b304124633fa27658db45c93b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Aug 2021 14:38:34 +0000 +Subject: drm/i915/gvt: fix the usage of ww lock in gvt scheduler. + +From: Zhi A Wang + +[ Upstream commit d168cd797982db9db617113644c87b8f5f3cf27e ] + +As the APIs related to ww lock in i915 was changed recently, the usage of +ww lock in GVT-g scheduler needs to be changed accrodingly. We noticed a +deadlock when GVT-g scheduler submits the workload to i915. 
After some +investigation, it seems the way of how to use ww lock APIs has been +changed. Releasing a ww now requires a explicit i915_gem_ww_ctx_fini(). + +Fixes: 67f1120381df ("drm/i915/gvt: Introduce per object locking in GVT scheduler.") +Cc: Zhenyu Wang +Signed-off-by: Zhi A Wang +Signed-off-by: Zhenyu Wang +Link: http://patchwork.freedesktop.org/patch/msgid/20210826143834.25410-1-zhi.a.wang@intel.com +Acked-by: Zhenyu Wang +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gvt/scheduler.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c +index 734c37c5e347..527b59b86312 100644 +--- a/drivers/gpu/drm/i915/gvt/scheduler.c ++++ b/drivers/gpu/drm/i915/gvt/scheduler.c +@@ -576,7 +576,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) + + /* No one is going to touch shadow bb from now on. */ + i915_gem_object_flush_map(bb->obj); +- i915_gem_object_unlock(bb->obj); ++ i915_gem_ww_ctx_fini(&ww); + } + } + return 0; +@@ -630,7 +630,7 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) + return ret; + } + +- i915_gem_object_unlock(wa_ctx->indirect_ctx.obj); ++ i915_gem_ww_ctx_fini(&ww); + + /* FIXME: we are not tracking our pinned VMA leaving it + * up to the core to fix up the stray pin_count upon +-- +2.33.0 + diff --git a/queue-5.14/drm-i915-remove-warning-from-the-rps-worker.patch b/queue-5.14/drm-i915-remove-warning-from-the-rps-worker.patch new file mode 100644 index 00000000000..47ca2af70b0 --- /dev/null +++ b/queue-5.14/drm-i915-remove-warning-from-the-rps-worker.patch @@ -0,0 +1,44 @@ +From 0e1cf8fd378d7181c0b2cb07b934bdcfeed4cb65 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Sep 2021 14:34:12 +0530 +Subject: drm/i915: Remove warning from the rps worker + +From: Tejas Upadhyay + +[ Upstream commit 4b8bcaf8a6d6ab5db51e30865def5cb694eb2966 ] + +In commit 4e5c8a99e1cb ("drm/i915: Drop i915_request.lock requirement +for intel_rps_boost()"), we decoupled the rps worker from the pm so +that we could avoid the synchronization penalty which makes the +assertion liable to run too early. Which makes warning invalid hence +removed. 
+ +Fixes: 4e5c8a99e1cb ("drm/i915: Drop i915_request.lock requirement for intel_rps_boost()") + +Reviewed-by: Chris Wilson +Signed-off-by: Tejas Upadhyay +Signed-off-by: Matt Roper +Link: https://patchwork.freedesktop.org/patch/msgid/20210914090412.1393498-1-tejaskumarx.surendrakumar.upadhyay@intel.com +(cherry picked from commit a837a0686308d95ad9c48d32b4dfe86a17dc98c2) +Signed-off-by: Jani Nikula +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gt/intel_rps.c | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c +index 06e9a8ed4e03..db9c212a240e 100644 +--- a/drivers/gpu/drm/i915/gt/intel_rps.c ++++ b/drivers/gpu/drm/i915/gt/intel_rps.c +@@ -861,8 +861,6 @@ void intel_rps_park(struct intel_rps *rps) + { + int adj; + +- GEM_BUG_ON(atomic_read(&rps->num_waiters)); +- + if (!intel_rps_clear_active(rps)) + return; + +-- +2.33.0 + diff --git a/queue-5.14/drm-i915-request-fix-early-tracepoints.patch b/queue-5.14/drm-i915-request-fix-early-tracepoints.patch new file mode 100644 index 00000000000..ed0d5137833 --- /dev/null +++ b/queue-5.14/drm-i915-request-fix-early-tracepoints.patch @@ -0,0 +1,122 @@ +From f3f306a0b886de922602e95e39f7412841c40082 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 21 Sep 2021 14:42:02 +0100 +Subject: drm/i915/request: fix early tracepoints + +From: Matthew Auld + +[ Upstream commit c83ff0186401169eb27ce5057d820b7a863455c3 ] + +Currently we blow up in trace_dma_fence_init, when calling into +get_driver_name or get_timeline_name, since both the engine and context +might be NULL(or contain some garbage address) in the case of newly +allocated slab objects via the request ctor. Note that we also use +SLAB_TYPESAFE_BY_RCU here, which allows requests to be immediately +freed, but delay freeing the underlying page by an RCU grace period. +With this scheme requests can be re-allocated, at the same time as they +are also being read by some lockless RCU lookup mechanism. + +In the ctor case, which is only called for new slab objects(i.e allocate +new page and call the ctor for each object) it's safe to reset the +context/engine prior to calling into dma_fence_init, since we can be +certain that no one is doing an RCU lookup which might depend on peeking +at the engine/context, like in active_engine(), since the object can't +yet be externally visible. + +In the recycled case(which might also be externally visible) the request +refcount always transitions from 0->1 after we set the context/engine +etc, which should ensure it's valid to dereference the engine for +example, when doing an RCU list-walk, so long as we can also increment +the refcount first. If the refcount is already zero, then the request is +considered complete/released. If it's non-zero, then the request might +be in the process of being re-allocated, or potentially still in flight, +however after successfully incrementing the refcount, it's possible to +carefully inspect the request state, to determine if the request is +still what we were looking for. Note that all externally visible +requests returned to the cache must have zero refcount. + +One possible fix then is to move dma_fence_init out from the request +ctor. 
Originally this was how it was done, but it was moved in: + +commit 855e39e65cfc33a73724f1cc644ffc5754864a20 +Author: Chris Wilson +Date: Mon Feb 3 09:41:48 2020 +0000 + + drm/i915: Initialise basic fence before acquiring seqno + +where it looks like intel_timeline_get_seqno() relied on some of the +rq->fence state, but that is no longer the case since: + +commit 12ca695d2c1ed26b2dcbb528b42813bd0f216cfc +Author: Maarten Lankhorst +Date: Tue Mar 23 16:49:50 2021 +0100 + + drm/i915: Do not share hwsp across contexts any more, v8. + +intel_timeline_get_seqno() could also be cleaned up slightly by dropping +the request argument. + +Moving dma_fence_init back out of the ctor, should ensure we have enough +of the request initialised in case of trace_dma_fence_init. +Functionally this should be the same, and is effectively what we were +already open coding before, except now we also assign the fence->lock +and fence->ops, but since these are invariant for recycled +requests(which might be externally visible), and will therefore already +hold the same value, it shouldn't matter. + +An alternative fix, since we don't yet have a fully initialised request +when in the ctor, is just setting the context/engine as NULL, but this +does require adding some extra handling in get_driver_name etc. + +v2(Daniel): + - Try to make the commit message less confusing + +Fixes: 855e39e65cfc ("drm/i915: Initialise basic fence before acquiring seqno") +Signed-off-by: Matthew Auld +Cc: Michael Mason +Cc: Daniel Vetter +Reviewed-by: Daniel Vetter +Link: https://patchwork.freedesktop.org/patch/msgid/20210921134202.3803151-1-matthew.auld@intel.com +(cherry picked from commit be988eaee1cb208c4445db46bc3ceaf75f586f0b) +Signed-off-by: Jani Nikula +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/i915_request.c | 11 ++--------- + 1 file changed, 2 insertions(+), 9 deletions(-) + +diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c +index 37aef1308573..7db972fa7024 100644 +--- a/drivers/gpu/drm/i915/i915_request.c ++++ b/drivers/gpu/drm/i915/i915_request.c +@@ -914,8 +914,6 @@ static void __i915_request_ctor(void *arg) + i915_sw_fence_init(&rq->submit, submit_notify); + i915_sw_fence_init(&rq->semaphore, semaphore_notify); + +- dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0); +- + rq->capture_list = NULL; + + init_llist_head(&rq->execute_cb); +@@ -978,17 +976,12 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) + rq->ring = ce->ring; + rq->execution_mask = ce->engine->mask; + +- kref_init(&rq->fence.refcount); +- rq->fence.flags = 0; +- rq->fence.error = 0; +- INIT_LIST_HEAD(&rq->fence.cb_list); +- + ret = intel_timeline_get_seqno(tl, rq, &seqno); + if (ret) + goto err_free; + +- rq->fence.context = tl->fence_context; +- rq->fence.seqno = seqno; ++ dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, ++ tl->fence_context, seqno); + + RCU_INIT_POINTER(rq->timeline, tl); + rq->hwsp_seqno = tl->hwsp_seqno; +-- +2.33.0 + diff --git a/queue-5.14/dsa-mv88e6xxx-6161-use-chip-wide-max-mtu.patch b/queue-5.14/dsa-mv88e6xxx-6161-use-chip-wide-max-mtu.patch new file mode 100644 index 00000000000..f1da718d934 --- /dev/null +++ b/queue-5.14/dsa-mv88e6xxx-6161-use-chip-wide-max-mtu.patch @@ -0,0 +1,49 @@ +From 72f264d10e0be7539f1cb79900d7bee50eadca28 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 Sep 2021 19:41:24 +0200 +Subject: dsa: mv88e6xxx: 6161: Use chip wide MAX MTU +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 
8bit + +From: Andrew Lunn + +[ Upstream commit fe23036192c95b66e60d019d2ec1814d0d561ffd ] + +The datasheets suggests the 6161 uses a per port setting for jumbo +frames. Testing has however shown this is not correct, it uses the old +style chip wide MTU control. Change the ops in the 6161 structure to +reflect this. + +Fixes: 1baf0fac10fb ("net: dsa: mv88e6xxx: Use chip-wide max frame size for MTU") +Reported by: 曹煜 +Signed-off-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mv88e6xxx/chip.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index 1c122a1f2f97..f99f09c50722 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -3657,7 +3657,6 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { + .port_set_ucast_flood = mv88e6352_port_set_ucast_flood, + .port_set_mcast_flood = mv88e6352_port_set_mcast_flood, + .port_set_ether_type = mv88e6351_port_set_ether_type, +- .port_set_jumbo_size = mv88e6165_port_set_jumbo_size, + .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, + .port_pause_limit = mv88e6097_port_pause_limit, + .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, +@@ -3682,6 +3681,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { + .avb_ops = &mv88e6165_avb_ops, + .ptp_ops = &mv88e6165_ptp_ops, + .phylink_validate = mv88e6185_phylink_validate, ++ .set_max_frame_size = mv88e6185_g1_set_max_frame_size, + }; + + static const struct mv88e6xxx_ops mv88e6165_ops = { +-- +2.33.0 + diff --git a/queue-5.14/dsa-mv88e6xxx-fix-mtu-definition.patch b/queue-5.14/dsa-mv88e6xxx-fix-mtu-definition.patch new file mode 100644 index 00000000000..806c240d6aa --- /dev/null +++ b/queue-5.14/dsa-mv88e6xxx-fix-mtu-definition.patch @@ -0,0 +1,96 @@ +From 53ac726c27bf657797864b37a2177fd895e15ecf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 Sep 2021 19:41:25 +0200 +Subject: dsa: mv88e6xxx: Fix MTU definition +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Andrew Lunn + +[ Upstream commit b92ce2f54c0f0ff781e914ec189c25f7bf1b1ec2 ] + +The MTU passed to the DSA driver is the payload size, typically 1500. +However, the switch uses the frame size when applying restrictions. +Adjust the MTU with the size of the Ethernet header and the frame +checksum. The VLAN header also needs to be included when the frame +size it per port, but not when it is global. + +Fixes: 1baf0fac10fb ("net: dsa: mv88e6xxx: Use chip-wide max frame size for MTU") +Reported by: 曹煜 +Signed-off-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mv88e6xxx/chip.c | 12 ++++++------ + drivers/net/dsa/mv88e6xxx/global1.c | 2 ++ + drivers/net/dsa/mv88e6xxx/port.c | 2 ++ + 3 files changed, 10 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index f99f09c50722..014950a343f4 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -2775,8 +2775,8 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) + if (err) + return err; + +- /* Port Control 2: don't force a good FCS, set the maximum frame size to +- * 10240 bytes, disable 802.1q tags checking, don't discard tagged or ++ /* Port Control 2: don't force a good FCS, set the MTU size to ++ * 10222 bytes, disable 802.1q tags checking, don't discard tagged or + * untagged frames on this port, do a destination address lookup on all + * received packets as usual, disable ARP mirroring and don't send a + * copy of all transmitted/received frames on this port to the CPU. +@@ -2795,7 +2795,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) + return err; + + if (chip->info->ops->port_set_jumbo_size) { +- err = chip->info->ops->port_set_jumbo_size(chip, port, 10240); ++ err = chip->info->ops->port_set_jumbo_size(chip, port, 10218); + if (err) + return err; + } +@@ -2885,10 +2885,10 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port) + struct mv88e6xxx_chip *chip = ds->priv; + + if (chip->info->ops->port_set_jumbo_size) +- return 10240; ++ return 10240 - VLAN_ETH_HLEN - ETH_FCS_LEN; + else if (chip->info->ops->set_max_frame_size) +- return 1632; +- return 1522; ++ return 1632 - VLAN_ETH_HLEN - ETH_FCS_LEN; ++ return 1522 - VLAN_ETH_HLEN - ETH_FCS_LEN; + } + + static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) +diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c +index 815b0f681d69..5848112036b0 100644 +--- a/drivers/net/dsa/mv88e6xxx/global1.c ++++ b/drivers/net/dsa/mv88e6xxx/global1.c +@@ -232,6 +232,8 @@ int mv88e6185_g1_set_max_frame_size(struct mv88e6xxx_chip *chip, int mtu) + u16 val; + int err; + ++ mtu += ETH_HLEN + ETH_FCS_LEN; ++ + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &val); + if (err) + return err; +diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c +index f77e2ee64a60..451028c57af8 100644 +--- a/drivers/net/dsa/mv88e6xxx/port.c ++++ b/drivers/net/dsa/mv88e6xxx/port.c +@@ -1277,6 +1277,8 @@ int mv88e6165_port_set_jumbo_size(struct mv88e6xxx_chip *chip, int port, + u16 reg; + int err; + ++ size += VLAN_ETH_HLEN + ETH_FCS_LEN; ++ + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL2, ®); + if (err) + return err; +-- +2.33.0 + diff --git a/queue-5.14/dsa-mv88e6xxx-include-tagger-overhead-when-setting-m.patch b/queue-5.14/dsa-mv88e6xxx-include-tagger-overhead-when-setting-m.patch new file mode 100644 index 00000000000..f08c6b21ec7 --- /dev/null +++ b/queue-5.14/dsa-mv88e6xxx-include-tagger-overhead-when-setting-m.patch @@ -0,0 +1,72 @@ +From 868a9e26e447de5849e3b3b59872c6051ac4ead9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 Sep 2021 19:41:26 +0200 +Subject: dsa: mv88e6xxx: Include tagger overhead when setting MTU for DSA and + CPU ports +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Andrew Lunn + +[ Upstream commit b9c587fed61cf88bd45822c3159644445f6d5aa6 ] + +Same members of the Marvell Ethernet 
switches impose MTU restrictions +on ports used for connecting to the CPU or another switch for DSA. If +the MTU is set too low, tagged frames will be discarded. Ensure the +worst case tagger overhead is included in setting the MTU for DSA and +CPU ports. + +Fixes: 1baf0fac10fb ("net: dsa: mv88e6xxx: Use chip-wide max frame size for MTU") +Reported by: 曹煜 +Signed-off-by: Andrew Lunn +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/mv88e6xxx/chip.c | 9 ++++++--- + drivers/net/dsa/mv88e6xxx/chip.h | 1 + + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c +index 014950a343f4..66b4f4a9832a 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.c ++++ b/drivers/net/dsa/mv88e6xxx/chip.c +@@ -2885,10 +2885,10 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port) + struct mv88e6xxx_chip *chip = ds->priv; + + if (chip->info->ops->port_set_jumbo_size) +- return 10240 - VLAN_ETH_HLEN - ETH_FCS_LEN; ++ return 10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; + else if (chip->info->ops->set_max_frame_size) +- return 1632 - VLAN_ETH_HLEN - ETH_FCS_LEN; +- return 1522 - VLAN_ETH_HLEN - ETH_FCS_LEN; ++ return 1632 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; ++ return 1522 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; + } + + static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) +@@ -2896,6 +2896,9 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) + struct mv88e6xxx_chip *chip = ds->priv; + int ret = 0; + ++ if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) ++ new_mtu += EDSA_HLEN; ++ + mv88e6xxx_reg_lock(chip); + if (chip->info->ops->port_set_jumbo_size) + ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu); +diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h +index 675b1f3e43b7..59f316cc8583 100644 +--- a/drivers/net/dsa/mv88e6xxx/chip.h ++++ b/drivers/net/dsa/mv88e6xxx/chip.h +@@ -18,6 +18,7 @@ + #include + #include + ++#define EDSA_HLEN 8 + #define MV88E6XXX_N_FID 4096 + + /* PVT limits for 4-bit port and 5-bit switch */ +-- +2.33.0 + diff --git a/queue-5.14/e100-fix-buffer-overrun-in-e100_get_regs.patch b/queue-5.14/e100-fix-buffer-overrun-in-e100_get_regs.patch new file mode 100644 index 00000000000..9d7d2458917 --- /dev/null +++ b/queue-5.14/e100-fix-buffer-overrun-in-e100_get_regs.patch @@ -0,0 +1,107 @@ +From 4672fdb7db2ffe83b6fa8ae7e24fc6874095315c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Sep 2021 10:52:37 -0700 +Subject: e100: fix buffer overrun in e100_get_regs + +From: Jacob Keller + +[ Upstream commit 51032e6f17ce990d06123ad7307f258c50d25aa7 ] + +The e100_get_regs function is used to implement a simple register dump +for the e100 device. The data is broken into a couple of MAC control +registers, and then a series of PHY registers, followed by a memory dump +buffer. + +The total length of the register dump is defined as (1 + E100_PHY_REGS) +* sizeof(u32) + sizeof(nic->mem->dump_buf). + +The logic for filling in the PHY registers uses a convoluted inverted +count for loop which counts from E100_PHY_REGS (0x1C) down to 0, and +assigns the slots 1 + E100_PHY_REGS - i. The first loop iteration will +fill in [1] and the final loop iteration will fill in [1 + 0x1C]. This +is actually one more than the supposed number of PHY registers. 
+ +The memory dump buffer is then filled into the space at +[2 + E100_PHY_REGS] which will cause that memcpy to assign 4 bytes past +the total size. + +The end result is that we overrun the total buffer size allocated by the +kernel, which could lead to a panic or other issues due to memory +corruption. + +It is difficult to determine the actual total number of registers +here. The only 8255x datasheet I could find indicates there are 28 total +MDI registers. However, we're reading 29 here, and reading them in +reverse! + +In addition, the ethtool e100 register dump interface appears to read +the first PHY register to determine if the device is in MDI or MDIx +mode. This doesn't appear to be documented anywhere within the 8255x +datasheet. I can only assume it must be in register 28 (the extra +register we're reading here). + +Lets not change any of the intended meaning of what we copy here. Just +extend the space by 4 bytes to account for the extra register and +continue copying the data out in the same order. + +Change the E100_PHY_REGS value to be the correct total (29) so that the +total register dump size is calculated properly. Fix the offset for +where we copy the dump buffer so that it doesn't overrun the total size. + +Re-write the for loop to use counting up instead of the convoluted +down-counting. Correct the mdio_read offset to use the 0-based register +offsets, but maintain the bizarre reverse ordering so that we have the +ABI expected by applications like ethtool. This requires and additional +subtraction of 1. It seems a bit odd but it makes the flow of assignment +into the register buffer easier to follow. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Felicitas Hetzelt +Signed-off-by: Jacob Keller +Tested-by: Jacob Keller +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/e100.c | 16 ++++++++++------ + 1 file changed, 10 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c +index 71b9f0563b32..1fa68ebe9432 100644 +--- a/drivers/net/ethernet/intel/e100.c ++++ b/drivers/net/ethernet/intel/e100.c +@@ -2437,7 +2437,7 @@ static void e100_get_drvinfo(struct net_device *netdev, + sizeof(info->bus_info)); + } + +-#define E100_PHY_REGS 0x1C ++#define E100_PHY_REGS 0x1D + static int e100_get_regs_len(struct net_device *netdev) + { + struct nic *nic = netdev_priv(netdev); +@@ -2459,14 +2459,18 @@ static void e100_get_regs(struct net_device *netdev, + buff[0] = ioread8(&nic->csr->scb.cmd_hi) << 24 | + ioread8(&nic->csr->scb.cmd_lo) << 16 | + ioread16(&nic->csr->scb.status); +- for (i = E100_PHY_REGS; i >= 0; i--) +- buff[1 + E100_PHY_REGS - i] = +- mdio_read(netdev, nic->mii.phy_id, i); ++ for (i = 0; i < E100_PHY_REGS; i++) ++ /* Note that we read the registers in reverse order. This ++ * ordering is the ABI apparently used by ethtool and other ++ * applications. 
++ */ ++ buff[1 + i] = mdio_read(netdev, nic->mii.phy_id, ++ E100_PHY_REGS - 1 - i); + memset(nic->mem->dump_buf, 0, sizeof(nic->mem->dump_buf)); + e100_exec_cb(nic, NULL, e100_dump); + msleep(10); +- memcpy(&buff[2 + E100_PHY_REGS], nic->mem->dump_buf, +- sizeof(nic->mem->dump_buf)); ++ memcpy(&buff[1 + E100_PHY_REGS], nic->mem->dump_buf, ++ sizeof(nic->mem->dump_buf)); + } + + static void e100_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +-- +2.33.0 + diff --git a/queue-5.14/e100-fix-length-calculation-in-e100_get_regs_len.patch b/queue-5.14/e100-fix-length-calculation-in-e100_get_regs_len.patch new file mode 100644 index 00000000000..b234c4dc104 --- /dev/null +++ b/queue-5.14/e100-fix-length-calculation-in-e100_get_regs_len.patch @@ -0,0 +1,50 @@ +From 7597a16bdad4c9ace8e753903a973948a0ffdf6a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Sep 2021 10:52:36 -0700 +Subject: e100: fix length calculation in e100_get_regs_len + +From: Jacob Keller + +[ Upstream commit 4329c8dc110b25d5f04ed20c6821bb60deff279f ] + +commit abf9b902059f ("e100: cleanup unneeded math") tried to simplify +e100_get_regs_len and remove a double 'divide and then multiply' +calculation that the e100_reg_regs_len function did. + +This change broke the size calculation entirely as it failed to account +for the fact that the numbered registers are actually 4 bytes wide and +not 1 byte. This resulted in a significant under allocation of the +register buffer used by e100_get_regs. + +Fix this by properly multiplying the register count by u32 first before +adding the size of the dump buffer. + +Fixes: abf9b902059f ("e100: cleanup unneeded math") +Reported-by: Felicitas Hetzelt +Signed-off-by: Jacob Keller +Signed-off-by: Tony Nguyen +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/e100.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c +index 1b0958bd24f6..71b9f0563b32 100644 +--- a/drivers/net/ethernet/intel/e100.c ++++ b/drivers/net/ethernet/intel/e100.c +@@ -2441,7 +2441,11 @@ static void e100_get_drvinfo(struct net_device *netdev, + static int e100_get_regs_len(struct net_device *netdev) + { + struct nic *nic = netdev_priv(netdev); +- return 1 + E100_PHY_REGS + sizeof(nic->mem->dump_buf); ++ ++ /* We know the number of registers, and the size of the dump buffer. ++ * Calculate the total size in bytes. ++ */ ++ return (1 + E100_PHY_REGS) * sizeof(u32) + sizeof(nic->mem->dump_buf); + } + + static void e100_get_regs(struct net_device *netdev, +-- +2.33.0 + diff --git a/queue-5.14/hwmon-mlxreg-fan-return-non-zero-value-when-fan-curr.patch b/queue-5.14/hwmon-mlxreg-fan-return-non-zero-value-when-fan-curr.patch new file mode 100644 index 00000000000..903cf111d46 --- /dev/null +++ b/queue-5.14/hwmon-mlxreg-fan-return-non-zero-value-when-fan-curr.patch @@ -0,0 +1,128 @@ +From 528ff3c8c56953eebd1df491ef6b882ec6594109 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Sep 2021 21:31:51 +0300 +Subject: hwmon: (mlxreg-fan) Return non-zero value when fan current state is + enforced from sysfs + +From: Vadim Pasternak + +[ Upstream commit e6fab7af6ba1bc77c78713a83876f60ca7a4a064 ] + +Fan speed minimum can be enforced from sysfs. For example, setting +current fan speed to 20 is used to enforce fan speed to be at 100% +speed, 19 - to be not below 90% speed, etcetera. 
This feature provides +ability to limit fan speed according to some system wise +considerations, like absence of some replaceable units or high system +ambient temperature. + +Request for changing fan minimum speed is configuration request and can +be set only through 'sysfs' write procedure. In this situation value of +argument 'state' is above nominal fan speed maximum. + +Return non-zero code in this case to avoid +thermal_cooling_device_stats_update() call, because in this case +statistics update violates thermal statistics table range. +The issues is observed in case kernel is configured with option +CONFIG_THERMAL_STATISTICS. + +Here is the trace from KASAN: +[ 159.506659] BUG: KASAN: slab-out-of-bounds in thermal_cooling_device_stats_update+0x7d/0xb0 +[ 159.516016] Read of size 4 at addr ffff888116163840 by task hw-management.s/7444 +[ 159.545625] Call Trace: +[ 159.548366] dump_stack+0x92/0xc1 +[ 159.552084] ? thermal_cooling_device_stats_update+0x7d/0xb0 +[ 159.635869] thermal_zone_device_update+0x345/0x780 +[ 159.688711] thermal_zone_device_set_mode+0x7d/0xc0 +[ 159.694174] mlxsw_thermal_modules_init+0x48f/0x590 [mlxsw_core] +[ 159.700972] ? mlxsw_thermal_set_cur_state+0x5a0/0x5a0 [mlxsw_core] +[ 159.731827] mlxsw_thermal_init+0x763/0x880 [mlxsw_core] +[ 160.070233] RIP: 0033:0x7fd995909970 +[ 160.074239] Code: 73 01 c3 48 8b 0d 28 d5 2b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 99 2d 2c 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff .. +[ 160.095242] RSP: 002b:00007fff54f5d938 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +[ 160.103722] RAX: ffffffffffffffda RBX: 0000000000000013 RCX: 00007fd995909970 +[ 160.111710] RDX: 0000000000000013 RSI: 0000000001906008 RDI: 0000000000000001 +[ 160.119699] RBP: 0000000001906008 R08: 00007fd995bc9760 R09: 00007fd996210700 +[ 160.127687] R10: 0000000000000073 R11: 0000000000000246 R12: 0000000000000013 +[ 160.135673] R13: 0000000000000001 R14: 00007fd995bc8600 R15: 0000000000000013 +[ 160.143671] +[ 160.145338] Allocated by task 2924: +[ 160.149242] kasan_save_stack+0x19/0x40 +[ 160.153541] __kasan_kmalloc+0x7f/0xa0 +[ 160.157743] __kmalloc+0x1a2/0x2b0 +[ 160.161552] thermal_cooling_device_setup_sysfs+0xf9/0x1a0 +[ 160.167687] __thermal_cooling_device_register+0x1b5/0x500 +[ 160.173833] devm_thermal_of_cooling_device_register+0x60/0xa0 +[ 160.180356] mlxreg_fan_probe+0x474/0x5e0 [mlxreg_fan] +[ 160.248140] +[ 160.249807] The buggy address belongs to the object at ffff888116163400 +[ 160.249807] which belongs to the cache kmalloc-1k of size 1024 +[ 160.263814] The buggy address is located 64 bytes to the right of +[ 160.263814] 1024-byte region [ffff888116163400, ffff888116163800) +[ 160.277536] The buggy address belongs to the page: +[ 160.282898] page:0000000012275840 refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff888116167000 pfn:0x116160 +[ 160.294872] head:0000000012275840 order:3 compound_mapcount:0 compound_pincount:0 +[ 160.303251] flags: 0x200000000010200(slab|head|node=0|zone=2) +[ 160.309694] raw: 0200000000010200 ffffea00046f7208 ffffea0004928208 ffff88810004dbc0 +[ 160.318367] raw: ffff888116167000 00000000000a0006 00000001ffffffff 0000000000000000 +[ 160.327033] page dumped because: kasan: bad access detected +[ 160.333270] +[ 160.334937] Memory state around the buggy address: +[ 160.356469] >ffff888116163800: fc .. 
+ +Fixes: 65afb4c8e7e4 ("hwmon: (mlxreg-fan) Add support for Mellanox FAN driver") +Signed-off-by: Vadim Pasternak +Link: https://lore.kernel.org/r/20210916183151.869427-1-vadimp@nvidia.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/mlxreg-fan.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c +index 116681fde33d..89fe7b9fe26b 100644 +--- a/drivers/hwmon/mlxreg-fan.c ++++ b/drivers/hwmon/mlxreg-fan.c +@@ -315,8 +315,8 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, + { + struct mlxreg_fan *fan = cdev->devdata; + unsigned long cur_state; ++ int i, config = 0; + u32 regval; +- int i; + int err; + + /* +@@ -329,6 +329,12 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, + * overwritten. + */ + if (state >= MLXREG_FAN_SPEED_MIN && state <= MLXREG_FAN_SPEED_MAX) { ++ /* ++ * This is configuration change, which is only supported through sysfs. ++ * For configuration non-zero value is to be returned to avoid thermal ++ * statistics update. ++ */ ++ config = 1; + state -= MLXREG_FAN_MAX_STATE; + for (i = 0; i < state; i++) + fan->cooling_levels[i] = state; +@@ -343,7 +349,7 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, + + cur_state = MLXREG_FAN_PWM_DUTY2STATE(regval); + if (state < cur_state) +- return 0; ++ return config; + + state = cur_state; + } +@@ -359,7 +365,7 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, + dev_err(fan->dev, "Failed to write PWM duty\n"); + return err; + } +- return 0; ++ return config; + } + + static const struct thermal_cooling_device_ops mlxreg_fan_cooling_ops = { +-- +2.33.0 + diff --git a/queue-5.14/hwmon-occ-fix-p10-vrm-temp-sensors.patch b/queue-5.14/hwmon-occ-fix-p10-vrm-temp-sensors.patch new file mode 100644 index 00000000000..80be6573a73 --- /dev/null +++ b/queue-5.14/hwmon-occ-fix-p10-vrm-temp-sensors.patch @@ -0,0 +1,62 @@ +From 77aad0c4b35f1f8fa15cd2583254889c74d578e3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 10:36:04 -0500 +Subject: hwmon: (occ) Fix P10 VRM temp sensors + +From: Eddie James + +[ Upstream commit ffa2600044979aff4bd6238edb9af815a47d7c32 ] + +The P10 (temp sensor version 0x10) doesn't do the same VRM status +reporting that was used on P9. It just reports the temperature, so +drop the check for VRM fru type in the sysfs show function, and don't +set the name to "alarm". + +Fixes: db4919ec86 ("hwmon: (occ) Add new temperature sensor type") +Signed-off-by: Eddie James +Link: https://lore.kernel.org/r/20210929153604.14968-1-eajames@linux.ibm.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/occ/common.c | 17 +++++------------ + 1 file changed, 5 insertions(+), 12 deletions(-) + +diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c +index 0d68a78be980..ae664613289c 100644 +--- a/drivers/hwmon/occ/common.c ++++ b/drivers/hwmon/occ/common.c +@@ -340,18 +340,11 @@ static ssize_t occ_show_temp_10(struct device *dev, + if (val == OCC_TEMP_SENSOR_FAULT) + return -EREMOTEIO; + +- /* +- * VRM doesn't return temperature, only alarm bit. 
This +- * attribute maps to tempX_alarm instead of tempX_input for +- * VRM +- */ +- if (temp->fru_type != OCC_FRU_TYPE_VRM) { +- /* sensor not ready */ +- if (val == 0) +- return -EAGAIN; ++ /* sensor not ready */ ++ if (val == 0) ++ return -EAGAIN; + +- val *= 1000; +- } ++ val *= 1000; + break; + case 2: + val = temp->fru_type; +@@ -886,7 +879,7 @@ static int occ_setup_sensor_attrs(struct occ *occ) + 0, i); + attr++; + +- if (sensors->temp.version > 1 && ++ if (sensors->temp.version == 2 && + temp->fru_type == OCC_FRU_TYPE_VRM) { + snprintf(attr->name, sizeof(attr->name), + "temp%d_alarm", s); +-- +2.33.0 + diff --git a/queue-5.14/hwmon-pmbus-mp2975-add-missed-pout-attribute-for-pag.patch b/queue-5.14/hwmon-pmbus-mp2975-add-missed-pout-attribute-for-pag.patch new file mode 100644 index 00000000000..be50a9f11cf --- /dev/null +++ b/queue-5.14/hwmon-pmbus-mp2975-add-missed-pout-attribute-for-pag.patch @@ -0,0 +1,38 @@ +From 039b68f70c28f70aa8ed7428836756721b3adba2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Sep 2021 10:07:40 +0300 +Subject: hwmon: (pmbus/mp2975) Add missed POUT attribute for page 1 mp2975 + controller + +From: Vadim Pasternak + +[ Upstream commit 2292e2f685cd5c65e3f47bbcf9f469513acc3195 ] + +Add missed attribute for reading POUT from page 1. +It is supported by device, but has been missed in initial commit. + +Fixes: 2c6fcbb21149 ("hwmon: (pmbus) Add support for MPS Multi-phase mp2975 controller") +Signed-off-by: Vadim Pasternak +Link: https://lore.kernel.org/r/20210927070740.2149290-1-vadimp@nvidia.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/pmbus/mp2975.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c +index eb94bd5f4e2a..51986adfbf47 100644 +--- a/drivers/hwmon/pmbus/mp2975.c ++++ b/drivers/hwmon/pmbus/mp2975.c +@@ -54,7 +54,7 @@ + + #define MP2975_RAIL2_FUNC (PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | \ + PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | \ +- PMBUS_PHASE_VIRTUAL) ++ PMBUS_HAVE_POUT | PMBUS_PHASE_VIRTUAL) + + struct mp2975_data { + struct pmbus_driver_info info; +-- +2.33.0 + diff --git a/queue-5.14/hwmon-tmp421-fix-rounding-for-negative-values.patch b/queue-5.14/hwmon-tmp421-fix-rounding-for-negative-values.patch new file mode 100644 index 00000000000..7b29054ee35 --- /dev/null +++ b/queue-5.14/hwmon-tmp421-fix-rounding-for-negative-values.patch @@ -0,0 +1,74 @@ +From a8e54cfdd848ed88f3402dbd8f7a21f89a5e9ea1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Sep 2021 12:30:11 +0300 +Subject: hwmon: (tmp421) fix rounding for negative values + +From: Paul Fertser + +[ Upstream commit 724e8af85854c4d3401313b6dd7d79cf792d8990 ] + +Old code produces -24999 for 0b1110011100000000 input in standard format due to +always rounding up rather than "away from zero". + +Use the common macro for division, unify and simplify the conversion code along +the way. 
+ +Fixes: 9410700b881f ("hwmon: Add driver for Texas Instruments TMP421/422/423 sensor chips") +Signed-off-by: Paul Fertser +Link: https://lore.kernel.org/r/20210924093011.26083-3-fercerpav@gmail.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/tmp421.c | 24 ++++++++---------------- + 1 file changed, 8 insertions(+), 16 deletions(-) + +diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c +index c9ef83627bb7..b963a369c5ab 100644 +--- a/drivers/hwmon/tmp421.c ++++ b/drivers/hwmon/tmp421.c +@@ -100,23 +100,17 @@ struct tmp421_data { + s16 temp[4]; + }; + +-static int temp_from_s16(s16 reg) ++static int temp_from_raw(u16 reg, bool extended) + { + /* Mask out status bits */ + int temp = reg & ~0xf; + +- return (temp * 1000 + 128) / 256; +-} +- +-static int temp_from_u16(u16 reg) +-{ +- /* Mask out status bits */ +- int temp = reg & ~0xf; +- +- /* Add offset for extended temperature range. */ +- temp -= 64 * 256; ++ if (extended) ++ temp = temp - 64 * 256; ++ else ++ temp = (s16)temp; + +- return (temp * 1000 + 128) / 256; ++ return DIV_ROUND_CLOSEST(temp * 1000, 256); + } + + static int tmp421_update_device(struct tmp421_data *data) +@@ -172,10 +166,8 @@ static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, + + switch (attr) { + case hwmon_temp_input: +- if (tmp421->config & TMP421_CONFIG_RANGE) +- *val = temp_from_u16(tmp421->temp[channel]); +- else +- *val = temp_from_s16(tmp421->temp[channel]); ++ *val = temp_from_raw(tmp421->temp[channel], ++ tmp421->config & TMP421_CONFIG_RANGE); + return 0; + case hwmon_temp_fault: + /* +-- +2.33.0 + diff --git a/queue-5.14/hwmon-tmp421-report-pvld-condition-as-fault.patch b/queue-5.14/hwmon-tmp421-report-pvld-condition-as-fault.patch new file mode 100644 index 00000000000..bb3a1297ef6 --- /dev/null +++ b/queue-5.14/hwmon-tmp421-report-pvld-condition-as-fault.patch @@ -0,0 +1,54 @@ +From 75fc9814eeb72310eb14138174a9371aaa8c91be Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Sep 2021 12:30:10 +0300 +Subject: hwmon: (tmp421) report /PVLD condition as fault + +From: Paul Fertser + +[ Upstream commit 540effa7f283d25bcc13c0940d808002fee340b8 ] + +For both local and remote sensors all the supported ICs can report an +"undervoltage lockout" condition which means the conversion wasn't +properly performed due to insufficient power supply voltage and so the +measurement results can't be trusted. + +Fixes: 9410700b881f ("hwmon: Add driver for Texas Instruments TMP421/422/423 sensor chips") +Signed-off-by: Paul Fertser +Link: https://lore.kernel.org/r/20210924093011.26083-2-fercerpav@gmail.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/tmp421.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c +index 8fd8c3a94dfe..c9ef83627bb7 100644 +--- a/drivers/hwmon/tmp421.c ++++ b/drivers/hwmon/tmp421.c +@@ -179,10 +179,10 @@ static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, + return 0; + case hwmon_temp_fault: + /* +- * The OPEN bit signals a fault. This is bit 0 of the temperature +- * register (low byte). 
++ * Any of OPEN or /PVLD bits indicate a hardware mulfunction ++ * and the conversion result may be incorrect + */ +- *val = tmp421->temp[channel] & 0x01; ++ *val = !!(tmp421->temp[channel] & 0x03); + return 0; + default: + return -EOPNOTSUPP; +@@ -195,9 +195,6 @@ static umode_t tmp421_is_visible(const void *data, enum hwmon_sensor_types type, + { + switch (attr) { + case hwmon_temp_fault: +- if (channel == 0) +- return 0; +- return 0444; + case hwmon_temp_input: + return 0444; + default: +-- +2.33.0 + diff --git a/queue-5.14/ib-cma-do-not-send-igmp-leaves-for-sendonly-multicas.patch b/queue-5.14/ib-cma-do-not-send-igmp-leaves-for-sendonly-multicas.patch new file mode 100644 index 00000000000..f9f9d3bedd6 --- /dev/null +++ b/queue-5.14/ib-cma-do-not-send-igmp-leaves-for-sendonly-multicas.patch @@ -0,0 +1,85 @@ +From 2e5ea813150c8289d40b0ab545867bdbaf519ffa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Sep 2021 13:43:28 +0200 +Subject: IB/cma: Do not send IGMP leaves for sendonly Multicast groups + +From: Christoph Lameter + +[ Upstream commit 2cc74e1ee31d00393b6698ec80b322fd26523da4 ] + +ROCE uses IGMP for Multicast instead of the native Infiniband system where +joins are required in order to post messages on the Multicast group. On +Ethernet one can send Multicast messages to arbitrary addresses without +the need to subscribe to a group. + +So ROCE correctly does not send IGMP joins during rdma_join_multicast(). + +F.e. in cma_iboe_join_multicast() we see: + + if (addr->sa_family == AF_INET) { + if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) { + ib.rec.hop_limit = IPV6_DEFAULT_HOPLIMIT; + if (!send_only) { + err = cma_igmp_send(ndev, &ib.rec.mgid, + true); + } + } + } else { + +So the IGMP join is suppressed as it is unnecessary. + +However no such check is done in destroy_mc(). And therefore leaving a +sendonly multicast group will send an IGMP leave. + +This means that the following scenario can lead to a multicast receiver +unexpectedly being unsubscribed from a MC group: + +1. Sender thread does a sendonly join on MC group X. No IGMP join + is sent. + +2. Receiver thread does a regular join on the same MC Group x. + IGMP join is sent and the receiver begins to get messages. + +3. Sender thread terminates and destroys MC group X. + IGMP leave is sent and the receiver no longer receives data. + +This patch adds the same logic for sendonly joins to destroy_mc() that is +also used in cma_iboe_join_multicast(). 
+ +Fixes: ab15c95a17b3 ("IB/core: Support for CMA multicast join flags") +Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2109081340540.668072@gentwo.de +Signed-off-by: Christoph Lameter +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/core/cma.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c +index 36ab9da70932..107462905b21 100644 +--- a/drivers/infiniband/core/cma.c ++++ b/drivers/infiniband/core/cma.c +@@ -1818,6 +1818,8 @@ static void cma_release_port(struct rdma_id_private *id_priv) + static void destroy_mc(struct rdma_id_private *id_priv, + struct cma_multicast *mc) + { ++ bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); ++ + if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) + ib_sa_free_multicast(mc->sa_mc); + +@@ -1834,7 +1836,10 @@ static void destroy_mc(struct rdma_id_private *id_priv, + + cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, + &mgid); +- cma_igmp_send(ndev, &mgid, false); ++ ++ if (!send_only) ++ cma_igmp_send(ndev, &mgid, false); ++ + dev_put(ndev); + } + +-- +2.33.0 + diff --git a/queue-5.14/interconnect-qcom-sdm660-correct-noc_qos_priority-sh.patch b/queue-5.14/interconnect-qcom-sdm660-correct-noc_qos_priority-sh.patch new file mode 100644 index 00000000000..7ccc14298ad --- /dev/null +++ b/queue-5.14/interconnect-qcom-sdm660-correct-noc_qos_priority-sh.patch @@ -0,0 +1,62 @@ +From b3fb71e7618f68d23cb73a14a52095e0d436e8a2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Sep 2021 15:49:55 +0300 +Subject: interconnect: qcom: sdm660: Correct NOC_QOS_PRIORITY shift and mask + +From: Shawn Guo + +[ Upstream commit 5833c9b8766298e73c11766f9585d4ea4fa785ff ] + +The NOC_QOS_PRIORITY shift and mask do not match what vendor kernel +defines [1]. Correct them per vendor kernel. As the result of +NOC_QOS_PRIORITY_P0_SHIFT being 0, the definition can be dropped and +regmap_update_bits() call on P0 can be simplified a bit. 
+ +[1] https://source.codeaurora.org/quic/la/kernel/msm-4.4/tree/drivers/soc/qcom/msm_bus/msm_bus_noc_adhoc.c?h=LA.UM.8.2.r1-04800-sdm660.0#n37 + +Fixes: f80a1d414328 ("interconnect: qcom: Add SDM660 interconnect provider driver") +Signed-off-by: Shawn Guo +Reviewed-by: Dmitry Baryshkov +Reviewed-by: AngeloGioacchino Del Regno +Link: https://lore.kernel.org/r/20210902054915.28689-1-shawn.guo@linaro.org +Signed-off-by: Georgi Djakov +Signed-off-by: Sasha Levin +--- + drivers/interconnect/qcom/sdm660.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +diff --git a/drivers/interconnect/qcom/sdm660.c b/drivers/interconnect/qcom/sdm660.c +index ac13046537e8..99eef7e2d326 100644 +--- a/drivers/interconnect/qcom/sdm660.c ++++ b/drivers/interconnect/qcom/sdm660.c +@@ -44,9 +44,9 @@ + #define NOC_PERM_MODE_BYPASS (1 << NOC_QOS_MODE_BYPASS) + + #define NOC_QOS_PRIORITYn_ADDR(n) (0x8 + (n * 0x1000)) +-#define NOC_QOS_PRIORITY_MASK 0xf ++#define NOC_QOS_PRIORITY_P1_MASK 0xc ++#define NOC_QOS_PRIORITY_P0_MASK 0x3 + #define NOC_QOS_PRIORITY_P1_SHIFT 0x2 +-#define NOC_QOS_PRIORITY_P0_SHIFT 0x3 + + #define NOC_QOS_MODEn_ADDR(n) (0xc + (n * 0x1000)) + #define NOC_QOS_MODEn_MASK 0x3 +@@ -624,13 +624,12 @@ static int qcom_icc_noc_set_qos_priority(struct regmap *rmap, + /* Must be updated one at a time, P1 first, P0 last */ + val = qos->areq_prio << NOC_QOS_PRIORITY_P1_SHIFT; + rc = regmap_update_bits(rmap, NOC_QOS_PRIORITYn_ADDR(qos->qos_port), +- NOC_QOS_PRIORITY_MASK, val); ++ NOC_QOS_PRIORITY_P1_MASK, val); + if (rc) + return rc; + +- val = qos->prio_level << NOC_QOS_PRIORITY_P0_SHIFT; + return regmap_update_bits(rmap, NOC_QOS_PRIORITYn_ADDR(qos->qos_port), +- NOC_QOS_PRIORITY_MASK, val); ++ NOC_QOS_PRIORITY_P0_MASK, qos->prio_level); + } + + static int qcom_icc_set_noc_qos(struct icc_node *src, u64 max_bw) +-- +2.33.0 + diff --git a/queue-5.14/interconnect-qcom-sdm660-fix-id-of-slv_cnoc_mnoc_cfg.patch b/queue-5.14/interconnect-qcom-sdm660-fix-id-of-slv_cnoc_mnoc_cfg.patch new file mode 100644 index 00000000000..cf5185eb3ae --- /dev/null +++ b/queue-5.14/interconnect-qcom-sdm660-fix-id-of-slv_cnoc_mnoc_cfg.patch @@ -0,0 +1,78 @@ +From e0a2002b5d8a4b14f808b8620fccde64f8e78d65 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Sep 2021 15:49:55 +0300 +Subject: interconnect: qcom: sdm660: Fix id of slv_cnoc_mnoc_cfg + +From: Shawn Guo + +[ Upstream commit a06c2e5c048e5e07fac9daf3073bd0b6582913c7 ] + +The id of slv_cnoc_mnoc_cfg node is mistakenly coded as id of +slv_blsp_1. It causes the following warning on slv_blsp_1 node adding. +Correct the id of slv_cnoc_mnoc_cfg node. 
+ +[ 1.948180] ------------[ cut here ]------------ +[ 1.954122] WARNING: CPU: 2 PID: 7 at drivers/interconnect/core.c:962 icc_node_add+0xe4/0xf8 +[ 1.958994] Modules linked in: +[ 1.967399] CPU: 2 PID: 7 Comm: kworker/u16:0 Not tainted 5.14.0-rc6-next-20210818 #21 +[ 1.970275] Hardware name: Xiaomi Redmi Note 7 (DT) +[ 1.978169] Workqueue: events_unbound deferred_probe_work_func +[ 1.982945] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--) +[ 1.988849] pc : icc_node_add+0xe4/0xf8 +[ 1.995699] lr : qnoc_probe+0x350/0x438 +[ 1.999519] sp : ffff80001008bb10 +[ 2.003337] x29: ffff80001008bb10 x28: 000000000000001a x27: ffffb83ddc61ee28 +[ 2.006818] x26: ffff2fe341d44080 x25: ffff2fe340f3aa80 x24: ffffb83ddc98f0e8 +[ 2.013938] x23: 0000000000000024 x22: ffff2fe3408b7400 x21: 0000000000000000 +[ 2.021054] x20: ffff2fe3408b7410 x19: ffff2fe341d44080 x18: 0000000000000010 +[ 2.028173] x17: ffff2fe3bdd0aac0 x16: 0000000000000281 x15: ffff2fe3400f5528 +[ 2.035290] x14: 000000000000013f x13: ffff2fe3400f5528 x12: 00000000ffffffea +[ 2.042410] x11: ffffb83ddc9109d0 x10: ffffb83ddc8f8990 x9 : ffffb83ddc8f89e8 +[ 2.049527] x8 : 0000000000017fe8 x7 : c0000000ffffefff x6 : 0000000000000001 +[ 2.056645] x5 : 0000000000057fa8 x4 : 0000000000000000 x3 : ffffb83ddc9903b0 +[ 2.063764] x2 : 1a1f6fde34d45500 x1 : ffff2fe340f3a880 x0 : ffff2fe340f3a880 +[ 2.070882] Call trace: +[ 2.077989] icc_node_add+0xe4/0xf8 +[ 2.080247] qnoc_probe+0x350/0x438 +[ 2.083718] platform_probe+0x68/0xd8 +[ 2.087191] really_probe+0xb8/0x300 +[ 2.091011] __driver_probe_device+0x78/0xe0 +[ 2.094659] driver_probe_device+0x80/0x110 +[ 2.098911] __device_attach_driver+0x90/0xe0 +[ 2.102818] bus_for_each_drv+0x78/0xc8 +[ 2.107331] __device_attach+0xf0/0x150 +[ 2.110977] device_initial_probe+0x14/0x20 +[ 2.114796] bus_probe_device+0x9c/0xa8 +[ 2.118963] deferred_probe_work_func+0x88/0xc0 +[ 2.122784] process_one_work+0x1a4/0x338 +[ 2.127296] worker_thread+0x1f8/0x420 +[ 2.131464] kthread+0x150/0x160 +[ 2.135107] ret_from_fork+0x10/0x20 +[ 2.138495] ---[ end trace 5eea8768cb620e87 ]--- + +Signed-off-by: Shawn Guo +Reviewed-by: Bjorn Andersson +Fixes: f80a1d414328 ("interconnect: qcom: Add SDM660 interconnect provider driver") +Link: https://lore.kernel.org/r/20210823014003.31391-1-shawn.guo@linaro.org +Signed-off-by: Georgi Djakov +Signed-off-by: Sasha Levin +--- + drivers/interconnect/qcom/sdm660.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/interconnect/qcom/sdm660.c b/drivers/interconnect/qcom/sdm660.c +index 632dbdd21915..ac13046537e8 100644 +--- a/drivers/interconnect/qcom/sdm660.c ++++ b/drivers/interconnect/qcom/sdm660.c +@@ -307,7 +307,7 @@ DEFINE_QNODE(slv_bimc_cfg, SDM660_SLAVE_BIMC_CFG, 4, -1, 56, true, -1, 0, -1, 0) + DEFINE_QNODE(slv_prng, SDM660_SLAVE_PRNG, 4, -1, 44, true, -1, 0, -1, 0); + DEFINE_QNODE(slv_spdm, SDM660_SLAVE_SPDM, 4, -1, 60, true, -1, 0, -1, 0); + DEFINE_QNODE(slv_qdss_cfg, SDM660_SLAVE_QDSS_CFG, 4, -1, 63, true, -1, 0, -1, 0); +-DEFINE_QNODE(slv_cnoc_mnoc_cfg, SDM660_SLAVE_BLSP_1, 4, -1, 66, true, -1, 0, -1, SDM660_MASTER_CNOC_MNOC_CFG); ++DEFINE_QNODE(slv_cnoc_mnoc_cfg, SDM660_SLAVE_CNOC_MNOC_CFG, 4, -1, 66, true, -1, 0, -1, SDM660_MASTER_CNOC_MNOC_CFG); + DEFINE_QNODE(slv_snoc_cfg, SDM660_SLAVE_SNOC_CFG, 4, -1, 70, true, -1, 0, -1, 0); + DEFINE_QNODE(slv_qm_cfg, SDM660_SLAVE_QM_CFG, 4, -1, 212, true, -1, 0, -1, 0); + DEFINE_QNODE(slv_clk_ctl, SDM660_SLAVE_CLK_CTL, 4, -1, 47, true, -1, 0, -1, 0); +-- +2.33.0 + diff --git 
a/queue-5.14/ionic-fix-gathering-of-debug-stats.patch b/queue-5.14/ionic-fix-gathering-of-debug-stats.patch new file mode 100644 index 00000000000..e73716bf7cb --- /dev/null +++ b/queue-5.14/ionic-fix-gathering-of-debug-stats.patch @@ -0,0 +1,43 @@ +From ae2c306501e52fb42c0518ea36fb98ea16b2aa93 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Sep 2021 14:07:18 -0700 +Subject: ionic: fix gathering of debug stats + +From: Shannon Nelson + +[ Upstream commit c23bb54f28d61a48008428e8cd320c947993919b ] + +Don't print stats for which we haven't reserved space as it can +cause nasty memory bashing and related bad behaviors. + +Fixes: aa620993b1e5 ("ionic: pull per-q stats work out of queue loops") +Signed-off-by: Shannon Nelson +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/pensando/ionic/ionic_stats.c | 9 --------- + 1 file changed, 9 deletions(-) + +diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c +index 58a854666c62..c14de5fcedea 100644 +--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c ++++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c +@@ -380,15 +380,6 @@ static void ionic_sw_stats_get_txq_values(struct ionic_lif *lif, u64 **buf, + &ionic_dbg_intr_stats_desc[i]); + (*buf)++; + } +- for (i = 0; i < IONIC_NUM_DBG_NAPI_STATS; i++) { +- **buf = IONIC_READ_STAT64(&txqcq->napi_stats, +- &ionic_dbg_napi_stats_desc[i]); +- (*buf)++; +- } +- for (i = 0; i < IONIC_MAX_NUM_NAPI_CNTR; i++) { +- **buf = txqcq->napi_stats.work_done_cntr[i]; +- (*buf)++; +- } + for (i = 0; i < IONIC_MAX_NUM_SG_CNTR; i++) { + **buf = txstats->sg_cntr[i]; + (*buf)++; +-- +2.33.0 + diff --git a/queue-5.14/ipvs-check-that-ip_vs_conn_tab_bits-is-between-8-and.patch b/queue-5.14/ipvs-check-that-ip_vs_conn_tab_bits-is-between-8-and.patch new file mode 100644 index 00000000000..b082125860a --- /dev/null +++ b/queue-5.14/ipvs-check-that-ip_vs_conn_tab_bits-is-between-8-and.patch @@ -0,0 +1,46 @@ +From 04e9bf720282cd6273c8756343afd9c7a033f5e9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Sep 2021 18:08:39 +0200 +Subject: ipvs: check that ip_vs_conn_tab_bits is between 8 and 20 + +From: Andrea Claudi + +[ Upstream commit 69e73dbfda14fbfe748d3812da1244cce2928dcb ] + +ip_vs_conn_tab_bits may be provided by the user through the +conn_tab_bits module parameter. If this value is greater than 31, or +less than 0, the shift operator used to derive tab_size causes undefined +behaviour. + +Fix this checking ip_vs_conn_tab_bits value to be in the range specified +in ipvs Kconfig. If not, simply use default value. + +Fixes: 6f7edb4881bf ("IPVS: Allow boot time change of hash size") +Reported-by: Yi Chen +Signed-off-by: Andrea Claudi +Acked-by: Julian Anastasov +Acked-by: Simon Horman +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipvs/ip_vs_conn.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c +index c100c6b112c8..2c467c422dc6 100644 +--- a/net/netfilter/ipvs/ip_vs_conn.c ++++ b/net/netfilter/ipvs/ip_vs_conn.c +@@ -1468,6 +1468,10 @@ int __init ip_vs_conn_init(void) + int idx; + + /* Compute size and mask */ ++ if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) { ++ pr_info("conn_tab_bits not in [8, 20]. 
Using default value\n"); ++ ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; ++ } + ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; + ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1; + +-- +2.33.0 + diff --git a/queue-5.14/ixgbe-fix-null-pointer-dereference-in-ixgbe_xdp_setu.patch b/queue-5.14/ixgbe-fix-null-pointer-dereference-in-ixgbe_xdp_setu.patch new file mode 100644 index 00000000000..d9252dcec43 --- /dev/null +++ b/queue-5.14/ixgbe-fix-null-pointer-dereference-in-ixgbe_xdp_setu.patch @@ -0,0 +1,119 @@ +From 0427d9cb6be05b4df0f79ff18b1457b3b132c6b5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Sep 2021 15:23:59 -0700 +Subject: ixgbe: Fix NULL pointer dereference in ixgbe_xdp_setup + +From: Feng Zhou + +[ Upstream commit 513e605d7a9ce136886cb42ebb2c40e9a6eb6333 ] + +The ixgbe driver currently generates a NULL pointer dereference with +some machine (online cpus < 63). This is due to the fact that the +maximum value of num_xdp_queues is nr_cpu_ids. Code is in +"ixgbe_set_rss_queues"". + +Here's how the problem repeats itself: +Some machine (online cpus < 63), And user set num_queues to 63 through +ethtool. Code is in the "ixgbe_set_channels", + adapter->ring_feature[RING_F_FDIR].limit = count; + +It becomes 63. + +When user use xdp, "ixgbe_set_rss_queues" will set queues num. + adapter->num_rx_queues = rss_i; + adapter->num_tx_queues = rss_i; + adapter->num_xdp_queues = ixgbe_xdp_queues(adapter); + +And rss_i's value is from + f = &adapter->ring_feature[RING_F_FDIR]; + rss_i = f->indices = f->limit; + +So "num_rx_queues" > "num_xdp_queues", when run to "ixgbe_xdp_setup", + for (i = 0; i < adapter->num_rx_queues; i++) + if (adapter->xdp_ring[i]->xsk_umem) + +It leads to panic. + +Call trace: +[exception RIP: ixgbe_xdp+368] +RIP: ffffffffc02a76a0 RSP: ffff9fe16202f8d0 RFLAGS: 00010297 +RAX: 0000000000000000 RBX: 0000000000000020 RCX: 0000000000000000 +RDX: 0000000000000000 RSI: 000000000000001c RDI: ffffffffa94ead90 +RBP: ffff92f8f24c0c18 R8: 0000000000000000 R9: 0000000000000000 +R10: ffff9fe16202f830 R11: 0000000000000000 R12: ffff92f8f24c0000 +R13: ffff9fe16202fc01 R14: 000000000000000a R15: ffffffffc02a7530 +ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 + 7 [ffff9fe16202f8f0] dev_xdp_install at ffffffffa89fbbcc + 8 [ffff9fe16202f920] dev_change_xdp_fd at ffffffffa8a08808 + 9 [ffff9fe16202f960] do_setlink at ffffffffa8a20235 +10 [ffff9fe16202fa88] rtnl_setlink at ffffffffa8a20384 +11 [ffff9fe16202fc78] rtnetlink_rcv_msg at ffffffffa8a1a8dd +12 [ffff9fe16202fcf0] netlink_rcv_skb at ffffffffa8a717eb +13 [ffff9fe16202fd40] netlink_unicast at ffffffffa8a70f88 +14 [ffff9fe16202fd80] netlink_sendmsg at ffffffffa8a71319 +15 [ffff9fe16202fdf0] sock_sendmsg at ffffffffa89df290 +16 [ffff9fe16202fe08] __sys_sendto at ffffffffa89e19c8 +17 [ffff9fe16202ff30] __x64_sys_sendto at ffffffffa89e1a64 +18 [ffff9fe16202ff38] do_syscall_64 at ffffffffa84042b9 +19 [ffff9fe16202ff50] entry_SYSCALL_64_after_hwframe at ffffffffa8c0008c + +So I fix ixgbe_max_channels so that it will not allow a setting of queues +to be higher than the num_online_cpus(). And when run to ixgbe_xdp_setup, +take the smaller value of num_rx_queues and num_xdp_queues. + +Fixes: 4a9b32f30f80 ("ixgbe: fix potential RX buffer starvation for AF_XDP") +Signed-off-by: Feng Zhou +Tested-by: Sandeep Penigalapati +Signed-off-by: Tony Nguyen +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +- + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++++++-- + 2 files changed, 7 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +index 4ceaca0f6ce3..21321d164708 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +@@ -3204,7 +3204,7 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter) + max_combined = ixgbe_max_rss_indices(adapter); + } + +- return max_combined; ++ return min_t(int, max_combined, num_online_cpus()); + } + + static void ixgbe_get_channels(struct net_device *dev, +diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +index 14aea40da50f..77350e5fdf97 100644 +--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c ++++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +@@ -10112,6 +10112,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) + struct ixgbe_adapter *adapter = netdev_priv(dev); + struct bpf_prog *old_prog; + bool need_reset; ++ int num_queues; + + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + return -EINVAL; +@@ -10161,11 +10162,14 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) + /* Kick start the NAPI context if there is an AF_XDP socket open + * on that queue id. This so that receiving will start. + */ +- if (need_reset && prog) +- for (i = 0; i < adapter->num_rx_queues; i++) ++ if (need_reset && prog) { ++ num_queues = min_t(int, adapter->num_rx_queues, ++ adapter->num_xdp_queues); ++ for (i = 0; i < num_queues; i++) + if (adapter->xdp_ring[i]->xsk_pool) + (void)ixgbe_xsk_wakeup(adapter->netdev, i, + XDP_WAKEUP_RX); ++ } + + return 0; + } +-- +2.33.0 + diff --git a/queue-5.14/kvm-fix-objtool-relocation-warning.patch b/queue-5.14/kvm-fix-objtool-relocation-warning.patch new file mode 100644 index 00000000000..b43bee0b733 --- /dev/null +++ b/queue-5.14/kvm-fix-objtool-relocation-warning.patch @@ -0,0 +1,66 @@ +From 06d12aec7037e70b028e11f9edbf7c5f6f2400e5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 3 Oct 2021 13:34:19 -0700 +Subject: kvm: fix objtool relocation warning + +From: Linus Torvalds + +[ Upstream commit 291073a566b2094c7192872cc0f17ce73d83cb76 ] + +The recent change to make objtool aware of more symbol relocation types +(commit 24ff65257375: "objtool: Teach get_alt_entry() about more +relocation types") also added another check, and resulted in this +objtool warning when building kvm on x86: + + arch/x86/kvm/emulate.o: warning: objtool: __ex_table+0x4: don't know how to handle reloc symbol type: kvm_fastop_exception + +The reason seems to be that kvm_fastop_exception() is marked as a global +symbol, which causes the relocation to ke kept around for objtool. And +at the same time, the kvm_fastop_exception definition (which is done as +an inline asm statement) doesn't actually set the type of the global, +which then makes objtool unhappy. + +The minimal fix is to just not mark kvm_fastop_exception as being a +global symbol. It's only used in that one compilation unit anyway, so +it was always pointless. That's how all the other local exception table +labels are done. 
+ +I'm not entirely happy about the kinds of games that the kvm code plays +with doing its own exception handling, and the fact that it confused +objtool is most definitely a symptom of the code being a bit too subtle +and ad-hoc. But at least this trivial one-liner makes objtool no longer +upset about what is going on. + +Fixes: 24ff65257375 ("objtool: Teach get_alt_entry() about more relocation types") +Link: https://lore.kernel.org/lkml/CAHk-=wiZwq-0LknKhXN4M+T8jbxn_2i9mcKpO+OaBSSq_Eh7tg@mail.gmail.com/ +Cc: Borislav Petkov +Cc: Paolo Bonzini +Cc: Sean Christopherson +Cc: Vitaly Kuznetsov +Cc: Wanpeng Li +Cc: Jim Mattson +Cc: Joerg Roedel +Cc: Peter Zijlstra +Cc: Josh Poimboeuf +Cc: Nathan Chancellor +Signed-off-by: Linus Torvalds +Signed-off-by: Sasha Levin +--- + arch/x86/kvm/emulate.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c +index 2837110e66ed..50050d06672b 100644 +--- a/arch/x86/kvm/emulate.c ++++ b/arch/x86/kvm/emulate.c +@@ -435,7 +435,6 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop); + __FOP_RET(#op) + + asm(".pushsection .fixup, \"ax\"\n" +- ".global kvm_fastop_exception \n" + "kvm_fastop_exception: xor %esi, %esi; ret\n" + ".popsection"); + +-- +2.33.0 + diff --git a/queue-5.14/libbpf-fix-segfault-in-static-linker-for-objects-wit.patch b/queue-5.14/libbpf-fix-segfault-in-static-linker-for-objects-wit.patch new file mode 100644 index 00000000000..5cf7ff8d22d --- /dev/null +++ b/queue-5.14/libbpf-fix-segfault-in-static-linker-for-objects-wit.patch @@ -0,0 +1,66 @@ +From cbb6ee365aa0dc45d15ef22b752453d98bf325d8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Sep 2021 08:07:25 +0530 +Subject: libbpf: Fix segfault in static linker for objects without BTF + +From: Kumar Kartikeya Dwivedi + +[ Upstream commit bcfd367c2839f2126c048fe59700ec1b538e2b06 ] + +When a BPF object is compiled without BTF info (without -g), +trying to link such objects using bpftool causes a SIGSEGV due to +btf__get_nr_types accessing obj->btf which is NULL. Fix this by +checking for the NULL pointer, and return error. 
+
+Reproducer:
+$ cat a.bpf.c
+extern int foo(void);
+int bar(void) { return foo(); }
+$ cat b.bpf.c
+int foo(void) { return 0; }
+$ clang -O2 -target bpf -c a.bpf.c
+$ clang -O2 -target bpf -c b.bpf.c
+$ bpftool gen obj out a.bpf.o b.bpf.o
+Segmentation fault (core dumped)
+
+After fix:
+$ bpftool gen obj out a.bpf.o b.bpf.o
+libbpf: failed to find BTF info for object 'a.bpf.o'
+Error: failed to link 'a.bpf.o': Unknown error -22 (-22)
+
+Fixes: a46349227cd8 (libbpf: Add linker extern resolution support for functions and global variables)
+Signed-off-by: Kumar Kartikeya Dwivedi
+Signed-off-by: Andrii Nakryiko
+Signed-off-by: Daniel Borkmann
+Link: https://lore.kernel.org/bpf/20210924023725.70228-1-memxor@gmail.com
+Signed-off-by: Sasha Levin
+---
+ tools/lib/bpf/linker.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
+index 10911a8cad0f..2df880cefdae 100644
+--- a/tools/lib/bpf/linker.c
++++ b/tools/lib/bpf/linker.c
+@@ -1649,11 +1649,17 @@ static bool btf_is_non_static(const struct btf_type *t)
+ static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name,
+ int *out_btf_sec_id, int *out_btf_id)
+ {
+- int i, j, n = btf__get_nr_types(obj->btf), m, btf_id = 0;
++ int i, j, n, m, btf_id = 0;
+ const struct btf_type *t;
+ const struct btf_var_secinfo *vi;
+ const char *name;
+
++ if (!obj->btf) {
++ pr_warn("failed to find BTF info for object '%s'\n", obj->filename);
++ return -EINVAL;
++ }
++
++ n = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(obj->btf, i);
+
+--
+2.33.0
+
diff --git a/queue-5.14/mac80211-fix-ieee80211_amsdu_aggregate-frag_tail-bug.patch b/queue-5.14/mac80211-fix-ieee80211_amsdu_aggregate-frag_tail-bug.patch
new file mode 100644
index 00000000000..ad1eaee62e1
--- /dev/null
+++ b/queue-5.14/mac80211-fix-ieee80211_amsdu_aggregate-frag_tail-bug.patch
@@ -0,0 +1,52 @@
+From 1102e73304236bc22044a333ca539e08a4b9bafd Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 30 Aug 2021 15:32:40 +0800
+Subject: mac80211: Fix ieee80211_amsdu_aggregate frag_tail bug
+
+From: Chih-Kang Chang
+
+[ Upstream commit fe94bac626d9c1c5bc98ab32707be8a9d7f8adba ]
+
+In ieee80211_amsdu_aggregate(), a pointer frag_tail is set to point to
+the end of skb_shinfo(head)->frag_list, and is used to bind another skb
+at the end of this function. But when ieee80211_amsdu_aggregate()
+->ieee80211_amsdu_realloc_pad()->pskb_expand_head() executes, the
+address of skb_shinfo(head)->frag_list is changed. However,
+ieee80211_amsdu_aggregate() does not update frag_tail after calling
+pskb_expand_head(). That means the second skb can't be bound to the
+head skb appropriately. So we update the address of frag_tail to fix it.
+ +Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support") +Signed-off-by: Chih-Kang Chang +Signed-off-by: Zong-Zhe Yang +Signed-off-by: Ping-Ke Shih +Link: https://lore.kernel.org/r/20210830073240.12736-1-pkshih@realtek.com +[reword comment] +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/tx.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c +index fa09a369214d..0208f68af394 100644 +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -3380,6 +3380,14 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) + goto out; + ++ /* If n == 2, the "while (*frag_tail)" loop above didn't execute ++ * and frag_tail should be &skb_shinfo(head)->frag_list. ++ * However, ieee80211_amsdu_prepare_head() can reallocate it. ++ * Reload frag_tail to have it pointing to the correct place. ++ */ ++ if (n == 2) ++ frag_tail = &skb_shinfo(head)->frag_list; ++ + /* + * Pad out the previous subframe to a multiple of 4 by adding the + * padding to the next one, that's being added. Note that head->len +-- +2.33.0 + diff --git a/queue-5.14/mac80211-hwsim-fix-late-beacon-hrtimer-handling.patch b/queue-5.14/mac80211-hwsim-fix-late-beacon-hrtimer-handling.patch new file mode 100644 index 00000000000..bc96b92fd0e --- /dev/null +++ b/queue-5.14/mac80211-hwsim-fix-late-beacon-hrtimer-handling.patch @@ -0,0 +1,66 @@ +From be9852ac03ef188dd96804ad9d4309f1f0c7cc55 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Sep 2021 11:29:37 +0200 +Subject: mac80211-hwsim: fix late beacon hrtimer handling + +From: Johannes Berg + +[ Upstream commit 313bbd1990b6ddfdaa7da098d0c56b098a833572 ] + +Thomas explained in https://lore.kernel.org/r/87mtoeb4hb.ffs@tglx +that our handling of the hrtimer here is wrong: If the timer fires +late (e.g. due to vCPU scheduling, as reported by Dmitry/syzbot) +then it tries to actually rearm the timer at the next deadline, +which might be in the past already: + + 1 2 3 N N+1 + | | | ... | | + + ^ intended to fire here (1) + ^ next deadline here (2) + ^ actually fired here + +The next time it fires, it's later, but will still try to schedule +for the next deadline (now 3), etc. until it catches up with N, +but that might take a long time, causing stalls etc. + +Now, all of this is simulation, so we just have to fix it, but +note that the behaviour is wrong even per spec, since there's no +value then in sending all those beacons unaligned - they should be +aligned to the TBTT (1, 2, 3, ... in the picture), and if we're a +bit (or a lot) late, then just resume at that point. + +Therefore, change the code to use hrtimer_forward_now() which will +ensure that the next firing of the timer would be at N+1 (in the +picture), i.e. the next interval point after the current time. 
+ +Suggested-by: Thomas Gleixner +Reported-by: Dmitry Vyukov +Reported-by: syzbot+0e964fad69a9c462bc1e@syzkaller.appspotmail.com +Fixes: 01e59e467ecf ("mac80211_hwsim: hrtimer beacon") +Reviewed-by: Thomas Gleixner +Link: https://lore.kernel.org/r/20210915112936.544f383472eb.I3f9712009027aa09244b65399bf18bf482a8c4f1@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/mac80211_hwsim.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c +index ffa894f7312a..0adae76eb8df 100644 +--- a/drivers/net/wireless/mac80211_hwsim.c ++++ b/drivers/net/wireless/mac80211_hwsim.c +@@ -1867,8 +1867,8 @@ mac80211_hwsim_beacon(struct hrtimer *timer) + bcn_int -= data->bcn_delta; + data->bcn_delta = 0; + } +- hrtimer_forward(&data->beacon_timer, hrtimer_get_expires(timer), +- ns_to_ktime(bcn_int * NSEC_PER_USEC)); ++ hrtimer_forward_now(&data->beacon_timer, ++ ns_to_ktime(bcn_int * NSEC_PER_USEC)); + return HRTIMER_RESTART; + } + +-- +2.33.0 + diff --git a/queue-5.14/mac80211-limit-injected-vht-mcs-nss-in-ieee80211_par.patch b/queue-5.14/mac80211-limit-injected-vht-mcs-nss-in-ieee80211_par.patch new file mode 100644 index 00000000000..52edf690910 --- /dev/null +++ b/queue-5.14/mac80211-limit-injected-vht-mcs-nss-in-ieee80211_par.patch @@ -0,0 +1,84 @@ +From 514639d6979d1f36ca7e50b9602268c8530a077a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Sep 2021 14:45:22 +0200 +Subject: mac80211: limit injected vht mcs/nss in ieee80211_parse_tx_radiotap + +From: Lorenzo Bianconi + +[ Upstream commit 13cb6d826e0ac0d144b0d48191ff1a111d32f0c6 ] + +Limit max values for vht mcs and nss in ieee80211_parse_tx_radiotap +routine in order to fix the following warning reported by syzbot: + +WARNING: CPU: 0 PID: 10717 at include/net/mac80211.h:989 ieee80211_rate_set_vht include/net/mac80211.h:989 [inline] +WARNING: CPU: 0 PID: 10717 at include/net/mac80211.h:989 ieee80211_parse_tx_radiotap+0x101e/0x12d0 net/mac80211/tx.c:2244 +Modules linked in: +CPU: 0 PID: 10717 Comm: syz-executor.5 Not tainted 5.14.0-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:ieee80211_rate_set_vht include/net/mac80211.h:989 [inline] +RIP: 0010:ieee80211_parse_tx_radiotap+0x101e/0x12d0 net/mac80211/tx.c:2244 +RSP: 0018:ffffc9000186f3e8 EFLAGS: 00010216 +RAX: 0000000000000618 RBX: ffff88804ef76500 RCX: ffffc900143a5000 +RDX: 0000000000040000 RSI: ffffffff888f478e RDI: 0000000000000003 +RBP: 00000000ffffffff R08: 0000000000000000 R09: 0000000000000100 +R10: ffffffff888f46f9 R11: 0000000000000000 R12: 00000000fffffff8 +R13: ffff88804ef7653c R14: 0000000000000001 R15: 0000000000000004 +FS: 00007fbf5718f700(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000001b2de23000 CR3: 000000006a671000 CR4: 00000000001506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600 +Call Trace: + ieee80211_monitor_select_queue+0xa6/0x250 net/mac80211/iface.c:740 + netdev_core_pick_tx+0x169/0x2e0 net/core/dev.c:4089 + __dev_queue_xmit+0x6f9/0x3710 net/core/dev.c:4165 + __bpf_tx_skb net/core/filter.c:2114 [inline] + __bpf_redirect_no_mac net/core/filter.c:2139 [inline] + __bpf_redirect+0x5ba/0xd20 net/core/filter.c:2162 + ____bpf_clone_redirect net/core/filter.c:2429 [inline] + bpf_clone_redirect+0x2ae/0x420 
net/core/filter.c:2401 + bpf_prog_eeb6f53a69e5c6a2+0x59/0x234 + bpf_dispatcher_nop_func include/linux/bpf.h:717 [inline] + __bpf_prog_run include/linux/filter.h:624 [inline] + bpf_prog_run include/linux/filter.h:631 [inline] + bpf_test_run+0x381/0xa30 net/bpf/test_run.c:119 + bpf_prog_test_run_skb+0xb84/0x1ee0 net/bpf/test_run.c:663 + bpf_prog_test_run kernel/bpf/syscall.c:3307 [inline] + __sys_bpf+0x2137/0x5df0 kernel/bpf/syscall.c:4605 + __do_sys_bpf kernel/bpf/syscall.c:4691 [inline] + __se_sys_bpf kernel/bpf/syscall.c:4689 [inline] + __x64_sys_bpf+0x75/0xb0 kernel/bpf/syscall.c:4689 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae +RIP: 0033:0x4665f9 + +Reported-by: syzbot+0196ac871673f0c20f68@syzkaller.appspotmail.com +Fixes: 646e76bb5daf4 ("mac80211: parse VHT info in injected frames") +Signed-off-by: Lorenzo Bianconi +Link: https://lore.kernel.org/r/c26c3f02dcb38ab63b2f2534cb463d95ee81bb13.1632141760.git.lorenzo@kernel.org +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/tx.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c +index 0208f68af394..751e601c4623 100644 +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -2209,7 +2209,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, + } + + vht_mcs = iterator.this_arg[4] >> 4; ++ if (vht_mcs > 11) ++ vht_mcs = 0; + vht_nss = iterator.this_arg[4] & 0xF; ++ if (!vht_nss || vht_nss > 8) ++ vht_nss = 1; + break; + + /* +-- +2.33.0 + diff --git a/queue-5.14/mac80211-mesh-fix-potentially-unaligned-access.patch b/queue-5.14/mac80211-mesh-fix-potentially-unaligned-access.patch new file mode 100644 index 00000000000..d642ffa1ed0 --- /dev/null +++ b/queue-5.14/mac80211-mesh-fix-potentially-unaligned-access.patch @@ -0,0 +1,45 @@ +From 79baeb59481fef9aae2b7aad768971f24366ffbd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Sep 2021 15:40:05 +0200 +Subject: mac80211: mesh: fix potentially unaligned access + +From: Johannes Berg + +[ Upstream commit b9731062ce8afd35cf723bf3a8ad55d208f915a5 ] + +The pointer here points directly into the frame, so the +access is potentially unaligned. Use get_unaligned_le16 +to avoid that. + +Fixes: 3f52b7e328c5 ("mac80211: mesh power save basics") +Link: https://lore.kernel.org/r/20210920154009.3110ff75be0c.Ib6a2ff9e9cc9bc6fca50fce631ec1ce725cc926b@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/mesh_ps.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c +index 204830a55240..3fbd0b9ff913 100644 +--- a/net/mac80211/mesh_ps.c ++++ b/net/mac80211/mesh_ps.c +@@ -2,6 +2,7 @@ + /* + * Copyright 2012-2013, Marco Porsch + * Copyright 2012-2013, cozybit Inc. 
++ * Copyright (C) 2021 Intel Corporation + */ + + #include "mesh.h" +@@ -588,7 +589,7 @@ void ieee80211_mps_frame_release(struct sta_info *sta, + + /* only transmit to PS STA with announced, non-zero awake window */ + if (test_sta_flag(sta, WLAN_STA_PS_STA) && +- (!elems->awake_window || !le16_to_cpu(*elems->awake_window))) ++ (!elems->awake_window || !get_unaligned_le16(elems->awake_window))) + return; + + if (!test_sta_flag(sta, WLAN_STA_MPSP_OWNER)) +-- +2.33.0 + diff --git a/queue-5.14/mptcp-allow-changing-the-backup-bit-when-no-sockets-.patch b/queue-5.14/mptcp-allow-changing-the-backup-bit-when-no-sockets-.patch new file mode 100644 index 00000000000..ed2af32273e --- /dev/null +++ b/queue-5.14/mptcp-allow-changing-the-backup-bit-when-no-sockets-.patch @@ -0,0 +1,43 @@ +From 17f926cc07a7e81463dfe3cd290daf8e5b78a701 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Sep 2021 17:04:12 -0700 +Subject: mptcp: allow changing the 'backup' bit when no sockets are open + +From: Davide Caratti + +[ Upstream commit 3f4a08909e2c740f8045efc74c4cf82eeaae3e36 ] + +current Linux refuses to change the 'backup' bit of MPTCP endpoints, i.e. +using MPTCP_PM_CMD_SET_FLAGS, unless it finds (at least) one subflow that +matches the endpoint address. There is no reason for that, so we can just +ignore the return value of mptcp_nl_addr_backup(). In this way, endpoints +can reconfigure their 'backup' flag even if no MPTCP sockets are open (or +more generally, in case the MP_PRIO message is not sent out). + +Fixes: 0f9f696a502e ("mptcp: add set_flags command in PM netlink") +Signed-off-by: Davide Caratti +Signed-off-by: Mat Martineau +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/mptcp/pm_netlink.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c +index 89251cbe9f1a..81103b29c0af 100644 +--- a/net/mptcp/pm_netlink.c ++++ b/net/mptcp/pm_netlink.c +@@ -1558,9 +1558,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info) + + list_for_each_entry(entry, &pernet->local_addr_list, list) { + if (addresses_equal(&entry->addr, &addr.addr, true)) { +- ret = mptcp_nl_addr_backup(net, &entry->addr, bkup); +- if (ret) +- return ret; ++ mptcp_nl_addr_backup(net, &entry->addr, bkup); + + if (bkup) + entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP; +-- +2.33.0 + diff --git a/queue-5.14/mptcp-don-t-return-sockets-in-foreign-netns.patch b/queue-5.14/mptcp-don-t-return-sockets-in-foreign-netns.patch new file mode 100644 index 00000000000..303e9c1d782 --- /dev/null +++ b/queue-5.14/mptcp-don-t-return-sockets-in-foreign-netns.patch @@ -0,0 +1,215 @@ +From 317822638622e3ecac51de0440f1df16e72e4e59 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Sep 2021 17:04:11 -0700 +Subject: mptcp: don't return sockets in foreign netns + +From: Florian Westphal + +[ Upstream commit ea1300b9df7c8e8b65695a08b8f6aaf4b25fec9c ] + +mptcp_token_get_sock() may return a mptcp socket that is in +a different net namespace than the socket that received the token value. + +The mptcp syncookie code path had an explicit check for this, +this moves the test into mptcp_token_get_sock() function. + +Eventually token.c should be converted to pernet storage, but +such change is not suitable for net tree. + +Fixes: 2c5ebd001d4f0 ("mptcp: refactor token container") +Signed-off-by: Florian Westphal +Signed-off-by: Mat Martineau +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/mptcp/mptcp_diag.c | 2 +- + net/mptcp/protocol.h | 2 +- + net/mptcp/subflow.c | 2 +- + net/mptcp/syncookies.c | 13 +------------ + net/mptcp/token.c | 11 ++++++++--- + net/mptcp/token_test.c | 14 ++++++++------ + 6 files changed, 20 insertions(+), 24 deletions(-) + +diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c +index f48eb6315bbb..292374fb0779 100644 +--- a/net/mptcp/mptcp_diag.c ++++ b/net/mptcp/mptcp_diag.c +@@ -36,7 +36,7 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb, + struct sock *sk; + + net = sock_net(in_skb->sk); +- msk = mptcp_token_get_sock(req->id.idiag_cookie[0]); ++ msk = mptcp_token_get_sock(net, req->id.idiag_cookie[0]); + if (!msk) + goto out_nosk; + +diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h +index 6ac564d584c1..c8a49e92e66f 100644 +--- a/net/mptcp/protocol.h ++++ b/net/mptcp/protocol.h +@@ -680,7 +680,7 @@ int mptcp_token_new_connect(struct sock *sk); + void mptcp_token_accept(struct mptcp_subflow_request_sock *r, + struct mptcp_sock *msk); + bool mptcp_token_exists(u32 token); +-struct mptcp_sock *mptcp_token_get_sock(u32 token); ++struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token); + struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, + long *s_num); + void mptcp_token_destroy(struct mptcp_sock *msk); +diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c +index 966f777d35ce..1f3039b829a7 100644 +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -86,7 +86,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) + struct mptcp_sock *msk; + int local_id; + +- msk = mptcp_token_get_sock(subflow_req->token); ++ msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token); + if (!msk) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN); + return NULL; +diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c +index 37127781aee9..7f22526346a7 100644 +--- a/net/mptcp/syncookies.c ++++ b/net/mptcp/syncookies.c +@@ -108,18 +108,12 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl + + e->valid = 0; + +- msk = mptcp_token_get_sock(e->token); ++ msk = mptcp_token_get_sock(net, e->token); + if (!msk) { + spin_unlock_bh(&join_entry_locks[i]); + return false; + } + +- /* If this fails, the token got re-used in the mean time by another +- * mptcp socket in a different netns, i.e. entry is outdated. +- */ +- if (!net_eq(sock_net((struct sock *)msk), net)) +- goto err_put; +- + subflow_req->remote_nonce = e->remote_nonce; + subflow_req->local_nonce = e->local_nonce; + subflow_req->backup = e->backup; +@@ -128,11 +122,6 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl + subflow_req->msk = msk; + spin_unlock_bh(&join_entry_locks[i]); + return true; +- +-err_put: +- spin_unlock_bh(&join_entry_locks[i]); +- sock_put((struct sock *)msk); +- return false; + } + + void __init mptcp_join_cookie_init(void) +diff --git a/net/mptcp/token.c b/net/mptcp/token.c +index a98e554b034f..e581b341c5be 100644 +--- a/net/mptcp/token.c ++++ b/net/mptcp/token.c +@@ -231,6 +231,7 @@ bool mptcp_token_exists(u32 token) + + /** + * mptcp_token_get_sock - retrieve mptcp connection sock using its token ++ * @net: restrict to this namespace + * @token: token of the mptcp connection to retrieve + * + * This function returns the mptcp connection structure with the given token. 
+@@ -238,7 +239,7 @@ bool mptcp_token_exists(u32 token) + * + * returns NULL if no connection with the given token value exists. + */ +-struct mptcp_sock *mptcp_token_get_sock(u32 token) ++struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token) + { + struct hlist_nulls_node *pos; + struct token_bucket *bucket; +@@ -251,11 +252,15 @@ struct mptcp_sock *mptcp_token_get_sock(u32 token) + again: + sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { + msk = mptcp_sk(sk); +- if (READ_ONCE(msk->token) != token) ++ if (READ_ONCE(msk->token) != token || ++ !net_eq(sock_net(sk), net)) + continue; ++ + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + goto not_found; +- if (READ_ONCE(msk->token) != token) { ++ ++ if (READ_ONCE(msk->token) != token || ++ !net_eq(sock_net(sk), net)) { + sock_put(sk); + goto again; + } +diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c +index e1bd6f0a0676..5d984bec1cd8 100644 +--- a/net/mptcp/token_test.c ++++ b/net/mptcp/token_test.c +@@ -11,6 +11,7 @@ static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test) + GFP_USER); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req); + mptcp_token_init_request((struct request_sock *)req); ++ sock_net_set((struct sock *)req, &init_net); + return req; + } + +@@ -22,7 +23,7 @@ static void mptcp_token_test_req_basic(struct kunit *test) + KUNIT_ASSERT_EQ(test, 0, + mptcp_token_new_request((struct request_sock *)req)); + KUNIT_EXPECT_NE(test, 0, (int)req->token); +- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token)); ++ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, req->token)); + + /* cleanup */ + mptcp_token_destroy_request((struct request_sock *)req); +@@ -55,6 +56,7 @@ static struct mptcp_sock *build_msk(struct kunit *test) + msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk); + refcount_set(&((struct sock *)msk)->sk_refcnt, 1); ++ sock_net_set((struct sock *)msk, &init_net); + return msk; + } + +@@ -74,11 +76,11 @@ static void mptcp_token_test_msk_basic(struct kunit *test) + mptcp_token_new_connect((struct sock *)icsk)); + KUNIT_EXPECT_NE(test, 0, (int)ctx->token); + KUNIT_EXPECT_EQ(test, ctx->token, msk->token); +- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token)); ++ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, ctx->token)); + KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt)); + + mptcp_token_destroy(msk); +- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token)); ++ KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, ctx->token)); + } + + static void mptcp_token_test_accept(struct kunit *test) +@@ -90,11 +92,11 @@ static void mptcp_token_test_accept(struct kunit *test) + mptcp_token_new_request((struct request_sock *)req)); + msk->token = req->token; + mptcp_token_accept(req, msk); +- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token)); ++ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token)); + + /* this is now a no-op */ + mptcp_token_destroy_request((struct request_sock *)req); +- KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token)); ++ KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token)); + + /* cleanup */ + mptcp_token_destroy(msk); +@@ -116,7 +118,7 @@ static void mptcp_token_test_destroyed(struct kunit *test) + + /* simulate race on removal */ + refcount_set(&sk->sk_refcnt, 0); +- KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token)); ++ 
KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, msk->token)); + + /* cleanup */ + mptcp_token_destroy(msk); +-- +2.33.0 + diff --git a/queue-5.14/net-enetc-fix-the-incorrect-clearing-of-if_mode-bits.patch b/queue-5.14/net-enetc-fix-the-incorrect-clearing-of-if_mode-bits.patch new file mode 100644 index 00000000000..efdedd9f0dd --- /dev/null +++ b/queue-5.14/net-enetc-fix-the-incorrect-clearing-of-if_mode-bits.patch @@ -0,0 +1,49 @@ +From b34b3b3ec21f1ccd518c5704fe1bcfa8f2c8395d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Sep 2021 16:23:33 +0300 +Subject: net: enetc: fix the incorrect clearing of IF_MODE bits + +From: Vladimir Oltean + +[ Upstream commit 325fd36ae76a6d089983b2d2eccb41237d35b221 ] + +The enetc phylink .mac_config handler intends to clear the IFMODE field +(bits 1:0) of the PM0_IF_MODE register, but incorrectly clears all the +other fields instead. + +For normal operation, the bug was inconsequential, due to the fact that +we write the PM0_IF_MODE register in two stages, first in +phylink .mac_config (which incorrectly cleared out a bunch of stuff), +then we update the speed and duplex to the correct values in +phylink .mac_link_up. + +Judging by the code (not tested), it looks like maybe loopback mode was +broken, since this is one of the settings in PM0_IF_MODE which is +incorrectly cleared. + +Fixes: c76a97218dcb ("net: enetc: force the RGMII speed and duplex instead of operating in inband mode") +Reported-by: Pavel Machek (CIP) +Signed-off-by: Vladimir Oltean +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/freescale/enetc/enetc_pf.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c +index c84f6c226743..cf00709caea4 100644 +--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c ++++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c +@@ -541,8 +541,7 @@ static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode) + + if (phy_interface_mode_is_rgmii(phy_mode)) { + val = enetc_port_rd(hw, ENETC_PM0_IF_MODE); +- val &= ~ENETC_PM0_IFM_EN_AUTO; +- val &= ENETC_PM0_IFM_IFMODE_MASK; ++ val &= ~(ENETC_PM0_IFM_EN_AUTO | ENETC_PM0_IFM_IFMODE_MASK); + val |= ENETC_PM0_IFM_IFMODE_GMII | ENETC_PM0_IFM_RG; + enetc_port_wr(hw, ENETC_PM0_IF_MODE, val); + } +-- +2.33.0 + diff --git a/queue-5.14/net-hns3-disable-firmware-compatible-features-when-u.patch b/queue-5.14/net-hns3-disable-firmware-compatible-features-when-u.patch new file mode 100644 index 00000000000..97033708554 --- /dev/null +++ b/queue-5.14/net-hns3-disable-firmware-compatible-features-when-u.patch @@ -0,0 +1,87 @@ +From fdaa5d2794fff7efd75be95a5c9ab9c2512eb335 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 17:35:56 +0800 +Subject: net: hns3: disable firmware compatible features when uninstall PF + +From: Guangbin Huang + +[ Upstream commit 0178839ccca36dee238a57e7f4c3c252f5dbbba6 ] + +Currently, the firmware compatible features are enabled in PF driver +initialization process, but they are not disabled in PF driver +deinitialization process and firmware keeps these features in enabled +status. + +In this case, if load an old PF driver (for example, in VM) which not +support the firmware compatible features, firmware will still send mailbox +message to PF when link status changed and PF will print +"un-supported mailbox message, code = 201". 
+ +To fix this problem, disable these firmware compatible features in PF +driver deinitialization process. + +Fixes: ed8fb4b262ae ("net: hns3: add link change event report") +Signed-off-by: Guangbin Huang +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../hisilicon/hns3/hns3pf/hclge_cmd.c | 21 ++++++++++++------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +index eb748aa35952..0f0bf3d503bf 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c +@@ -472,7 +472,7 @@ int hclge_cmd_queue_init(struct hclge_dev *hdev) + return ret; + } + +-static int hclge_firmware_compat_config(struct hclge_dev *hdev) ++static int hclge_firmware_compat_config(struct hclge_dev *hdev, bool en) + { + struct hclge_firmware_compat_cmd *req; + struct hclge_desc desc; +@@ -480,13 +480,16 @@ static int hclge_firmware_compat_config(struct hclge_dev *hdev) + + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_IMP_COMPAT_CFG, false); + +- req = (struct hclge_firmware_compat_cmd *)desc.data; ++ if (en) { ++ req = (struct hclge_firmware_compat_cmd *)desc.data; + +- hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1); +- hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1); +- if (hnae3_dev_phy_imp_supported(hdev)) +- hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1); +- req->compat = cpu_to_le32(compat); ++ hnae3_set_bit(compat, HCLGE_LINK_EVENT_REPORT_EN_B, 1); ++ hnae3_set_bit(compat, HCLGE_NCSI_ERROR_REPORT_EN_B, 1); ++ if (hnae3_dev_phy_imp_supported(hdev)) ++ hnae3_set_bit(compat, HCLGE_PHY_IMP_EN_B, 1); ++ ++ req->compat = cpu_to_le32(compat); ++ } + + return hclge_cmd_send(&hdev->hw, &desc, 1); + } +@@ -543,7 +546,7 @@ int hclge_cmd_init(struct hclge_dev *hdev) + /* ask the firmware to enable some features, driver can work without + * it. + */ +- ret = hclge_firmware_compat_config(hdev); ++ ret = hclge_firmware_compat_config(hdev, true); + if (ret) + dev_warn(&hdev->pdev->dev, + "Firmware compatible features not enabled(%d).\n", +@@ -573,6 +576,8 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw) + + void hclge_cmd_uninit(struct hclge_dev *hdev) + { ++ hclge_firmware_compat_config(hdev, false); ++ + set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state); + /* wait to ensure that the firmware completes the possible left + * over commands. +-- +2.33.0 + diff --git a/queue-5.14/net-hns3-do-not-allow-call-hns3_nic_net_open-repeate.patch b/queue-5.14/net-hns3-do-not-allow-call-hns3_nic_net_open-repeate.patch new file mode 100644 index 00000000000..6a4cd7d7d97 --- /dev/null +++ b/queue-5.14/net-hns3-do-not-allow-call-hns3_nic_net_open-repeate.patch @@ -0,0 +1,84 @@ +From 758579a862cc7d3c0ace145507d655322924e3a4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 17:35:49 +0800 +Subject: net: hns3: do not allow call hns3_nic_net_open repeatedly + +From: Jian Shen + +[ Upstream commit 5b09e88e1bf7fe86540fab4b5f3eece8abead39e ] + +hns3_nic_net_open() is not allowed to called repeatly, but there +is no checking for this. When doing device reset and setup tc +concurrently, there is a small oppotunity to call hns3_nic_net_open +repeatedly, and cause kernel bug by calling napi_enable twice. + +The calltrace information is like below: +[ 3078.222780] ------------[ cut here ]------------ +[ 3078.230255] kernel BUG at net/core/dev.c:6991! 
+[ 3078.236224] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP +[ 3078.243431] Modules linked in: hns3 hclgevf hclge hnae3 vfio_iommu_type1 vfio_pci vfio_virqfd vfio pv680_mii(O) +[ 3078.258880] CPU: 0 PID: 295 Comm: kworker/u8:5 Tainted: G O 5.14.0-rc4+ #1 +[ 3078.269102] Hardware name: , BIOS KpxxxFPGA 1P B600 V181 08/12/2021 +[ 3078.276801] Workqueue: hclge hclge_service_task [hclge] +[ 3078.288774] pstate: 60400009 (nZCv daif +PAN -UAO -TCO BTYPE=--) +[ 3078.296168] pc : napi_enable+0x80/0x84 +tc qdisc sho[w 3d0e7v8 .e3t0h218 79] lr : hns3_nic_net_open+0x138/0x510 [hns3] + +[ 3078.314771] sp : ffff8000108abb20 +[ 3078.319099] x29: ffff8000108abb20 x28: 0000000000000000 x27: ffff0820a8490300 +[ 3078.329121] x26: 0000000000000001 x25: ffff08209cfc6200 x24: 0000000000000000 +[ 3078.339044] x23: ffff0820a8490300 x22: ffff08209cd76000 x21: ffff0820abfe3880 +[ 3078.349018] x20: 0000000000000000 x19: ffff08209cd76900 x18: 0000000000000000 +[ 3078.358620] x17: 0000000000000000 x16: ffffc816e1727a50 x15: 0000ffff8f4ff930 +[ 3078.368895] x14: 0000000000000000 x13: 0000000000000000 x12: 0000259e9dbeb6b4 +[ 3078.377987] x11: 0096a8f7e764eb40 x10: 634615ad28d3eab5 x9 : ffffc816ad8885b8 +[ 3078.387091] x8 : ffff08209cfc6fb8 x7 : ffff0820ac0da058 x6 : ffff0820a8490344 +[ 3078.396356] x5 : 0000000000000140 x4 : 0000000000000003 x3 : ffff08209cd76938 +[ 3078.405365] x2 : 0000000000000000 x1 : 0000000000000010 x0 : ffff0820abfe38a0 +[ 3078.414657] Call trace: +[ 3078.418517] napi_enable+0x80/0x84 +[ 3078.424626] hns3_reset_notify_up_enet+0x78/0xd0 [hns3] +[ 3078.433469] hns3_reset_notify+0x64/0x80 [hns3] +[ 3078.441430] hclge_notify_client+0x68/0xb0 [hclge] +[ 3078.450511] hclge_reset_rebuild+0x524/0x884 [hclge] +[ 3078.458879] hclge_reset_service_task+0x3c4/0x680 [hclge] +[ 3078.467470] hclge_service_task+0xb0/0xb54 [hclge] +[ 3078.475675] process_one_work+0x1dc/0x48c +[ 3078.481888] worker_thread+0x15c/0x464 +[ 3078.487104] kthread+0x160/0x170 +[ 3078.492479] ret_from_fork+0x10/0x18 +[ 3078.498785] Code: c8027c81 35ffffa2 d50323bf d65f03c0 (d4210000) +[ 3078.506889] ---[ end trace 8ebe0340a1b0fb44 ]--- + +Once hns3_nic_net_open() is excute success, the flag +HNS3_NIC_STATE_DOWN will be cleared. So add checking for this +flag, directly return when HNS3_NIC_STATE_DOWN is no set. + +Fixes: e888402789b9 ("net: hns3: call hns3_nic_net_open() while doing HNAE3_UP_CLIENT") +Signed-off-by: Jian Shen +Signed-off-by: Guangbin Huang +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +index 9faa3712ea5b..b24ad9bc8e1b 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +@@ -776,6 +776,11 @@ static int hns3_nic_net_open(struct net_device *netdev) + if (hns3_nic_resetting(netdev)) + return -EBUSY; + ++ if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) { ++ netdev_warn(netdev, "net open repeatedly!\n"); ++ return 0; ++ } ++ + netif_carrier_off(netdev); + + ret = hns3_nic_set_real_num_queue(netdev); +-- +2.33.0 + diff --git a/queue-5.14/net-hns3-don-t-rollback-when-destroy-mqprio-fail.patch b/queue-5.14/net-hns3-don-t-rollback-when-destroy-mqprio-fail.patch new file mode 100644 index 00000000000..c00e24ee95b --- /dev/null +++ b/queue-5.14/net-hns3-don-t-rollback-when-destroy-mqprio-fail.patch @@ -0,0 +1,58 @@ +From 7b3661856e86124a8dd89c9018e6b01fbfd9a6c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 17:35:51 +0800 +Subject: net: hns3: don't rollback when destroy mqprio fail + +From: Jian Shen + +[ Upstream commit d82650be60ee92e7486f755f5387023278aa933f ] + +For destroy mqprio is irreversible in stack, so it's unnecessary +to rollback the tc configuration when destroy mqprio failed. +Otherwise, it may cause the configuration being inconsistent +between driver and netstack. + +As the failure is usually caused by reset, and the driver will +restore the configuration after reset, so it can keep the +configuration being consistent between driver and hardware. + +Fixes: 5a5c90917467 ("net: hns3: add support for tc mqprio offload") +Signed-off-by: Jian Shen +Signed-off-by: Guangbin Huang +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + .../ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +index 5fadfdbc4858..e4f87ffd41ac 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +@@ -493,12 +493,17 @@ static int hclge_setup_tc(struct hnae3_handle *h, + return hclge_notify_init_up(hdev); + + err_out: +- /* roll-back */ +- memcpy(&kinfo->tc_info, &old_tc_info, sizeof(old_tc_info)); +- if (hclge_config_tc(hdev, &kinfo->tc_info)) +- dev_err(&hdev->pdev->dev, +- "failed to roll back tc configuration\n"); +- ++ if (!tc) { ++ dev_warn(&hdev->pdev->dev, ++ "failed to destroy mqprio, will active after reset, ret = %d\n", ++ ret); ++ } else { ++ /* roll-back */ ++ memcpy(&kinfo->tc_info, &old_tc_info, sizeof(old_tc_info)); ++ if (hclge_config_tc(hdev, &kinfo->tc_info)) ++ dev_err(&hdev->pdev->dev, ++ "failed to roll back tc configuration\n"); ++ } + hclge_notify_init_up(hdev); + + return ret; +-- +2.33.0 + diff --git a/queue-5.14/net-hns3-fix-always-enable-rx-vlan-filter-problem-af.patch b/queue-5.14/net-hns3-fix-always-enable-rx-vlan-filter-problem-af.patch new file mode 100644 index 00000000000..c6173c7ab5a --- /dev/null +++ b/queue-5.14/net-hns3-fix-always-enable-rx-vlan-filter-problem-af.patch @@ -0,0 +1,52 @@ +From 108034cff84be5f156d3acc500a56922c6f71d3f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 17:35:55 +0800 +Subject: net: hns3: fix always enable rx vlan filter problem after selftest + +From: Guangbin Huang + +[ Upstream commit 27bf4af69fcb9845fb2f0076db5d562ec072e70f ] + +Currently, the rx vlan filter will always be disabled before selftest and +be enabled after selftest as the rx vlan filter feature is fixed on in +old device earlier than V3. + +However, this feature is not fixed in some new devices and it can be +disabled by user. In this case, it is wrong if rx vlan filter is enabled +after selftest. So fix it. + +Fixes: bcc26e8dc432 ("net: hns3: remove unused code in hns3_self_test()") +Signed-off-by: Guangbin Huang +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +index 69b253424da8..83ee0f41322c 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +@@ -348,7 +348,8 @@ static void hns3_selftest_prepare(struct net_device *ndev, + + #if IS_ENABLED(CONFIG_VLAN_8021Q) + /* Disable the vlan filter for selftest does not support it */ +- if (h->ae_algo->ops->enable_vlan_filter) ++ if (h->ae_algo->ops->enable_vlan_filter && ++ ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + h->ae_algo->ops->enable_vlan_filter(h, false); + #endif + +@@ -373,7 +374,8 @@ static void hns3_selftest_restore(struct net_device *ndev, bool if_running) + h->ae_algo->ops->halt_autoneg(h, false); + + #if IS_ENABLED(CONFIG_VLAN_8021Q) +- if (h->ae_algo->ops->enable_vlan_filter) ++ if (h->ae_algo->ops->enable_vlan_filter && ++ ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + h->ae_algo->ops->enable_vlan_filter(h, true); + #endif + +-- +2.33.0 + diff --git a/queue-5.14/net-hns3-fix-mixed-flag-hclge_flag_mqprio_enable-and.patch b/queue-5.14/net-hns3-fix-mixed-flag-hclge_flag_mqprio_enable-and.patch new file mode 100644 index 00000000000..9bfa9419fb3 --- /dev/null +++ b/queue-5.14/net-hns3-fix-mixed-flag-hclge_flag_mqprio_enable-and.patch @@ -0,0 +1,135 @@ +From 2b8878e0fac247da87725936a88d628c3a0422df Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 17:35:52 +0800 +Subject: net: hns3: fix mixed flag HCLGE_FLAG_MQPRIO_ENABLE and + HCLGE_FLAG_DCB_ENABLE + +From: Jian Shen + +[ Upstream commit 0472e95ffeac8e61259eec17ab61608c6b35599d ] + +HCLGE_FLAG_MQPRIO_ENABLE is supposed to set when enable +multiple TCs with tc mqprio, and HCLGE_FLAG_DCB_ENABLE is +supposed to set when enable multiple TCs with ets. But +the driver mixed the flags when updating the tm configuration. + +Furtherly, PFC should be available when HCLGE_FLAG_MQPRIO_ENABLE +too, so remove the unnecessary limitation. + +Fixes: 5a5c90917467 ("net: hns3: add support for tc mqprio offload") +Signed-off-by: Jian Shen +Signed-off-by: Guangbin Huang +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + .../hisilicon/hns3/hns3pf/hclge_dcb.c | 7 +++-- + .../ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 31 +++---------------- + 2 files changed, 10 insertions(+), 28 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +index e4f87ffd41ac..c90bfde2aecf 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +@@ -224,6 +224,10 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) + } + + hclge_tm_schd_info_update(hdev, num_tc); ++ if (num_tc > 1) ++ hdev->flag |= HCLGE_FLAG_DCB_ENABLE; ++ else ++ hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + + ret = hclge_ieee_ets_to_tm_info(hdev, ets); + if (ret) +@@ -285,8 +289,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) + u8 i, j, pfc_map, *prio_tc; + int ret; + +- if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || +- hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) ++ if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; + + if (pfc->pfc_en == hdev->tm_info.pfc_en) +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +index 6f5035a788c0..f314dbd3ce11 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +@@ -727,14 +727,6 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) + for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) + hdev->tm_info.prio_tc[i] = + (i >= hdev->tm_info.num_tc) ? 0 : i; +- +- /* DCB is enabled if we have more than 1 TC or pfc_en is +- * non-zero. +- */ +- if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en) +- hdev->flag |= HCLGE_FLAG_DCB_ENABLE; +- else +- hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; + } + + static void hclge_tm_pg_info_init(struct hclge_dev *hdev) +@@ -765,10 +757,10 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) + + static void hclge_update_fc_mode_by_dcb_flag(struct hclge_dev *hdev) + { +- if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) { ++ if (hdev->tm_info.num_tc == 1 && !hdev->tm_info.pfc_en) { + if (hdev->fc_mode_last_time == HCLGE_FC_PFC) + dev_warn(&hdev->pdev->dev, +- "DCB is disable, but last mode is FC_PFC\n"); ++ "Only 1 tc used, but last mode is FC_PFC\n"); + + hdev->tm_info.fc_mode = hdev->fc_mode_last_time; + } else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) { +@@ -794,7 +786,7 @@ static void hclge_update_fc_mode(struct hclge_dev *hdev) + } + } + +-static void hclge_pfc_info_init(struct hclge_dev *hdev) ++void hclge_tm_pfc_info_update(struct hclge_dev *hdev) + { + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) + hclge_update_fc_mode(hdev); +@@ -810,7 +802,7 @@ static void hclge_tm_schd_info_init(struct hclge_dev *hdev) + + hclge_tm_vport_info_update(hdev); + +- hclge_pfc_info_init(hdev); ++ hclge_tm_pfc_info_update(hdev); + } + + static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev) +@@ -1556,19 +1548,6 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) + hclge_tm_schd_info_init(hdev); + } + +-void hclge_tm_pfc_info_update(struct hclge_dev *hdev) +-{ +- /* DCB is enabled if we have more than 1 TC or pfc_en is +- * non-zero. 
+- */ +- if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en) +- hdev->flag |= HCLGE_FLAG_DCB_ENABLE; +- else +- hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; +- +- hclge_pfc_info_init(hdev); +-} +- + int hclge_tm_init_hw(struct hclge_dev *hdev, bool init) + { + int ret; +@@ -1614,7 +1593,7 @@ int hclge_tm_vport_map_update(struct hclge_dev *hdev) + if (ret) + return ret; + +- if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) ++ if (hdev->tm_info.num_tc == 1 && !hdev->tm_info.pfc_en) + return 0; + + return hclge_tm_bp_setup(hdev); +-- +2.33.0 + diff --git a/queue-5.14/net-hns3-fix-show-wrong-state-when-add-existing-uc-m.patch b/queue-5.14/net-hns3-fix-show-wrong-state-when-add-existing-uc-m.patch new file mode 100644 index 00000000000..d774aebdfad --- /dev/null +++ b/queue-5.14/net-hns3-fix-show-wrong-state-when-add-existing-uc-m.patch @@ -0,0 +1,67 @@ +From a419cc4fa65b3a6e929a70ca5c22e98f7bf35b2a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 17:35:53 +0800 +Subject: net: hns3: fix show wrong state when add existing uc mac address + +From: Jian Shen + +[ Upstream commit 108b3c7810e14892c4a1819b1d268a2c785c087c ] + +Currently, if function adds an existing unicast mac address, eventhough +driver will not add this address into hardware, but it will return 0 in +function hclge_add_uc_addr_common(). It will cause the state of this +unicast mac address is ACTIVE in driver, but it should be in TO-ADD state. + +To fix this problem, function hclge_add_uc_addr_common() returns -EEXIST +if mac address is existing, and delete two error log to avoid printing +them all the time after this modification. + +Fixes: 72110b567479 ("net: hns3: return 0 and print warning when hit duplicate MAC") +Signed-off-by: Jian Shen +Signed-off-by: Guangbin Huang +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../hisilicon/hns3/hns3pf/hclge_main.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +index 90a72c79fec9..9920e76b4f41 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +@@ -8701,15 +8701,8 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, + } + + /* check if we just hit the duplicate */ +- if (!ret) { +- dev_warn(&hdev->pdev->dev, "VF %u mac(%pM) exists\n", +- vport->vport_id, addr); +- return 0; +- } +- +- dev_err(&hdev->pdev->dev, +- "PF failed to add unicast entry(%pM) in the MAC table\n", +- addr); ++ if (!ret) ++ return -EEXIST; + + return ret; + } +@@ -8861,7 +8854,13 @@ static void hclge_sync_vport_mac_list(struct hclge_vport *vport, + } else { + set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE, + &vport->state); +- break; ++ ++ /* If one unicast mac address is existing in hardware, ++ * we need to try whether other unicast mac addresses ++ * are new addresses that can be added. 
++ */ ++ if (ret != -EEXIST) ++ break; + } + } + } +-- +2.33.0 + diff --git a/queue-5.14/net-hns3-reconstruct-function-hns3_self_test.patch b/queue-5.14/net-hns3-reconstruct-function-hns3_self_test.patch new file mode 100644 index 00000000000..d3ccad4c54e --- /dev/null +++ b/queue-5.14/net-hns3-reconstruct-function-hns3_self_test.patch @@ -0,0 +1,161 @@ +From 9010fe28af9d201fb11c794b2677670b46e6125d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Aug 2021 14:06:37 +0800 +Subject: net: hns3: reconstruct function hns3_self_test + +From: Peng Li + +[ Upstream commit 4c8dab1c709c5a715bce14efdb8f4e889d86aa04 ] + +This patch reconstructs function hns3_self_test to reduce the code +cycle complexity and make code more concise. + +Signed-off-by: Peng Li +Signed-off-by: Guangbin Huang +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + .../ethernet/hisilicon/hns3/hns3_ethtool.c | 101 +++++++++++------- + 1 file changed, 64 insertions(+), 37 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +index 82061ab6930f..69b253424da8 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +@@ -312,33 +312,8 @@ static int hns3_lp_run_test(struct net_device *ndev, enum hnae3_loop mode) + return ret_val; + } + +-/** +- * hns3_self_test - self test +- * @ndev: net device +- * @eth_test: test cmd +- * @data: test result +- */ +-static void hns3_self_test(struct net_device *ndev, +- struct ethtool_test *eth_test, u64 *data) ++static void hns3_set_selftest_param(struct hnae3_handle *h, int (*st_param)[2]) + { +- struct hns3_nic_priv *priv = netdev_priv(ndev); +- struct hnae3_handle *h = priv->ae_handle; +- int st_param[HNS3_SELF_TEST_TYPE_NUM][2]; +- bool if_running = netif_running(ndev); +- int test_index = 0; +- u32 i; +- +- if (hns3_nic_resetting(ndev)) { +- netdev_err(ndev, "dev resetting!"); +- return; +- } +- +- /* Only do offline selftest, or pass by default */ +- if (eth_test->flags != ETH_TEST_FL_OFFLINE) +- return; +- +- netif_dbg(h, drv, ndev, "self test start"); +- + st_param[HNAE3_LOOP_APP][0] = HNAE3_LOOP_APP; + st_param[HNAE3_LOOP_APP][1] = + h->flags & HNAE3_SUPPORT_APP_LOOPBACK; +@@ -355,6 +330,18 @@ static void hns3_self_test(struct net_device *ndev, + st_param[HNAE3_LOOP_PHY][0] = HNAE3_LOOP_PHY; + st_param[HNAE3_LOOP_PHY][1] = + h->flags & HNAE3_SUPPORT_PHY_LOOPBACK; ++} ++ ++static void hns3_selftest_prepare(struct net_device *ndev, ++ bool if_running, int (*st_param)[2]) ++{ ++ struct hns3_nic_priv *priv = netdev_priv(ndev); ++ struct hnae3_handle *h = priv->ae_handle; ++ ++ if (netif_msg_ifdown(h)) ++ netdev_info(ndev, "self test start\n"); ++ ++ hns3_set_selftest_param(h, st_param); + + if (if_running) + ndev->netdev_ops->ndo_stop(ndev); +@@ -373,6 +360,35 @@ static void hns3_self_test(struct net_device *ndev, + h->ae_algo->ops->halt_autoneg(h, true); + + set_bit(HNS3_NIC_STATE_TESTING, &priv->state); ++} ++ ++static void hns3_selftest_restore(struct net_device *ndev, bool if_running) ++{ ++ struct hns3_nic_priv *priv = netdev_priv(ndev); ++ struct hnae3_handle *h = priv->ae_handle; ++ ++ clear_bit(HNS3_NIC_STATE_TESTING, &priv->state); ++ ++ if (h->ae_algo->ops->halt_autoneg) ++ h->ae_algo->ops->halt_autoneg(h, false); ++ ++#if IS_ENABLED(CONFIG_VLAN_8021Q) ++ if (h->ae_algo->ops->enable_vlan_filter) ++ h->ae_algo->ops->enable_vlan_filter(h, true); ++#endif ++ ++ if (if_running) ++ ndev->netdev_ops->ndo_open(ndev); ++ 
++ if (netif_msg_ifdown(h)) ++ netdev_info(ndev, "self test end\n"); ++} ++ ++static void hns3_do_selftest(struct net_device *ndev, int (*st_param)[2], ++ struct ethtool_test *eth_test, u64 *data) ++{ ++ int test_index = 0; ++ u32 i; + + for (i = 0; i < HNS3_SELF_TEST_TYPE_NUM; i++) { + enum hnae3_loop loop_type = (enum hnae3_loop)st_param[i][0]; +@@ -391,21 +407,32 @@ static void hns3_self_test(struct net_device *ndev, + + test_index++; + } ++} + +- clear_bit(HNS3_NIC_STATE_TESTING, &priv->state); +- +- if (h->ae_algo->ops->halt_autoneg) +- h->ae_algo->ops->halt_autoneg(h, false); ++/** ++ * hns3_nic_self_test - self test ++ * @ndev: net device ++ * @eth_test: test cmd ++ * @data: test result ++ */ ++static void hns3_self_test(struct net_device *ndev, ++ struct ethtool_test *eth_test, u64 *data) ++{ ++ int st_param[HNS3_SELF_TEST_TYPE_NUM][2]; ++ bool if_running = netif_running(ndev); + +-#if IS_ENABLED(CONFIG_VLAN_8021Q) +- if (h->ae_algo->ops->enable_vlan_filter) +- h->ae_algo->ops->enable_vlan_filter(h, true); +-#endif ++ if (hns3_nic_resetting(ndev)) { ++ netdev_err(ndev, "dev resetting!"); ++ return; ++ } + +- if (if_running) +- ndev->netdev_ops->ndo_open(ndev); ++ /* Only do offline selftest, or pass by default */ ++ if (eth_test->flags != ETH_TEST_FL_OFFLINE) ++ return; + +- netif_dbg(h, drv, ndev, "self test end\n"); ++ hns3_selftest_prepare(ndev, if_running, st_param); ++ hns3_do_selftest(ndev, st_param, eth_test, data); ++ hns3_selftest_restore(ndev, if_running); + } + + static void hns3_update_limit_promisc_mode(struct net_device *netdev, +-- +2.33.0 + diff --git a/queue-5.14/net-hns3-remove-tc-enable-checking.patch b/queue-5.14/net-hns3-remove-tc-enable-checking.patch new file mode 100644 index 00000000000..5f88afd0787 --- /dev/null +++ b/queue-5.14/net-hns3-remove-tc-enable-checking.patch @@ -0,0 +1,120 @@ +From 4acd72a1d2a9deac4af95786b48d603c96fead3e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 17:35:50 +0800 +Subject: net: hns3: remove tc enable checking + +From: Jian Shen + +[ Upstream commit a8e76fefe3de9b8e609cf192af75e7878d21fa3a ] + +Currently, in function hns3_nic_set_real_num_queue(), the +driver doesn't report the queue count and offset for disabled +tc. If user enables multiple TCs, but only maps user +priorities to partial of them, it may cause the queue range +of the unmapped TC being displayed abnormally. + +Fix it by removing the tc enable checking, ensure the queue +count is not zero. + +With this change, the tc_en is useless now, so remove it. + +Fixes: a75a8efa00c5 ("net: hns3: Fix tc setup when netdev is first up") +Signed-off-by: Jian Shen +Signed-off-by: Guangbin Huang +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 - + drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 11 ++--------- + .../net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c | 5 ----- + drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c | 2 -- + 4 files changed, 2 insertions(+), 17 deletions(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h +index e0b7c3c44e7b..32987bd134a1 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h ++++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h +@@ -750,7 +750,6 @@ struct hnae3_tc_info { + u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */ + u16 tqp_count[HNAE3_MAX_TC]; + u16 tqp_offset[HNAE3_MAX_TC]; +- unsigned long tc_en; /* bitmap of TC enabled */ + u8 num_tc; /* Total number of enabled TCs */ + bool mqprio_active; + }; +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +index b24ad9bc8e1b..114692c4f797 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +@@ -620,13 +620,9 @@ static int hns3_nic_set_real_num_queue(struct net_device *netdev) + return ret; + } + +- for (i = 0; i < HNAE3_MAX_TC; i++) { +- if (!test_bit(i, &tc_info->tc_en)) +- continue; +- ++ for (i = 0; i < tc_info->num_tc; i++) + netdev_set_tc_queue(netdev, i, tc_info->tqp_count[i], + tc_info->tqp_offset[i]); +- } + } + + ret = netif_set_real_num_tx_queues(netdev, queue_size); +@@ -4830,12 +4826,9 @@ static void hns3_init_tx_ring_tc(struct hns3_nic_priv *priv) + struct hnae3_tc_info *tc_info = &kinfo->tc_info; + int i; + +- for (i = 0; i < HNAE3_MAX_TC; i++) { ++ for (i = 0; i < tc_info->num_tc; i++) { + int j; + +- if (!test_bit(i, &tc_info->tc_en)) +- continue; +- + for (j = 0; j < tc_info->tqp_count[i]; j++) { + struct hnae3_queue *q; + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +index 39f56f245d84..5fadfdbc4858 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +@@ -420,8 +420,6 @@ static int hclge_mqprio_qopt_check(struct hclge_dev *hdev, + static void hclge_sync_mqprio_qopt(struct hnae3_tc_info *tc_info, + struct tc_mqprio_qopt_offload *mqprio_qopt) + { +- int i; +- + memset(tc_info, 0, sizeof(*tc_info)); + tc_info->num_tc = mqprio_qopt->qopt.num_tc; + memcpy(tc_info->prio_tc, mqprio_qopt->qopt.prio_tc_map, +@@ -430,9 +428,6 @@ static void hclge_sync_mqprio_qopt(struct hnae3_tc_info *tc_info, + sizeof_field(struct hnae3_tc_info, tqp_count)); + memcpy(tc_info->tqp_offset, mqprio_qopt->qopt.offset, + sizeof_field(struct hnae3_tc_info, tqp_offset)); +- +- for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) +- set_bit(tc_info->prio_tc[i], &tc_info->tc_en); + } + + static int hclge_config_tc(struct hclge_dev *hdev, +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +index 44618cc4cca1..6f5035a788c0 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +@@ -687,12 +687,10 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport) + + for (i = 0; i < HNAE3_MAX_TC; i++) { + if (hdev->hw_tc_map & BIT(i) && i < kinfo->tc_info.num_tc) { +- set_bit(i, &kinfo->tc_info.tc_en); + kinfo->tc_info.tqp_offset[i] = i * kinfo->rss_size; + 
kinfo->tc_info.tqp_count[i] = kinfo->rss_size; + } else { + /* Set to default queue if TC is disable */ +- clear_bit(i, &kinfo->tc_info.tc_en); + kinfo->tc_info.tqp_offset[i] = 0; + kinfo->tc_info.tqp_count[i] = 1; + } +-- +2.33.0 + diff --git a/queue-5.14/net-introduce-and-use-lock_sock_fast_nested.patch b/queue-5.14/net-introduce-and-use-lock_sock_fast_nested.patch new file mode 100644 index 00000000000..863c7b550e4 --- /dev/null +++ b/queue-5.14/net-introduce-and-use-lock_sock_fast_nested.patch @@ -0,0 +1,216 @@ +From 3c4475dc947aeadcb12bf779f4ed0cc55100aac8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 11:59:17 +0200 +Subject: net: introduce and use lock_sock_fast_nested() + +From: Paolo Abeni + +[ Upstream commit 49054556289e8787501630b7c7a9d407da02e296 ] + +Syzkaller reported a false positive deadlock involving +the nl socket lock and the subflow socket lock: + +MPTCP: kernel_bind error, err=-98 +============================================ +WARNING: possible recursive locking detected +5.15.0-rc1-syzkaller #0 Not tainted +-------------------------------------------- +syz-executor998/6520 is trying to acquire lock: +ffff8880795718a0 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close+0x267/0x7b0 net/mptcp/protocol.c:2738 + +but task is already holding lock: +ffff8880787c8c60 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1612 [inline] +ffff8880787c8c60 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close+0x23/0x7b0 net/mptcp/protocol.c:2720 + +other info that might help us debug this: + Possible unsafe locking scenario: + + CPU0 + ---- + lock(k-sk_lock-AF_INET); + lock(k-sk_lock-AF_INET); + + *** DEADLOCK *** + + May be due to missing lock nesting notation + +3 locks held by syz-executor998/6520: + #0: ffffffff8d176c50 (cb_lock){++++}-{3:3}, at: genl_rcv+0x15/0x40 net/netlink/genetlink.c:802 + #1: ffffffff8d176d08 (genl_mutex){+.+.}-{3:3}, at: genl_lock net/netlink/genetlink.c:33 [inline] + #1: ffffffff8d176d08 (genl_mutex){+.+.}-{3:3}, at: genl_rcv_msg+0x3e0/0x580 net/netlink/genetlink.c:790 + #2: ffff8880787c8c60 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1612 [inline] + #2: ffff8880787c8c60 (k-sk_lock-AF_INET){+.+.}-{0:0}, at: mptcp_close+0x23/0x7b0 net/mptcp/protocol.c:2720 + +stack backtrace: +CPU: 1 PID: 6520 Comm: syz-executor998 Not tainted 5.15.0-rc1-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 + print_deadlock_bug kernel/locking/lockdep.c:2944 [inline] + check_deadlock kernel/locking/lockdep.c:2987 [inline] + validate_chain kernel/locking/lockdep.c:3776 [inline] + __lock_acquire.cold+0x149/0x3ab kernel/locking/lockdep.c:5015 + lock_acquire kernel/locking/lockdep.c:5625 [inline] + lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5590 + lock_sock_fast+0x36/0x100 net/core/sock.c:3229 + mptcp_close+0x267/0x7b0 net/mptcp/protocol.c:2738 + inet_release+0x12e/0x280 net/ipv4/af_inet.c:431 + __sock_release net/socket.c:649 [inline] + sock_release+0x87/0x1b0 net/socket.c:677 + mptcp_pm_nl_create_listen_socket+0x238/0x2c0 net/mptcp/pm_netlink.c:900 + mptcp_nl_cmd_add_addr+0x359/0x930 net/mptcp/pm_netlink.c:1170 + genl_family_rcv_msg_doit+0x228/0x320 net/netlink/genetlink.c:731 + genl_family_rcv_msg net/netlink/genetlink.c:775 [inline] + genl_rcv_msg+0x328/0x580 net/netlink/genetlink.c:792 + netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504 + 
genl_rcv+0x24/0x40 net/netlink/genetlink.c:803 + netlink_unicast_kernel net/netlink/af_netlink.c:1314 [inline] + netlink_unicast+0x533/0x7d0 net/netlink/af_netlink.c:1340 + netlink_sendmsg+0x86d/0xdb0 net/netlink/af_netlink.c:1929 + sock_sendmsg_nosec net/socket.c:704 [inline] + sock_sendmsg+0xcf/0x120 net/socket.c:724 + sock_no_sendpage+0x101/0x150 net/core/sock.c:2980 + kernel_sendpage.part.0+0x1a0/0x340 net/socket.c:3504 + kernel_sendpage net/socket.c:3501 [inline] + sock_sendpage+0xe5/0x140 net/socket.c:1003 + pipe_to_sendpage+0x2ad/0x380 fs/splice.c:364 + splice_from_pipe_feed fs/splice.c:418 [inline] + __splice_from_pipe+0x43e/0x8a0 fs/splice.c:562 + splice_from_pipe fs/splice.c:597 [inline] + generic_splice_sendpage+0xd4/0x140 fs/splice.c:746 + do_splice_from fs/splice.c:767 [inline] + direct_splice_actor+0x110/0x180 fs/splice.c:936 + splice_direct_to_actor+0x34b/0x8c0 fs/splice.c:891 + do_splice_direct+0x1b3/0x280 fs/splice.c:979 + do_sendfile+0xae9/0x1240 fs/read_write.c:1249 + __do_sys_sendfile64 fs/read_write.c:1314 [inline] + __se_sys_sendfile64 fs/read_write.c:1300 [inline] + __x64_sys_sendfile64+0x1cc/0x210 fs/read_write.c:1300 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x44/0xae +RIP: 0033:0x7f215cb69969 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 e1 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007ffc96bb3868 EFLAGS: 00000246 ORIG_RAX: 0000000000000028 +RAX: ffffffffffffffda RBX: 00007f215cbad072 RCX: 00007f215cb69969 +RDX: 0000000000000000 RSI: 0000000000000004 RDI: 0000000000000005 +RBP: 0000000000000000 R08: 00007ffc96bb3a08 R09: 00007ffc96bb3a08 +R10: 0000000100000002 R11: 0000000000000246 R12: 00007ffc96bb387c +R13: 431bde82d7b634db R14: 0000000000000000 R15: 0000000000000000 + +the problem originates from uncorrect lock annotation in the mptcp +code and is only visible since commit 2dcb96bacce3 ("net: core: Correct +the sock::sk_lock.owned lockdep annotations"), but is present since +the port-based endpoint support initial implementation. + +This patch addresses the issue introducing a nested variant of +lock_sock_fast() and using it in the relevant code path. + +Fixes: 1729cf186d8a ("mptcp: create the listening socket for new port") +Fixes: 2dcb96bacce3 ("net: core: Correct the sock::sk_lock.owned lockdep annotations") +Suggested-by: Thomas Gleixner +Reported-and-tested-by: syzbot+1dd53f7a89b299d59eaf@syzkaller.appspotmail.com +Signed-off-by: Paolo Abeni +Reviewed-by: Thomas Gleixner +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + include/net/sock.h | 31 ++++++++++++++++++++++++++++++- + net/core/sock.c | 17 ++--------------- + net/mptcp/protocol.c | 2 +- + 3 files changed, 33 insertions(+), 17 deletions(-) + +diff --git a/include/net/sock.h b/include/net/sock.h +index f23cb259b0e2..980b471b569d 100644 +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1624,7 +1624,36 @@ void release_sock(struct sock *sk); + SINGLE_DEPTH_NESTING) + #define bh_unlock_sock(__sk) spin_unlock(&((__sk)->sk_lock.slock)) + +-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); ++bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock); ++ ++/** ++ * lock_sock_fast - fast version of lock_sock ++ * @sk: socket ++ * ++ * This version should be used for very small section, where process wont block ++ * return false if fast path is taken: ++ * ++ * sk_lock.slock locked, owned = 0, BH disabled ++ * ++ * return true if slow path is taken: ++ * ++ * sk_lock.slock unlocked, owned = 1, BH enabled ++ */ ++static inline bool lock_sock_fast(struct sock *sk) ++{ ++ /* The sk_lock has mutex_lock() semantics here. */ ++ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); ++ ++ return __lock_sock_fast(sk); ++} ++ ++/* fast socket lock variant for caller already holding a [different] socket lock */ ++static inline bool lock_sock_fast_nested(struct sock *sk) ++{ ++ mutex_acquire(&sk->sk_lock.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_); ++ ++ return __lock_sock_fast(sk); ++} + + /** + * unlock_sock_fast - complement of lock_sock_fast +diff --git a/net/core/sock.c b/net/core/sock.c +index a3eea6e0b30a..1cf0edc79f37 100644 +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -3191,20 +3191,7 @@ void release_sock(struct sock *sk) + } + EXPORT_SYMBOL(release_sock); + +-/** +- * lock_sock_fast - fast version of lock_sock +- * @sk: socket +- * +- * This version should be used for very small section, where process wont block +- * return false if fast path is taken: +- * +- * sk_lock.slock locked, owned = 0, BH disabled +- * +- * return true if slow path is taken: +- * +- * sk_lock.slock unlocked, owned = 1, BH enabled +- */ +-bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) ++bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) + { + might_sleep(); + spin_lock_bh(&sk->sk_lock.slock); +@@ -3226,7 +3213,7 @@ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) + local_bh_enable(); + return true; + } +-EXPORT_SYMBOL(lock_sock_fast); ++EXPORT_SYMBOL(__lock_sock_fast); + + int sock_gettstamp(struct socket *sock, void __user *userstamp, + bool timeval, bool time32) +diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c +index 4d2abdd3cd3b..7d4d40360f77 100644 +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -2622,7 +2622,7 @@ static void mptcp_close(struct sock *sk, long timeout) + inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; + mptcp_for_each_subflow(mptcp_sk(sk), subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); +- bool slow = lock_sock_fast(ssk); ++ bool slow = lock_sock_fast_nested(ssk); + + sock_orphan(ssk); + unlock_sock_fast(ssk, slow); +-- +2.33.0 + diff --git a/queue-5.14/net-ipv4-fix-rtnexthop-len-when-rta_flow-is-present.patch b/queue-5.14/net-ipv4-fix-rtnexthop-len-when-rta_flow-is-present.patch new file mode 100644 index 00000000000..2c41d9a5cde --- /dev/null +++ b/queue-5.14/net-ipv4-fix-rtnexthop-len-when-rta_flow-is-present.patch @@ -0,0 +1,116 @@ +From d1e58465899185bdcaed5e143f2d10a77bb2a972 
Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Sep 2021 23:03:19 +0800 +Subject: net: ipv4: Fix rtnexthop len when RTA_FLOW is present + +From: Xiao Liang + +[ Upstream commit 597aa16c782496bf74c5dc3b45ff472ade6cee64 ] + +Multipath RTA_FLOW is embedded in nexthop. Dump it in fib_add_nexthop() +to get the length of rtnexthop correct. + +Fixes: b0f60193632e ("ipv4: Refactor nexthop attributes in fib_dump_info") +Signed-off-by: Xiao Liang +Reviewed-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + include/net/ip_fib.h | 2 +- + include/net/nexthop.h | 2 +- + net/ipv4/fib_semantics.c | 16 +++++++++------- + net/ipv6/route.c | 5 +++-- + 4 files changed, 14 insertions(+), 11 deletions(-) + +diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h +index 3ab2563b1a23..7fd7f6093612 100644 +--- a/include/net/ip_fib.h ++++ b/include/net/ip_fib.h +@@ -597,5 +597,5 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, + int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, + u8 rt_family, unsigned char *flags, bool skip_oif); + int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, +- int nh_weight, u8 rt_family); ++ int nh_weight, u8 rt_family, u32 nh_tclassid); + #endif /* _NET_FIB_H */ +diff --git a/include/net/nexthop.h b/include/net/nexthop.h +index 10e1777877e6..28085b995ddc 100644 +--- a/include/net/nexthop.h ++++ b/include/net/nexthop.h +@@ -325,7 +325,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, + struct fib_nh_common *nhc = &nhi->fib_nhc; + int weight = nhg->nh_entries[i].weight; + +- if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0) ++ if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) + return -EMSGSIZE; + } + +diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c +index 4c0c33e4710d..27fdd86b9cee 100644 +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -1663,7 +1663,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info); + + #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) + int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, +- int nh_weight, u8 rt_family) ++ int nh_weight, u8 rt_family, u32 nh_tclassid) + { + const struct net_device *dev = nhc->nhc_dev; + struct rtnexthop *rtnh; +@@ -1681,6 +1681,9 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, + + rtnh->rtnh_flags = flags; + ++ if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid)) ++ goto nla_put_failure; ++ + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; + +@@ -1708,14 +1711,13 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) + } + + for_nexthops(fi) { +- if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, +- AF_INET) < 0) +- goto nla_put_failure; ++ u32 nh_tclassid = 0; + #ifdef CONFIG_IP_ROUTE_CLASSID +- if (nh->nh_tclassid && +- nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) +- goto nla_put_failure; ++ nh_tclassid = nh->nh_tclassid; + #endif ++ if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, ++ AF_INET, nh_tclassid) < 0) ++ goto nla_put_failure; + } endfor_nexthops(fi); + + mp_end: +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index 603340302101..0aeff2ce17b9 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -5700,14 +5700,15 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, + goto nla_put_failure; + + if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common, +- 
rt->fib6_nh->fib_nh_weight, AF_INET6) < 0) ++ rt->fib6_nh->fib_nh_weight, AF_INET6, ++ 0) < 0) + goto nla_put_failure; + + list_for_each_entry_safe(sibling, next_sibling, + &rt->fib6_siblings, fib6_siblings) { + if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, + sibling->fib6_nh->fib_nh_weight, +- AF_INET6) < 0) ++ AF_INET6, 0) < 0) + goto nla_put_failure; + } + +-- +2.33.0 + diff --git a/queue-5.14/net-ks8851-fix-link-error.patch b/queue-5.14/net-ks8851-fix-link-error.patch new file mode 100644 index 00000000000..b79068cbbef --- /dev/null +++ b/queue-5.14/net-ks8851-fix-link-error.patch @@ -0,0 +1,86 @@ +From c9a99cf580f51c32da3be7c58f5f630eb42e045c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Sep 2021 16:13:02 +0200 +Subject: net: ks8851: fix link error + +From: Arnd Bergmann + +[ Upstream commit 51bb08dd04a05035a64504faa47651d36b0f3125 ] + +An object file cannot be built for both loadable module and built-in +use at the same time: + +arm-linux-gnueabi-ld: drivers/net/ethernet/micrel/ks8851_common.o: in function `ks8851_probe_common': +ks8851_common.c:(.text+0xf80): undefined reference to `__this_module' + +Change the ks8851_common code to be a standalone module instead, +and use Makefile logic to ensure this is built-in if at least one +of its two users is. + +Fixes: 797047f875b5 ("net: ks8851: Implement Parallel bus operations") +Link: https://lore.kernel.org/netdev/20210125121937.3900988-1-arnd@kernel.org/ +Reviewed-by: Andrew Lunn +Acked-by: Marek Vasut +Signed-off-by: Arnd Bergmann +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/micrel/Makefile | 6 ++---- + drivers/net/ethernet/micrel/ks8851_common.c | 8 ++++++++ + 2 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/ethernet/micrel/Makefile b/drivers/net/ethernet/micrel/Makefile +index 5cc00d22c708..6ecc4eb30e74 100644 +--- a/drivers/net/ethernet/micrel/Makefile ++++ b/drivers/net/ethernet/micrel/Makefile +@@ -4,8 +4,6 @@ + # + + obj-$(CONFIG_KS8842) += ks8842.o +-obj-$(CONFIG_KS8851) += ks8851.o +-ks8851-objs = ks8851_common.o ks8851_spi.o +-obj-$(CONFIG_KS8851_MLL) += ks8851_mll.o +-ks8851_mll-objs = ks8851_common.o ks8851_par.o ++obj-$(CONFIG_KS8851) += ks8851_common.o ks8851_spi.o ++obj-$(CONFIG_KS8851_MLL) += ks8851_common.o ks8851_par.o + obj-$(CONFIG_KSZ884X_PCI) += ksz884x.o +diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c +index 831518466de2..0f9c5457b93e 100644 +--- a/drivers/net/ethernet/micrel/ks8851_common.c ++++ b/drivers/net/ethernet/micrel/ks8851_common.c +@@ -1057,6 +1057,7 @@ int ks8851_suspend(struct device *dev) + + return 0; + } ++EXPORT_SYMBOL_GPL(ks8851_suspend); + + int ks8851_resume(struct device *dev) + { +@@ -1070,6 +1071,7 @@ int ks8851_resume(struct device *dev) + + return 0; + } ++EXPORT_SYMBOL_GPL(ks8851_resume); + #endif + + static int ks8851_register_mdiobus(struct ks8851_net *ks, struct device *dev) +@@ -1243,6 +1245,7 @@ int ks8851_probe_common(struct net_device *netdev, struct device *dev, + err_reg_io: + return ret; + } ++EXPORT_SYMBOL_GPL(ks8851_probe_common); + + int ks8851_remove_common(struct device *dev) + { +@@ -1261,3 +1264,8 @@ int ks8851_remove_common(struct device *dev) + + return 0; + } ++EXPORT_SYMBOL_GPL(ks8851_remove_common); ++ ++MODULE_DESCRIPTION("KS8851 Network driver"); ++MODULE_AUTHOR("Ben Dooks "); ++MODULE_LICENSE("GPL"); +-- +2.33.0 + diff --git a/queue-5.14/net-mdiobus-set-fwnode_flag_needs_child_bound_on_add.patch 
b/queue-5.14/net-mdiobus-set-fwnode_flag_needs_child_bound_on_add.patch new file mode 100644 index 00000000000..02229f60c82 --- /dev/null +++ b/queue-5.14/net-mdiobus-set-fwnode_flag_needs_child_bound_on_add.patch @@ -0,0 +1,53 @@ +From 44a2a3669d5c4c69b0bbe8d029f1b3148f708ed2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Sep 2021 10:09:39 -0700 +Subject: net: mdiobus: Set FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD for mdiobus + parents + +From: Saravana Kannan + +[ Upstream commit 04f41c68f18886aea5afc68be945e7195ea1d598 ] + +There are many instances of PHYs that depend on a switch to supply a +resource (Eg: interrupts). Switches also expects the PHYs to be probed +by their specific drivers as soon as they are added. If that doesn't +happen, then the switch would force the use of generic PHY drivers for +the PHY even if the PHY might have specific driver available. + +fw_devlink=on by design can cause delayed probes of PHY. To avoid, this +we need to set the FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD for the switch's +fwnode before the PHYs are added. The most generic way to do this is to +set this flag for the parent of MDIO busses which is typically the +switch. + +For more context: +https://lore.kernel.org/lkml/YTll0i6Rz3WAAYzs@lunn.ch/#t + +Fixes: ea718c699055 ("Revert "Revert "driver core: Set fw_devlink=on by default""") +Suggested-by: Andrew Lunn +Signed-off-by: Saravana Kannan +Link: https://lore.kernel.org/r/20210915170940.617415-4-saravanak@google.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/net/phy/mdio_bus.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c +index 53f034fc2ef7..ee8313a4ac71 100644 +--- a/drivers/net/phy/mdio_bus.c ++++ b/drivers/net/phy/mdio_bus.c +@@ -525,6 +525,10 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) + NULL == bus->read || NULL == bus->write) + return -EINVAL; + ++ if (bus->parent && bus->parent->of_node) ++ bus->parent->of_node->fwnode.flags |= ++ FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD; ++ + BUG_ON(bus->state != MDIOBUS_ALLOCATED && + bus->state != MDIOBUS_UNREGISTERED); + +-- +2.33.0 + diff --git a/queue-5.14/net-phy-bcm7xxx-fixed-indirect-mmd-operations.patch b/queue-5.14/net-phy-bcm7xxx-fixed-indirect-mmd-operations.patch new file mode 100644 index 00000000000..8e25b46dfbf --- /dev/null +++ b/queue-5.14/net-phy-bcm7xxx-fixed-indirect-mmd-operations.patch @@ -0,0 +1,193 @@ +From 72c959d089799ade6cf0a69d491e9cbd94e87a9d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Sep 2021 13:32:33 -0700 +Subject: net: phy: bcm7xxx: Fixed indirect MMD operations + +From: Florian Fainelli + +[ Upstream commit d88fd1b546ff19c8040cfaea76bf16aed1c5a0bb ] + +When EEE support was added to the 28nm EPHY it was assumed that it would +be able to support the standard clause 45 over clause 22 register access +method. It turns out that the PHY does not support that, which is the +very reason for using the indirect shadow mode 2 bank 3 access method. + +Implement {read,write}_mmd to allow the standard PHY library routines +pertaining to EEE querying and configuration to work correctly on these +PHYs. This forces us to implement a __phy_set_clr_bits() function that +does not grab the MDIO bus lock since the PHY driver's {read,write}_mmd +functions are always called with that lock held. + +Fixes: 83ee102a6998 ("net: phy: bcm7xxx: add support for 28nm EPHY") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/phy/bcm7xxx.c | 114 ++++++++++++++++++++++++++++++++++++-- + 1 file changed, 110 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c +index e79297a4bae8..27b6a3f507ae 100644 +--- a/drivers/net/phy/bcm7xxx.c ++++ b/drivers/net/phy/bcm7xxx.c +@@ -27,7 +27,12 @@ + #define MII_BCM7XXX_SHD_2_ADDR_CTRL 0xe + #define MII_BCM7XXX_SHD_2_CTRL_STAT 0xf + #define MII_BCM7XXX_SHD_2_BIAS_TRIM 0x1a ++#define MII_BCM7XXX_SHD_3_PCS_CTRL 0x0 ++#define MII_BCM7XXX_SHD_3_PCS_STATUS 0x1 ++#define MII_BCM7XXX_SHD_3_EEE_CAP 0x2 + #define MII_BCM7XXX_SHD_3_AN_EEE_ADV 0x3 ++#define MII_BCM7XXX_SHD_3_EEE_LP 0x4 ++#define MII_BCM7XXX_SHD_3_EEE_WK_ERR 0x5 + #define MII_BCM7XXX_SHD_3_PCS_CTRL_2 0x6 + #define MII_BCM7XXX_PCS_CTRL_2_DEF 0x4400 + #define MII_BCM7XXX_SHD_3_AN_STAT 0xb +@@ -216,25 +221,37 @@ static int bcm7xxx_28nm_resume(struct phy_device *phydev) + return genphy_config_aneg(phydev); + } + +-static int phy_set_clr_bits(struct phy_device *dev, int location, +- int set_mask, int clr_mask) ++static int __phy_set_clr_bits(struct phy_device *dev, int location, ++ int set_mask, int clr_mask) + { + int v, ret; + +- v = phy_read(dev, location); ++ v = __phy_read(dev, location); + if (v < 0) + return v; + + v &= ~clr_mask; + v |= set_mask; + +- ret = phy_write(dev, location, v); ++ ret = __phy_write(dev, location, v); + if (ret < 0) + return ret; + + return v; + } + ++static int phy_set_clr_bits(struct phy_device *dev, int location, ++ int set_mask, int clr_mask) ++{ ++ int ret; ++ ++ mutex_lock(&dev->mdio.bus->mdio_lock); ++ ret = __phy_set_clr_bits(dev, location, set_mask, clr_mask); ++ mutex_unlock(&dev->mdio.bus->mdio_lock); ++ ++ return ret; ++} ++ + static int bcm7xxx_28nm_ephy_01_afe_config_init(struct phy_device *phydev) + { + int ret; +@@ -398,6 +415,93 @@ static int bcm7xxx_28nm_ephy_config_init(struct phy_device *phydev) + return bcm7xxx_28nm_ephy_apd_enable(phydev); + } + ++#define MII_BCM7XXX_REG_INVALID 0xff ++ ++static u8 bcm7xxx_28nm_ephy_regnum_to_shd(u16 regnum) ++{ ++ switch (regnum) { ++ case MDIO_CTRL1: ++ return MII_BCM7XXX_SHD_3_PCS_CTRL; ++ case MDIO_STAT1: ++ return MII_BCM7XXX_SHD_3_PCS_STATUS; ++ case MDIO_PCS_EEE_ABLE: ++ return MII_BCM7XXX_SHD_3_EEE_CAP; ++ case MDIO_AN_EEE_ADV: ++ return MII_BCM7XXX_SHD_3_AN_EEE_ADV; ++ case MDIO_AN_EEE_LPABLE: ++ return MII_BCM7XXX_SHD_3_EEE_LP; ++ case MDIO_PCS_EEE_WK_ERR: ++ return MII_BCM7XXX_SHD_3_EEE_WK_ERR; ++ default: ++ return MII_BCM7XXX_REG_INVALID; ++ } ++} ++ ++static bool bcm7xxx_28nm_ephy_dev_valid(int devnum) ++{ ++ return devnum == MDIO_MMD_AN || devnum == MDIO_MMD_PCS; ++} ++ ++static int bcm7xxx_28nm_ephy_read_mmd(struct phy_device *phydev, ++ int devnum, u16 regnum) ++{ ++ u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum); ++ int ret; ++ ++ if (!bcm7xxx_28nm_ephy_dev_valid(devnum) || ++ shd == MII_BCM7XXX_REG_INVALID) ++ return -EOPNOTSUPP; ++ ++ /* set shadow mode 2 */ ++ ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, ++ MII_BCM7XXX_SHD_MODE_2, 0); ++ if (ret < 0) ++ return ret; ++ ++ /* Access the desired shadow register address */ ++ ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd); ++ if (ret < 0) ++ goto reset_shadow_mode; ++ ++ ret = __phy_read(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT); ++ ++reset_shadow_mode: ++ /* reset shadow mode 2 */ ++ __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, ++ MII_BCM7XXX_SHD_MODE_2); ++ return ret; ++} ++ ++static int bcm7xxx_28nm_ephy_write_mmd(struct phy_device *phydev, ++ int 
devnum, u16 regnum, u16 val) ++{ ++ u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum); ++ int ret; ++ ++ if (!bcm7xxx_28nm_ephy_dev_valid(devnum) || ++ shd == MII_BCM7XXX_REG_INVALID) ++ return -EOPNOTSUPP; ++ ++ /* set shadow mode 2 */ ++ ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, ++ MII_BCM7XXX_SHD_MODE_2, 0); ++ if (ret < 0) ++ return ret; ++ ++ /* Access the desired shadow register address */ ++ ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd); ++ if (ret < 0) ++ goto reset_shadow_mode; ++ ++ /* Write the desired value in the shadow register */ ++ __phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT, val); ++ ++reset_shadow_mode: ++ /* reset shadow mode 2 */ ++ return __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, ++ MII_BCM7XXX_SHD_MODE_2); ++} ++ + static int bcm7xxx_28nm_ephy_resume(struct phy_device *phydev) + { + int ret; +@@ -595,6 +699,8 @@ static void bcm7xxx_28nm_remove(struct phy_device *phydev) + .get_stats = bcm7xxx_28nm_get_phy_stats, \ + .probe = bcm7xxx_28nm_probe, \ + .remove = bcm7xxx_28nm_remove, \ ++ .read_mmd = bcm7xxx_28nm_ephy_read_mmd, \ ++ .write_mmd = bcm7xxx_28nm_ephy_write_mmd, \ + } + + #define BCM7XXX_40NM_EPHY(_oui, _name) \ +-- +2.33.0 + diff --git a/queue-5.14/net-sched-flower-protect-fl_walk-with-rcu.patch b/queue-5.14/net-sched-flower-protect-fl_walk-with-rcu.patch new file mode 100644 index 00000000000..8d73bd1f0e8 --- /dev/null +++ b/queue-5.14/net-sched-flower-protect-fl_walk-with-rcu.patch @@ -0,0 +1,212 @@ +From f32a2cc4f2df2e4765237ac6e7b10cfa7727890e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 18:08:49 +0300 +Subject: net: sched: flower: protect fl_walk() with rcu + +From: Vlad Buslov + +[ Upstream commit d5ef190693a7d76c5c192d108e8dec48307b46ee ] + +Patch that refactored fl_walk() to use idr_for_each_entry_continue_ul() +also removed rcu protection of individual filters which causes following +use-after-free when filter is deleted concurrently. Fix fl_walk() to obtain +rcu read lock while iterating and taking the filter reference and temporary +release the lock while calling arg->fn() callback that can sleep. + +KASAN trace: + +[ 352.773640] ================================================================== +[ 352.775041] BUG: KASAN: use-after-free in fl_walk+0x159/0x240 [cls_flower] +[ 352.776304] Read of size 4 at addr ffff8881c8251480 by task tc/2987 + +[ 352.777862] CPU: 3 PID: 2987 Comm: tc Not tainted 5.15.0-rc2+ #2 +[ 352.778980] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 +[ 352.781022] Call Trace: +[ 352.781573] dump_stack_lvl+0x46/0x5a +[ 352.782332] print_address_description.constprop.0+0x1f/0x140 +[ 352.783400] ? fl_walk+0x159/0x240 [cls_flower] +[ 352.784292] ? fl_walk+0x159/0x240 [cls_flower] +[ 352.785138] kasan_report.cold+0x83/0xdf +[ 352.785851] ? fl_walk+0x159/0x240 [cls_flower] +[ 352.786587] kasan_check_range+0x145/0x1a0 +[ 352.787337] fl_walk+0x159/0x240 [cls_flower] +[ 352.788163] ? fl_put+0x10/0x10 [cls_flower] +[ 352.789007] ? __mutex_unlock_slowpath.constprop.0+0x220/0x220 +[ 352.790102] tcf_chain_dump+0x231/0x450 +[ 352.790878] ? tcf_chain_tp_delete_empty+0x170/0x170 +[ 352.791833] ? __might_sleep+0x2e/0xc0 +[ 352.792594] ? tfilter_notify+0x170/0x170 +[ 352.793400] ? __mutex_unlock_slowpath.constprop.0+0x220/0x220 +[ 352.794477] tc_dump_tfilter+0x385/0x4b0 +[ 352.795262] ? tc_new_tfilter+0x1180/0x1180 +[ 352.796103] ? __mod_node_page_state+0x1f/0xc0 +[ 352.796974] ? 
__build_skb_around+0x10e/0x130 +[ 352.797826] netlink_dump+0x2c0/0x560 +[ 352.798563] ? netlink_getsockopt+0x430/0x430 +[ 352.799433] ? __mutex_unlock_slowpath.constprop.0+0x220/0x220 +[ 352.800542] __netlink_dump_start+0x356/0x440 +[ 352.801397] rtnetlink_rcv_msg+0x3ff/0x550 +[ 352.802190] ? tc_new_tfilter+0x1180/0x1180 +[ 352.802872] ? rtnl_calcit.isra.0+0x1f0/0x1f0 +[ 352.803668] ? tc_new_tfilter+0x1180/0x1180 +[ 352.804344] ? _copy_from_iter_nocache+0x800/0x800 +[ 352.805202] ? kasan_set_track+0x1c/0x30 +[ 352.805900] netlink_rcv_skb+0xc6/0x1f0 +[ 352.806587] ? rht_deferred_worker+0x6b0/0x6b0 +[ 352.807455] ? rtnl_calcit.isra.0+0x1f0/0x1f0 +[ 352.808324] ? netlink_ack+0x4d0/0x4d0 +[ 352.809086] ? netlink_deliver_tap+0x62/0x3d0 +[ 352.809951] netlink_unicast+0x353/0x480 +[ 352.810744] ? netlink_attachskb+0x430/0x430 +[ 352.811586] ? __alloc_skb+0xd7/0x200 +[ 352.812349] netlink_sendmsg+0x396/0x680 +[ 352.813132] ? netlink_unicast+0x480/0x480 +[ 352.813952] ? __import_iovec+0x192/0x210 +[ 352.814759] ? netlink_unicast+0x480/0x480 +[ 352.815580] sock_sendmsg+0x6c/0x80 +[ 352.816299] ____sys_sendmsg+0x3a5/0x3c0 +[ 352.817096] ? kernel_sendmsg+0x30/0x30 +[ 352.817873] ? __ia32_sys_recvmmsg+0x150/0x150 +[ 352.818753] ___sys_sendmsg+0xd8/0x140 +[ 352.819518] ? sendmsg_copy_msghdr+0x110/0x110 +[ 352.820402] ? ___sys_recvmsg+0xf4/0x1a0 +[ 352.821110] ? __copy_msghdr_from_user+0x260/0x260 +[ 352.821934] ? _raw_spin_lock+0x81/0xd0 +[ 352.822680] ? __handle_mm_fault+0xef3/0x1b20 +[ 352.823549] ? rb_insert_color+0x2a/0x270 +[ 352.824373] ? copy_page_range+0x16b0/0x16b0 +[ 352.825209] ? perf_event_update_userpage+0x2d0/0x2d0 +[ 352.826190] ? __fget_light+0xd9/0xf0 +[ 352.826941] __sys_sendmsg+0xb3/0x130 +[ 352.827613] ? __sys_sendmsg_sock+0x20/0x20 +[ 352.828377] ? do_user_addr_fault+0x2c5/0x8a0 +[ 352.829184] ? fpregs_assert_state_consistent+0x52/0x60 +[ 352.830001] ? 
exit_to_user_mode_prepare+0x32/0x160 +[ 352.830845] do_syscall_64+0x35/0x80 +[ 352.831445] entry_SYSCALL_64_after_hwframe+0x44/0xae +[ 352.832331] RIP: 0033:0x7f7bee973c17 +[ 352.833078] Code: 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 +[ 352.836202] RSP: 002b:00007ffcbb368e28 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +[ 352.837524] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f7bee973c17 +[ 352.838715] RDX: 0000000000000000 RSI: 00007ffcbb368e50 RDI: 0000000000000003 +[ 352.839838] RBP: 00007ffcbb36d090 R08: 00000000cea96d79 R09: 00007f7beea34a40 +[ 352.841021] R10: 00000000004059bb R11: 0000000000000246 R12: 000000000046563f +[ 352.842208] R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffcbb36d088 + +[ 352.843784] Allocated by task 2960: +[ 352.844451] kasan_save_stack+0x1b/0x40 +[ 352.845173] __kasan_kmalloc+0x7c/0x90 +[ 352.845873] fl_change+0x282/0x22db [cls_flower] +[ 352.846696] tc_new_tfilter+0x6cf/0x1180 +[ 352.847493] rtnetlink_rcv_msg+0x471/0x550 +[ 352.848323] netlink_rcv_skb+0xc6/0x1f0 +[ 352.849097] netlink_unicast+0x353/0x480 +[ 352.849886] netlink_sendmsg+0x396/0x680 +[ 352.850678] sock_sendmsg+0x6c/0x80 +[ 352.851398] ____sys_sendmsg+0x3a5/0x3c0 +[ 352.852202] ___sys_sendmsg+0xd8/0x140 +[ 352.852967] __sys_sendmsg+0xb3/0x130 +[ 352.853718] do_syscall_64+0x35/0x80 +[ 352.854457] entry_SYSCALL_64_after_hwframe+0x44/0xae + +[ 352.855830] Freed by task 7: +[ 352.856421] kasan_save_stack+0x1b/0x40 +[ 352.857139] kasan_set_track+0x1c/0x30 +[ 352.857854] kasan_set_free_info+0x20/0x30 +[ 352.858609] __kasan_slab_free+0xed/0x130 +[ 352.859348] kfree+0xa7/0x3c0 +[ 352.859951] process_one_work+0x44d/0x780 +[ 352.860685] worker_thread+0x2e2/0x7e0 +[ 352.861390] kthread+0x1f4/0x220 +[ 352.862022] ret_from_fork+0x1f/0x30 + +[ 352.862955] Last potentially related work creation: +[ 352.863758] kasan_save_stack+0x1b/0x40 +[ 352.864378] kasan_record_aux_stack+0xab/0xc0 +[ 352.865028] insert_work+0x30/0x160 +[ 352.865617] __queue_work+0x351/0x670 +[ 352.866261] rcu_work_rcufn+0x30/0x40 +[ 352.866917] rcu_core+0x3b2/0xdb0 +[ 352.867561] __do_softirq+0xf6/0x386 + +[ 352.868708] Second to last potentially related work creation: +[ 352.869779] kasan_save_stack+0x1b/0x40 +[ 352.870560] kasan_record_aux_stack+0xab/0xc0 +[ 352.871426] call_rcu+0x5f/0x5c0 +[ 352.872108] queue_rcu_work+0x44/0x50 +[ 352.872855] __fl_put+0x17c/0x240 [cls_flower] +[ 352.873733] fl_delete+0xc7/0x100 [cls_flower] +[ 352.874607] tc_del_tfilter+0x510/0xb30 +[ 352.886085] rtnetlink_rcv_msg+0x471/0x550 +[ 352.886875] netlink_rcv_skb+0xc6/0x1f0 +[ 352.887636] netlink_unicast+0x353/0x480 +[ 352.888285] netlink_sendmsg+0x396/0x680 +[ 352.888942] sock_sendmsg+0x6c/0x80 +[ 352.889583] ____sys_sendmsg+0x3a5/0x3c0 +[ 352.890311] ___sys_sendmsg+0xd8/0x140 +[ 352.891019] __sys_sendmsg+0xb3/0x130 +[ 352.891716] do_syscall_64+0x35/0x80 +[ 352.892395] entry_SYSCALL_64_after_hwframe+0x44/0xae + +[ 352.893666] The buggy address belongs to the object at ffff8881c8251000 + which belongs to the cache kmalloc-2k of size 2048 +[ 352.895696] The buggy address is located 1152 bytes inside of + 2048-byte region [ffff8881c8251000, ffff8881c8251800) +[ 352.897640] The buggy address belongs to the page: +[ 352.898492] page:00000000213bac35 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1c8250 +[ 352.900110] head:00000000213bac35 order:3 compound_mapcount:0 
compound_pincount:0 +[ 352.901541] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff) +[ 352.902908] raw: 002ffff800010200 0000000000000000 dead000000000122 ffff888100042f00 +[ 352.904391] raw: 0000000000000000 0000000000080008 00000001ffffffff 0000000000000000 +[ 352.905861] page dumped because: kasan: bad access detected + +[ 352.907323] Memory state around the buggy address: +[ 352.908218] ffff8881c8251380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 352.909471] ffff8881c8251400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 352.910735] >ffff8881c8251480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 352.912012] ^ +[ 352.912642] ffff8881c8251500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 352.913919] ffff8881c8251580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 352.915185] ================================================================== + +Fixes: d39d714969cd ("idr: introduce idr_for_each_entry_continue_ul()") +Signed-off-by: Vlad Buslov +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/sched/cls_flower.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c +index d7869a984881..d2a4e31d963d 100644 +--- a/net/sched/cls_flower.c ++++ b/net/sched/cls_flower.c +@@ -2188,18 +2188,24 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, + + arg->count = arg->skip; + ++ rcu_read_lock(); + idr_for_each_entry_continue_ul(&head->handle_idr, f, tmp, id) { + /* don't return filters that are being deleted */ + if (!refcount_inc_not_zero(&f->refcnt)) + continue; ++ rcu_read_unlock(); ++ + if (arg->fn(tp, f, arg) < 0) { + __fl_put(f); + arg->stop = 1; ++ rcu_read_lock(); + break; + } + __fl_put(f); + arg->count++; ++ rcu_read_lock(); + } ++ rcu_read_unlock(); + arg->cookie = id; + } + +-- +2.33.0 + diff --git a/queue-5.14/net-stmmac-fix-eee-init-issue-when-paired-with-eee-c.patch b/queue-5.14/net-stmmac-fix-eee-init-issue-when-paired-with-eee-c.patch new file mode 100644 index 00000000000..937e81ddeb2 --- /dev/null +++ b/queue-5.14/net-stmmac-fix-eee-init-issue-when-paired-with-eee-c.patch @@ -0,0 +1,43 @@ +From fbfa33d9fdca0ba3570e8a4f14b602a73c71cf6a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 Sep 2021 14:44:36 +0800 +Subject: net: stmmac: fix EEE init issue when paired with EEE capable PHYs + +From: Wong Vee Khee + +[ Upstream commit 656ed8b015f19bf3f6e6b3ddd9a4bb4aa5ca73e1 ] + +When STMMAC is paired with Energy-Efficient Ethernet(EEE) capable PHY, +and the PHY is advertising EEE by default, we need to enable EEE on the +xPCS side too, instead of having user to manually trigger the enabling +config via ethtool. + +Fixed this by adding xpcs_config_eee() call in stmmac_eee_init(). + +Fixes: 7617af3d1a5e ("net: pcs: Introducing support for DWC xpcs Energy Efficient Ethernet") +Cc: Michael Sit Wei Hong +Signed-off-by: Wong Vee Khee +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index 2218bc3a624b..86151a817b79 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -486,6 +486,10 @@ bool stmmac_eee_init(struct stmmac_priv *priv) + timer_setup(&priv->eee_ctrl_timer, stmmac_eee_ctrl_timer, 0); + stmmac_set_eee_timer(priv, priv->hw, STMMAC_DEFAULT_LIT_LS, + eee_tw_timer); ++ if (priv->hw->xpcs) ++ xpcs_config_eee(priv->hw->xpcs, ++ priv->plat->mult_fact_100ns, ++ true); + } + + if (priv->plat->has_gmac4 && priv->tx_lpi_timer <= STMMAC_ET_MAX) { +-- +2.33.0 + diff --git a/queue-5.14/netfilter-log-work-around-missing-softdep-backend-mo.patch b/queue-5.14/netfilter-log-work-around-missing-softdep-backend-mo.patch new file mode 100644 index 00000000000..c75cc4224bb --- /dev/null +++ b/queue-5.14/netfilter-log-work-around-missing-softdep-backend-mo.patch @@ -0,0 +1,148 @@ +From f8d1eb98bc1582fd41368e500399026a31415786 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 Sep 2021 18:50:17 +0200 +Subject: netfilter: log: work around missing softdep backend module + +From: Florian Westphal + +[ Upstream commit b53deef054e58fe4f37c66211b8ece9f8fc1aa13 ] + +iptables/nftables has two types of log modules: + +1. backend, e.g. nf_log_syslog, which implement the functionality +2. frontend, e.g. xt_LOG or nft_log, which call the functionality + provided by backend based on nf_tables or xtables rule set. + +Problem is that the request_module() call to load the backed in +nf_logger_find_get() might happen with nftables transaction mutex held +in case the call path is via nf_tables/nft_compat. + +This can cause deadlocks (see 'Fixes' tags for details). + +The chosen solution as to let modprobe deal with this by adding 'pre: ' +soft dep tag to xt_LOG (to load the syslog backend) and xt_NFLOG (to +load nflog backend). + +Eric reports that this breaks on systems with older modprobe that +doesn't support softdeps. + +Another, similar issue occurs when someone either insmods xt_(NF)LOG +directly or unloads the backend module (possible if no log frontend +is in use): because the frontend module is already loaded, modprobe is +not invoked again so the softdep isn't evaluated. + +Add a workaround: If nf_logger_find_get() returns -ENOENT and call +is not via nft_compat, load the backend explicitly and try again. + +Else, let nft_compat ask for deferred request_module via nf_tables +infra. + +Softdeps are kept in-place, so with newer modprobe the dependencies +are resolved from userspace. 
+ +Fixes: cefa31a9d461 ("netfilter: nft_log: perform module load from nf_tables") +Fixes: a38b5b56d6f4 ("netfilter: nf_log: add module softdeps") +Reported-and-tested-by: Eric Dumazet +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_compat.c | 17 ++++++++++++++++- + net/netfilter/xt_LOG.c | 10 +++++++++- + net/netfilter/xt_NFLOG.c | 10 +++++++++- + 3 files changed, 34 insertions(+), 3 deletions(-) + +diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c +index 272bcdb1392d..f69cc73c5813 100644 +--- a/net/netfilter/nft_compat.c ++++ b/net/netfilter/nft_compat.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + /* Used for matches where *info is larger than X byte */ + #define NFT_MATCH_LARGE_THRESH 192 +@@ -257,8 +258,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + nft_compat_wait_for_destructors(); + + ret = xt_check_target(&par, size, proto, inv); +- if (ret < 0) ++ if (ret < 0) { ++ if (ret == -ENOENT) { ++ const char *modname = NULL; ++ ++ if (strcmp(target->name, "LOG") == 0) ++ modname = "nf_log_syslog"; ++ else if (strcmp(target->name, "NFLOG") == 0) ++ modname = "nfnetlink_log"; ++ ++ if (modname && ++ nft_request_module(ctx->net, "%s", modname) == -EAGAIN) ++ return -EAGAIN; ++ } ++ + return ret; ++ } + + /* The standard target cannot be used */ + if (!target->target) +diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c +index 2ff75f7637b0..f39244f9c0ed 100644 +--- a/net/netfilter/xt_LOG.c ++++ b/net/netfilter/xt_LOG.c +@@ -44,6 +44,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par) + static int log_tg_check(const struct xt_tgchk_param *par) + { + const struct xt_log_info *loginfo = par->targinfo; ++ int ret; + + if (par->family != NFPROTO_IPV4 && par->family != NFPROTO_IPV6) + return -EINVAL; +@@ -58,7 +59,14 @@ static int log_tg_check(const struct xt_tgchk_param *par) + return -EINVAL; + } + +- return nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); ++ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); ++ if (ret != 0 && !par->nft_compat) { ++ request_module("%s", "nf_log_syslog"); ++ ++ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_LOG); ++ } ++ ++ return ret; + } + + static void log_tg_destroy(const struct xt_tgdtor_param *par) +diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c +index fb5793208059..e660c3710a10 100644 +--- a/net/netfilter/xt_NFLOG.c ++++ b/net/netfilter/xt_NFLOG.c +@@ -42,13 +42,21 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par) + static int nflog_tg_check(const struct xt_tgchk_param *par) + { + const struct xt_nflog_info *info = par->targinfo; ++ int ret; + + if (info->flags & ~XT_NFLOG_MASK) + return -EINVAL; + if (info->prefix[sizeof(info->prefix) - 1] != '\0') + return -EINVAL; + +- return nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG); ++ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG); ++ if (ret != 0 && !par->nft_compat) { ++ request_module("%s", "nfnetlink_log"); ++ ++ ret = nf_logger_find_get(par->family, NF_LOG_TYPE_ULOG); ++ } ++ ++ return ret; + } + + static void nflog_tg_destroy(const struct xt_tgdtor_param *par) +-- +2.33.0 + diff --git a/queue-5.14/netfilter-nf_tables-unlink-table-before-deleting-it.patch b/queue-5.14/netfilter-nf_tables-unlink-table-before-deleting-it.patch new file mode 100644 index 00000000000..2ff5d58f01d --- /dev/null +++ b/queue-5.14/netfilter-nf_tables-unlink-table-before-deleting-it.patch @@ -0,0 +1,106 @@ 
+From ea033722981ec2ce15970ef63cde25e10bbde9d7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Sep 2021 14:42:33 +0200 +Subject: netfilter: nf_tables: unlink table before deleting it + +From: Florian Westphal + +[ Upstream commit a499b03bf36b0c2e3b958a381d828678ab0ffc5e ] + +syzbot reports following UAF: +BUG: KASAN: use-after-free in memcmp+0x18f/0x1c0 lib/string.c:955 + nla_strcmp+0xf2/0x130 lib/nlattr.c:836 + nft_table_lookup.part.0+0x1a2/0x460 net/netfilter/nf_tables_api.c:570 + nft_table_lookup net/netfilter/nf_tables_api.c:4064 [inline] + nf_tables_getset+0x1b3/0x860 net/netfilter/nf_tables_api.c:4064 + nfnetlink_rcv_msg+0x659/0x13f0 net/netfilter/nfnetlink.c:285 + netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2504 + +Problem is that all get operations are lockless, so the commit_mutex +held by nft_rcv_nl_event() isn't enough to stop a parallel GET request +from doing read-accesses to the table object even after synchronize_rcu(). + +To avoid this, unlink the table first and store the table objects in +on-stack scratch space. + +Fixes: 6001a930ce03 ("netfilter: nftables: introduce table ownership") +Reported-and-tested-by: syzbot+f31660cf279b0557160c@syzkaller.appspotmail.com +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_tables_api.c | 28 ++++++++++++++++++---------- + 1 file changed, 18 insertions(+), 10 deletions(-) + +diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c +index 081437dd75b7..33e771cd847c 100644 +--- a/net/netfilter/nf_tables_api.c ++++ b/net/netfilter/nf_tables_api.c +@@ -9599,7 +9599,6 @@ static void __nft_release_table(struct net *net, struct nft_table *table) + table->use--; + nf_tables_chain_destroy(&ctx); + } +- list_del(&table->list); + nf_tables_table_destroy(&ctx); + } + +@@ -9612,6 +9611,8 @@ static void __nft_release_tables(struct net *net) + if (nft_table_has_owner(table)) + continue; + ++ list_del(&table->list); ++ + __nft_release_table(net, table); + } + } +@@ -9619,31 +9620,38 @@ static void __nft_release_tables(struct net *net) + static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event, + void *ptr) + { ++ struct nft_table *table, *to_delete[8]; + struct nftables_pernet *nft_net; + struct netlink_notify *n = ptr; +- struct nft_table *table, *nt; + struct net *net = n->net; +- bool release = false; ++ unsigned int deleted; ++ bool restart = false; + + if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER) + return NOTIFY_DONE; + + nft_net = nft_pernet(net); ++ deleted = 0; + mutex_lock(&nft_net->commit_mutex); ++again: + list_for_each_entry(table, &nft_net->tables, list) { + if (nft_table_has_owner(table) && + n->portid == table->nlpid) { + __nft_release_hook(net, table); +- release = true; ++ list_del_rcu(&table->list); ++ to_delete[deleted++] = table; ++ if (deleted >= ARRAY_SIZE(to_delete)) ++ break; + } + } +- if (release) { ++ if (deleted) { ++ restart = deleted >= ARRAY_SIZE(to_delete); + synchronize_rcu(); +- list_for_each_entry_safe(table, nt, &nft_net->tables, list) { +- if (nft_table_has_owner(table) && +- n->portid == table->nlpid) +- __nft_release_table(net, table); +- } ++ while (deleted) ++ __nft_release_table(net, to_delete[--deleted]); ++ ++ if (restart) ++ goto again; + } + mutex_unlock(&nft_net->commit_mutex); + +-- +2.33.0 + diff --git a/queue-5.14/objtool-teach-get_alt_entry-about-more-relocation-ty.patch b/queue-5.14/objtool-teach-get_alt_entry-about-more-relocation-ty.patch new file mode 
100644 index 00000000000..e869586eaff --- /dev/null +++ b/queue-5.14/objtool-teach-get_alt_entry-about-more-relocation-ty.patch @@ -0,0 +1,98 @@ +From e79c2ee117c37790efc73000de29f1d2a5a5ee1f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 30 Sep 2021 12:43:10 +0200 +Subject: objtool: Teach get_alt_entry() about more relocation types + +From: Peter Zijlstra + +[ Upstream commit 24ff652573754fe4c03213ebd26b17e86842feb3 ] + +Occasionally objtool encounters symbol (as opposed to section) +relocations in .altinstructions. Typically they are the alternatives +written by elf_add_alternative() as encountered on a noinstr +validation run on vmlinux after having already ran objtool on the +individual .o files. + +Basically this is the counterpart of commit 44f6a7c0755d ("objtool: +Fix seg fault with Clang non-section symbols"), because when these new +assemblers (binutils now also does this) strip the section symbols, +elf_add_reloc_to_insn() is forced to emit symbol based relocations. + +As such, teach get_alt_entry() about different relocation types. + +Fixes: 9bc0bb50727c ("objtool/x86: Rewrite retpoline thunk calls") +Reported-by: Stephen Rothwell +Reported-by: Borislav Petkov +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Josh Poimboeuf +Tested-by: Nathan Chancellor +Link: https://lore.kernel.org/r/YVWUvknIEVNkPvnP@hirez.programming.kicks-ass.net +Signed-off-by: Sasha Levin +--- + tools/objtool/special.c | 32 +++++++++++++++++++++++++------- + 1 file changed, 25 insertions(+), 7 deletions(-) + +diff --git a/tools/objtool/special.c b/tools/objtool/special.c +index bc925cf19e2d..f58ecc50fb10 100644 +--- a/tools/objtool/special.c ++++ b/tools/objtool/special.c +@@ -58,6 +58,24 @@ void __weak arch_handle_alternative(unsigned short feature, struct special_alt * + { + } + ++static bool reloc2sec_off(struct reloc *reloc, struct section **sec, unsigned long *off) ++{ ++ switch (reloc->sym->type) { ++ case STT_FUNC: ++ *sec = reloc->sym->sec; ++ *off = reloc->sym->offset + reloc->addend; ++ return true; ++ ++ case STT_SECTION: ++ *sec = reloc->sym->sec; ++ *off = reloc->addend; ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ + static int get_alt_entry(struct elf *elf, struct special_entry *entry, + struct section *sec, int idx, + struct special_alt *alt) +@@ -91,15 +109,12 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, + WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); + return -1; + } +- if (orig_reloc->sym->type != STT_SECTION) { +- WARN_FUNC("don't know how to handle non-section reloc symbol %s", ++ if (!reloc2sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off)) { ++ WARN_FUNC("don't know how to handle reloc symbol type: %s", + sec, offset + entry->orig, orig_reloc->sym->name); + return -1; + } + +- alt->orig_sec = orig_reloc->sym->sec; +- alt->orig_off = orig_reloc->addend; +- + if (!entry->group || alt->new_len) { + new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); + if (!new_reloc) { +@@ -116,8 +131,11 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, + if (arch_is_retpoline(new_reloc->sym)) + return 1; + +- alt->new_sec = new_reloc->sym->sec; +- alt->new_off = (unsigned int)new_reloc->addend; ++ if (!reloc2sec_off(new_reloc, &alt->new_sec, &alt->new_off)) { ++ WARN_FUNC("don't know how to handle reloc symbol type: %s", ++ sec, offset + entry->new, new_reloc->sym->name); ++ return -1; ++ } + + /* _ASM_EXTABLE_EX hack */ + if (alt->new_off >= 0x7ffffff0) +-- +2.33.0 + diff --git 
a/queue-5.14/perf-x86-intel-update-event-constraints-for-icx.patch b/queue-5.14/perf-x86-intel-update-event-constraints-for-icx.patch new file mode 100644 index 00000000000..98d30b97919 --- /dev/null +++ b/queue-5.14/perf-x86-intel-update-event-constraints-for-icx.patch @@ -0,0 +1,37 @@ +From 5443ab4c80e773273436fb35b69e8b10f20937fd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Sep 2021 08:19:03 -0700 +Subject: perf/x86/intel: Update event constraints for ICX + +From: Kan Liang + +[ Upstream commit ecc2123e09f9e71ddc6c53d71e283b8ada685fe2 ] + +According to the latest event list, the event encoding 0xEF is only +available on the first 4 counters. Add it into the event constraints +table. + +Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support") +Signed-off-by: Kan Liang +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/1632842343-25862-1-git-send-email-kan.liang@linux.intel.com +Signed-off-by: Sasha Levin +--- + arch/x86/events/intel/core.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c +index ac6fd2dabf6a..482224444a1e 100644 +--- a/arch/x86/events/intel/core.c ++++ b/arch/x86/events/intel/core.c +@@ -263,6 +263,7 @@ static struct event_constraint intel_icl_event_constraints[] = { + INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), ++ INTEL_EVENT_CONSTRAINT(0xef, 0xf), + INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), + EVENT_CONSTRAINT_END + }; +-- +2.33.0 + diff --git a/queue-5.14/rdma-cma-fix-listener-leak-in-rdma_cma_listen_on_all.patch b/queue-5.14/rdma-cma-fix-listener-leak-in-rdma_cma_listen_on_all.patch new file mode 100644 index 00000000000..05587c09247 --- /dev/null +++ b/queue-5.14/rdma-cma-fix-listener-leak-in-rdma_cma_listen_on_all.patch @@ -0,0 +1,97 @@ +From b209201958b002d614bf29933ba81cb6d710babe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Sep 2021 17:33:44 +0800 +Subject: RDMA/cma: Fix listener leak in rdma_cma_listen_on_all() failure + +From: Tao Liu + +[ Upstream commit ca465e1f1f9b38fe916a36f7d80c5d25f2337c81 ] + +If cma_listen_on_all() fails it leaves the per-device ID still on the +listen_list but the state is not set to RDMA_CM_ADDR_BOUND. + +When the cmid is eventually destroyed cma_cancel_listens() is not called +due to the wrong state, however the per-device IDs are still holding the +refcount preventing the ID from being destroyed, thus deadlocking: + + task:rping state:D stack: 0 pid:19605 ppid: 47036 flags:0x00000084 + Call Trace: + __schedule+0x29a/0x780 + ? free_unref_page_commit+0x9b/0x110 + schedule+0x3c/0xa0 + schedule_timeout+0x215/0x2b0 + ? __flush_work+0x19e/0x1e0 + wait_for_completion+0x8d/0xf0 + _destroy_id+0x144/0x210 [rdma_cm] + ucma_close_id+0x2b/0x40 [rdma_ucm] + __destroy_id+0x93/0x2c0 [rdma_ucm] + ? __xa_erase+0x4a/0xa0 + ucma_destroy_id+0x9a/0x120 [rdma_ucm] + ucma_write+0xb8/0x130 [rdma_ucm] + vfs_write+0xb4/0x250 + ksys_write+0xb5/0xd0 + ? syscall_trace_enter.isra.19+0x123/0x190 + do_syscall_64+0x33/0x40 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Ensure that cma_listen_on_all() atomically unwinds its action under the +lock during error. 
+ +Fixes: c80a0c52d85c ("RDMA/cma: Add missing error handling of listen_id") +Link: https://lore.kernel.org/r/20210913093344.17230-1-thomas.liu@ucloud.cn +Signed-off-by: Tao Liu +Reviewed-by: Leon Romanovsky +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/core/cma.c | 13 ++++++++++--- + 1 file changed, 10 insertions(+), 3 deletions(-) + +diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c +index 107462905b21..dbbacc8e9273 100644 +--- a/drivers/infiniband/core/cma.c ++++ b/drivers/infiniband/core/cma.c +@@ -1746,15 +1746,16 @@ static void cma_cancel_route(struct rdma_id_private *id_priv) + } + } + +-static void cma_cancel_listens(struct rdma_id_private *id_priv) ++static void _cma_cancel_listens(struct rdma_id_private *id_priv) + { + struct rdma_id_private *dev_id_priv; + ++ lockdep_assert_held(&lock); ++ + /* + * Remove from listen_any_list to prevent added devices from spawning + * additional listen requests. + */ +- mutex_lock(&lock); + list_del(&id_priv->list); + + while (!list_empty(&id_priv->listen_list)) { +@@ -1768,6 +1769,12 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv) + rdma_destroy_id(&dev_id_priv->id); + mutex_lock(&lock); + } ++} ++ ++static void cma_cancel_listens(struct rdma_id_private *id_priv) ++{ ++ mutex_lock(&lock); ++ _cma_cancel_listens(id_priv); + mutex_unlock(&lock); + } + +@@ -2587,7 +2594,7 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) + return 0; + + err_listen: +- list_del(&id_priv->list); ++ _cma_cancel_listens(id_priv); + mutex_unlock(&lock); + if (to_destroy) + rdma_destroy_id(&to_destroy->id); +-- +2.33.0 + diff --git a/queue-5.14/rdma-hfi1-fix-kernel-pointer-leak.patch b/queue-5.14/rdma-hfi1-fix-kernel-pointer-leak.patch new file mode 100644 index 00000000000..24cf0699ad7 --- /dev/null +++ b/queue-5.14/rdma-hfi1-fix-kernel-pointer-leak.patch @@ -0,0 +1,49 @@ +From d64cf0b917af3273e17adb3cba37ac88c2ac5833 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 22 Sep 2021 21:48:57 +0800 +Subject: RDMA/hfi1: Fix kernel pointer leak + +From: Guo Zhi + +[ Upstream commit 7d5cfafe8b4006a75b55c2f1fdfdb363f9a5cc98 ] + +Pointers should be printed with %p or %px rather than cast to 'unsigned +long long' and printed with %llx. Change %llx to %p to print the secured +pointer. 
+ +Fixes: 042a00f93aad ("IB/{ipoib,hfi1}: Add a timeout handler for rdma_netdev") +Link: https://lore.kernel.org/r/20210922134857.619602-1-qtxuning1999@sjtu.edu.cn +Signed-off-by: Guo Zhi +Acked-by: Mike Marciniszyn +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/hfi1/ipoib_tx.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/infiniband/hw/hfi1/ipoib_tx.c b/drivers/infiniband/hw/hfi1/ipoib_tx.c +index 993f9838b6c8..e1fdeadda437 100644 +--- a/drivers/infiniband/hw/hfi1/ipoib_tx.c ++++ b/drivers/infiniband/hw/hfi1/ipoib_tx.c +@@ -873,14 +873,14 @@ void hfi1_ipoib_tx_timeout(struct net_device *dev, unsigned int q) + struct hfi1_ipoib_txq *txq = &priv->txqs[q]; + u64 completed = atomic64_read(&txq->complete_txreqs); + +- dd_dev_info(priv->dd, "timeout txq %llx q %u stopped %u stops %d no_desc %d ring_full %d\n", +- (unsigned long long)txq, q, ++ dd_dev_info(priv->dd, "timeout txq %p q %u stopped %u stops %d no_desc %d ring_full %d\n", ++ txq, q, + __netif_subqueue_stopped(dev, txq->q_idx), + atomic_read(&txq->stops), + atomic_read(&txq->no_desc), + atomic_read(&txq->ring_full)); +- dd_dev_info(priv->dd, "sde %llx engine %u\n", +- (unsigned long long)txq->sde, ++ dd_dev_info(priv->dd, "sde %p engine %u\n", ++ txq->sde, + txq->sde ? txq->sde->this_idx : 0); + dd_dev_info(priv->dd, "flow %x\n", txq->flow.as_int); + dd_dev_info(priv->dd, "sent %llu completed %llu used %llu\n", +-- +2.33.0 + diff --git a/queue-5.14/rdma-hns-add-the-check-of-the-cqe-size-of-the-user-s.patch b/queue-5.14/rdma-hns-add-the-check-of-the-cqe-size-of-the-user-s.patch new file mode 100644 index 00000000000..612c53b80ec --- /dev/null +++ b/queue-5.14/rdma-hns-add-the-check-of-the-cqe-size-of-the-user-s.patch @@ -0,0 +1,78 @@ +From ec397ea972723453e82e85666d6a1065fe389745 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Sep 2021 20:55:57 +0800 +Subject: RDMA/hns: Add the check of the CQE size of the user space + +From: Wenpeng Liang + +[ Upstream commit e671f0ecfece14940a9bb81981098910ea278cf7 ] + +If the CQE size of the user space is not the size supported by the +hardware, the creation of CQ should be stopped. 
+ +Fixes: 09a5f210f67e ("RDMA/hns: Add support for CQE in size of 64 Bytes") +Link: https://lore.kernel.org/r/20210927125557.15031-3-liangwenpeng@huawei.com +Signed-off-by: Wenpeng Liang +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/hns/hns_roce_cq.c | 31 ++++++++++++++++++------- + 1 file changed, 22 insertions(+), 9 deletions(-) + +diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c +index 1e9c3c5bee68..d763f097599f 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_cq.c ++++ b/drivers/infiniband/hw/hns/hns_roce_cq.c +@@ -326,19 +326,30 @@ static void set_cq_param(struct hns_roce_cq *hr_cq, u32 cq_entries, int vector, + INIT_LIST_HEAD(&hr_cq->rq_list); + } + +-static void set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, +- struct hns_roce_ib_create_cq *ucmd) ++static int set_cqe_size(struct hns_roce_cq *hr_cq, struct ib_udata *udata, ++ struct hns_roce_ib_create_cq *ucmd) + { + struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); + +- if (udata) { +- if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) +- hr_cq->cqe_size = ucmd->cqe_size; +- else +- hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE; +- } else { ++ if (!udata) { + hr_cq->cqe_size = hr_dev->caps.cqe_sz; ++ return 0; ++ } ++ ++ if (udata->inlen >= offsetofend(typeof(*ucmd), cqe_size)) { ++ if (ucmd->cqe_size != HNS_ROCE_V2_CQE_SIZE && ++ ucmd->cqe_size != HNS_ROCE_V3_CQE_SIZE) { ++ ibdev_err(&hr_dev->ib_dev, ++ "invalid cqe size %u.\n", ucmd->cqe_size); ++ return -EINVAL; ++ } ++ ++ hr_cq->cqe_size = ucmd->cqe_size; ++ } else { ++ hr_cq->cqe_size = HNS_ROCE_V2_CQE_SIZE; + } ++ ++ return 0; + } + + int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, +@@ -366,7 +377,9 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, + + set_cq_param(hr_cq, attr->cqe, attr->comp_vector, &ucmd); + +- set_cqe_size(hr_cq, udata, &ucmd); ++ ret = set_cqe_size(hr_cq, udata, &ucmd); ++ if (ret) ++ return ret; + + ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); + if (ret) { +-- +2.33.0 + diff --git a/queue-5.14/rdma-hns-fix-the-size-setting-error-when-copying-cqe.patch b/queue-5.14/rdma-hns-fix-the-size-setting-error-when-copying-cqe.patch new file mode 100644 index 00000000000..a097f188a9e --- /dev/null +++ b/queue-5.14/rdma-hns-fix-the-size-setting-error-when-copying-cqe.patch @@ -0,0 +1,37 @@ +From 8262d64e5738e087d95e3e9f069e9dfffc1f4ad3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Sep 2021 20:55:56 +0800 +Subject: RDMA/hns: Fix the size setting error when copying CQE in clean_cq() + +From: Wenpeng Liang + +[ Upstream commit cc26aee100588a3f293921342a307b6309ace193 ] + +The size of CQE is different for different versions of hardware, so the +driver needs to specify the size of CQE explicitly. 
+ +Fixes: 09a5f210f67e ("RDMA/hns: Add support for CQE in size of 64 Bytes") +Link: https://lore.kernel.org/r/20210927125557.15031-2-liangwenpeng@huawei.com +Signed-off-by: Wenpeng Liang +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +index 6cb4a4e10837..0ccb0c453f6a 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +@@ -3306,7 +3306,7 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, + dest = get_cqe_v2(hr_cq, (prod_index + nfreed) & + hr_cq->ib_cq.cqe); + owner_bit = hr_reg_read(dest, CQE_OWNER); +- memcpy(dest, cqe, sizeof(*cqe)); ++ memcpy(dest, cqe, hr_cq->cqe_size); + hr_reg_write(dest, CQE_OWNER, owner_bit); + } + } +-- +2.33.0 + diff --git a/queue-5.14/rdma-hns-work-around-broken-constant-propagation-in-.patch b/queue-5.14/rdma-hns-work-around-broken-constant-propagation-in-.patch new file mode 100644 index 00000000000..5a4d8398428 --- /dev/null +++ b/queue-5.14/rdma-hns-work-around-broken-constant-propagation-in-.patch @@ -0,0 +1,77 @@ +From b6e015e5f621c8736dd9f2f163242d614eef85ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Sep 2021 12:05:28 -0300 +Subject: RDMA/hns: Work around broken constant propagation in gcc 8 + +From: Jason Gunthorpe + +[ Upstream commit 14351f08ed5c8b888cdd95651152db7e096ee27f ] + +gcc 8.3 and 5.4 throw this: + +In function 'modify_qp_init_to_rtr', +././include/linux/compiler_types.h:322:38: error: call to '__compiletime_assert_1859' declared with attribute error: FIELD_PREP: value too large for the field + _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__) +[..] +drivers/infiniband/hw/hns/hns_roce_common.h:91:52: note: in expansion of macro 'FIELD_PREP' + *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \ + ^~~~~~~~~~ +drivers/infiniband/hw/hns/hns_roce_common.h:95:39: note: in expansion of macro '_hr_reg_write' + #define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val) + ^~~~~~~~~~~~~ +drivers/infiniband/hw/hns/hns_roce_hw_v2.c:4412:2: note: in expansion of macro 'hr_reg_write' + hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini); + +Because gcc has miscalculated the constantness of lp_pktn_ini: + + mtu = ib_mtu_enum_to_int(ib_mtu); + if (WARN_ON(mtu < 0)) [..] + lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); + +Since mtu is limited to {256,512,1024,2048,4096} lp_pktn_ini is between 4 +and 8 which is compatible with the 4 bit field in the FIELD_PREP. + +Work around this broken compiler by adding a 'can never be true' +constraint on lp_pktn_ini's value which clears out the problem. 
+ +Fixes: f0cb411aad23 ("RDMA/hns: Use new interface to modify QP context") +Link: https://lore.kernel.org/r/0-v1-c773ecb137bc+11f-hns_gcc8_jgg@nvidia.com +Reported-by: Geert Uytterhoeven +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +index c320891c8763..6cb4a4e10837 100644 +--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c ++++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +@@ -4411,7 +4411,12 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, + hr_qp->path_mtu = ib_mtu; + + mtu = ib_mtu_enum_to_int(ib_mtu); +- if (WARN_ON(mtu < 0)) ++ if (WARN_ON(mtu <= 0)) ++ return -EINVAL; ++#define MAX_LP_MSG_LEN 65536 ++ /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */ ++ lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); ++ if (WARN_ON(lp_pktn_ini >= 0xF)) + return -EINVAL; + + if (attr_mask & IB_QP_PATH_MTU) { +@@ -4419,10 +4424,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, + hr_reg_clear(qpc_mask, QPC_MTU); + } + +-#define MAX_LP_MSG_LEN 65536 +- /* MTU * (2 ^ LP_PKTN_INI) shouldn't be bigger than 64KB */ +- lp_pktn_ini = ilog2(MAX_LP_MSG_LEN / mtu); +- + hr_reg_write(context, QPC_LP_PKTN_INI, lp_pktn_ini); + hr_reg_clear(qpc_mask, QPC_LP_PKTN_INI); + +-- +2.33.0 + diff --git a/queue-5.14/rdma-irdma-report-correct-wc-error-when-there-are-mw.patch b/queue-5.14/rdma-irdma-report-correct-wc-error-when-there-are-mw.patch new file mode 100644 index 00000000000..0da6d92f997 --- /dev/null +++ b/queue-5.14/rdma-irdma-report-correct-wc-error-when-there-are-mw.patch @@ -0,0 +1,68 @@ +From b183d4cf5bb91e80ce48af4caf6689777e09ec63 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Sep 2021 14:12:22 -0500 +Subject: RDMA/irdma: Report correct WC error when there are MW bind errors + +From: Sindhu Devale + +[ Upstream commit 9f7fa37a6bd90f2749c67f8524334c387d972eb9 ] + +Report the correct WC error when MW bind error related asynchronous events +are generated by HW. 
+ +Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") +Link: https://lore.kernel.org/r/20210916191222.824-5-shiraz.saleem@intel.com +Signed-off-by: Sindhu Devale +Signed-off-by: Shiraz Saleem +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/irdma/hw.c | 5 +++++ + drivers/infiniband/hw/irdma/user.h | 1 + + drivers/infiniband/hw/irdma/verbs.c | 2 ++ + 3 files changed, 8 insertions(+) + +diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c +index cb9a8e24e3b7..7de525a5ccf8 100644 +--- a/drivers/infiniband/hw/irdma/hw.c ++++ b/drivers/infiniband/hw/irdma/hw.c +@@ -179,6 +179,11 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, + case IRDMA_AE_LLP_TOO_MANY_RETRIES: + qp->flush_code = FLUSH_RETRY_EXC_ERR; + break; ++ case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS: ++ case IRDMA_AE_AMP_MWBIND_BIND_DISABLED: ++ case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS: ++ qp->flush_code = FLUSH_MW_BIND_ERR; ++ break; + default: + qp->flush_code = FLUSH_FATAL_ERR; + break; +diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h +index 267102d1049d..3dcbb1fbf2c6 100644 +--- a/drivers/infiniband/hw/irdma/user.h ++++ b/drivers/infiniband/hw/irdma/user.h +@@ -103,6 +103,7 @@ enum irdma_flush_opcode { + FLUSH_LOC_LEN_ERR, + FLUSH_FATAL_ERR, + FLUSH_RETRY_EXC_ERR, ++ FLUSH_MW_BIND_ERR, + }; + + enum irdma_cmpl_status { +diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c +index 3960c872ff76..fa393c5ea397 100644 +--- a/drivers/infiniband/hw/irdma/verbs.c ++++ b/drivers/infiniband/hw/irdma/verbs.c +@@ -3360,6 +3360,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode + return IB_WC_WR_FLUSH_ERR; + case FLUSH_RETRY_EXC_ERR: + return IB_WC_RETRY_EXC_ERR; ++ case FLUSH_MW_BIND_ERR: ++ return IB_WC_MW_BIND_ERR; + case FLUSH_FATAL_ERR: + default: + return IB_WC_FATAL_ERR; +-- +2.33.0 + diff --git a/queue-5.14/rdma-irdma-report-correct-wc-error-when-transport-re.patch b/queue-5.14/rdma-irdma-report-correct-wc-error-when-transport-re.patch new file mode 100644 index 00000000000..09e07bd0c5a --- /dev/null +++ b/queue-5.14/rdma-irdma-report-correct-wc-error-when-transport-re.patch @@ -0,0 +1,69 @@ +From db98f0f6470a29c1d119deff605deb214d88ea24 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Sep 2021 14:12:21 -0500 +Subject: RDMA/irdma: Report correct WC error when transport retry counter is + exceeded + +From: Sindhu Devale + +[ Upstream commit d3bdcd59633907ee306057b6bb70f06dce47dddc ] + +When the retry counter exceeds, as the remote QP didn't send any Ack or +Nack an asynchronous event (AE) for too many retries is generated. Add +code to handle the AE and set the correct IB WC error code +IB_WC_RETRY_EXC_ERR. 
+ +Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") +Link: https://lore.kernel.org/r/20210916191222.824-4-shiraz.saleem@intel.com +Signed-off-by: Sindhu Devale +Signed-off-by: Shiraz Saleem +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/irdma/hw.c | 3 +++ + drivers/infiniband/hw/irdma/user.h | 1 + + drivers/infiniband/hw/irdma/verbs.c | 2 ++ + 3 files changed, 6 insertions(+) + +diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c +index 33c06a3a4f63..cb9a8e24e3b7 100644 +--- a/drivers/infiniband/hw/irdma/hw.c ++++ b/drivers/infiniband/hw/irdma/hw.c +@@ -176,6 +176,9 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp, + case IRDMA_AE_LLP_RECEIVED_MPA_CRC_ERROR: + qp->flush_code = FLUSH_GENERAL_ERR; + break; ++ case IRDMA_AE_LLP_TOO_MANY_RETRIES: ++ qp->flush_code = FLUSH_RETRY_EXC_ERR; ++ break; + default: + qp->flush_code = FLUSH_FATAL_ERR; + break; +diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h +index ff705f323233..267102d1049d 100644 +--- a/drivers/infiniband/hw/irdma/user.h ++++ b/drivers/infiniband/hw/irdma/user.h +@@ -102,6 +102,7 @@ enum irdma_flush_opcode { + FLUSH_REM_OP_ERR, + FLUSH_LOC_LEN_ERR, + FLUSH_FATAL_ERR, ++ FLUSH_RETRY_EXC_ERR, + }; + + enum irdma_cmpl_status { +diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c +index 6c3d28f744cb..3960c872ff76 100644 +--- a/drivers/infiniband/hw/irdma/verbs.c ++++ b/drivers/infiniband/hw/irdma/verbs.c +@@ -3358,6 +3358,8 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode + return IB_WC_LOC_LEN_ERR; + case FLUSH_GENERAL_ERR: + return IB_WC_WR_FLUSH_ERR; ++ case FLUSH_RETRY_EXC_ERR: ++ return IB_WC_RETRY_EXC_ERR; + case FLUSH_FATAL_ERR: + default: + return IB_WC_FATAL_ERR; +-- +2.33.0 + diff --git a/queue-5.14/rdma-irdma-skip-cqp-ring-during-a-reset.patch b/queue-5.14/rdma-irdma-skip-cqp-ring-during-a-reset.patch new file mode 100644 index 00000000000..3e801bd05b9 --- /dev/null +++ b/queue-5.14/rdma-irdma-skip-cqp-ring-during-a-reset.patch @@ -0,0 +1,140 @@ +From fc0e3119f6d9a3e42c4e6e06756743b6d3c18cd0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Sep 2021 14:12:19 -0500 +Subject: RDMA/irdma: Skip CQP ring during a reset + +From: Sindhu Devale + +[ Upstream commit 5b1e985f7626307c451f98883f5e2665ee208e1c ] + +Due to duplicate reset flags, CQP commands are processed during reset. 
+ +This leads CQP failures such as below: + + irdma0: [Delete Local MAC Entry Cmd Error][op_code=49] status=-27 waiting=1 completion_err=0 maj=0x0 min=0x0 + +Remove the redundant flag and set the correct reset flag so CPQ is paused +during reset + +Fixes: 8498a30e1b94 ("RDMA/irdma: Register auxiliary driver and implement private channel OPs") +Link: https://lore.kernel.org/r/20210916191222.824-2-shiraz.saleem@intel.com +Reported-by: LiLiang +Signed-off-by: Sindhu Devale +Signed-off-by: Shiraz Saleem +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/irdma/cm.c | 4 ++-- + drivers/infiniband/hw/irdma/hw.c | 6 +++--- + drivers/infiniband/hw/irdma/i40iw_if.c | 2 +- + drivers/infiniband/hw/irdma/main.h | 1 - + drivers/infiniband/hw/irdma/utils.c | 2 +- + drivers/infiniband/hw/irdma/verbs.c | 3 +-- + 6 files changed, 8 insertions(+), 10 deletions(-) + +diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c +index 6b62299abfbb..6dea0a49d171 100644 +--- a/drivers/infiniband/hw/irdma/cm.c ++++ b/drivers/infiniband/hw/irdma/cm.c +@@ -3496,7 +3496,7 @@ static void irdma_cm_disconn_true(struct irdma_qp *iwqp) + original_hw_tcp_state == IRDMA_TCP_STATE_TIME_WAIT || + last_ae == IRDMA_AE_RDMAP_ROE_BAD_LLP_CLOSE || + last_ae == IRDMA_AE_BAD_CLOSE || +- last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->reset)) { ++ last_ae == IRDMA_AE_LLP_CONNECTION_RESET || iwdev->rf->reset)) { + issue_close = 1; + iwqp->cm_id = NULL; + qp->term_flags = 0; +@@ -4250,7 +4250,7 @@ void irdma_cm_teardown_connections(struct irdma_device *iwdev, u32 *ipaddr, + teardown_entry); + attr.qp_state = IB_QPS_ERR; + irdma_modify_qp(&cm_node->iwqp->ibqp, &attr, IB_QP_STATE, NULL); +- if (iwdev->reset) ++ if (iwdev->rf->reset) + irdma_cm_disconn(cm_node->iwqp); + irdma_rem_ref_cm_node(cm_node); + } +diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c +index 00de5ee9a260..33c06a3a4f63 100644 +--- a/drivers/infiniband/hw/irdma/hw.c ++++ b/drivers/infiniband/hw/irdma/hw.c +@@ -1489,7 +1489,7 @@ void irdma_reinitialize_ieq(struct irdma_sc_vsi *vsi) + + irdma_puda_dele_rsrc(vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, false); + if (irdma_initialize_ieq(iwdev)) { +- iwdev->reset = true; ++ iwdev->rf->reset = true; + rf->gen_ops.request_reset(rf); + } + } +@@ -1632,13 +1632,13 @@ void irdma_rt_deinit_hw(struct irdma_device *iwdev) + case IEQ_CREATED: + if (!iwdev->roce_mode) + irdma_puda_dele_rsrc(&iwdev->vsi, IRDMA_PUDA_RSRC_TYPE_IEQ, +- iwdev->reset); ++ iwdev->rf->reset); + fallthrough; + case ILQ_CREATED: + if (!iwdev->roce_mode) + irdma_puda_dele_rsrc(&iwdev->vsi, + IRDMA_PUDA_RSRC_TYPE_ILQ, +- iwdev->reset); ++ iwdev->rf->reset); + break; + default: + ibdev_warn(&iwdev->ibdev, "bad init_state = %d\n", iwdev->init_state); +diff --git a/drivers/infiniband/hw/irdma/i40iw_if.c b/drivers/infiniband/hw/irdma/i40iw_if.c +index bddf88194d09..d219f64b2c3d 100644 +--- a/drivers/infiniband/hw/irdma/i40iw_if.c ++++ b/drivers/infiniband/hw/irdma/i40iw_if.c +@@ -55,7 +55,7 @@ static void i40iw_close(struct i40e_info *cdev_info, struct i40e_client *client, + + iwdev = to_iwdev(ibdev); + if (reset) +- iwdev->reset = true; ++ iwdev->rf->reset = true; + + iwdev->iw_status = 0; + irdma_port_ibevent(iwdev); +diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h +index 743d9e143a99..b678fe712447 100644 +--- a/drivers/infiniband/hw/irdma/main.h ++++ b/drivers/infiniband/hw/irdma/main.h +@@ -346,7 +346,6 @@ struct irdma_device { + bool roce_mode:1; + 
bool roce_dcqcn_en:1; + bool dcb:1; +- bool reset:1; + bool iw_ooo:1; + enum init_completion_state init_state; + +diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c +index 5bbe44e54f9a..832e9604766b 100644 +--- a/drivers/infiniband/hw/irdma/utils.c ++++ b/drivers/infiniband/hw/irdma/utils.c +@@ -2510,7 +2510,7 @@ void irdma_modify_qp_to_err(struct irdma_sc_qp *sc_qp) + struct irdma_qp *qp = sc_qp->qp_uk.back_qp; + struct ib_qp_attr attr; + +- if (qp->iwdev->reset) ++ if (qp->iwdev->rf->reset) + return; + attr.qp_state = IB_QPS_ERR; + +diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c +index 717147ed0519..6107f37321d2 100644 +--- a/drivers/infiniband/hw/irdma/verbs.c ++++ b/drivers/infiniband/hw/irdma/verbs.c +@@ -535,8 +535,7 @@ static int irdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) + irdma_qp_rem_ref(&iwqp->ibqp); + wait_for_completion(&iwqp->free_qp); + irdma_free_lsmm_rsrc(iwqp); +- if (!iwdev->reset) +- irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); ++ irdma_cqp_qp_destroy_cmd(&iwdev->rf->sc_dev, &iwqp->sc_qp); + + if (!iwqp->user_mode) { + if (iwqp->iwscq) { +-- +2.33.0 + diff --git a/queue-5.14/rdma-irdma-validate-number-of-cq-entries-on-create-c.patch b/queue-5.14/rdma-irdma-validate-number-of-cq-entries-on-create-c.patch new file mode 100644 index 00000000000..f90b53ae6bd --- /dev/null +++ b/queue-5.14/rdma-irdma-validate-number-of-cq-entries-on-create-c.patch @@ -0,0 +1,37 @@ +From f143796a2a2bf976afe75090ff6d55b2b870d734 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Sep 2021 14:12:20 -0500 +Subject: RDMA/irdma: Validate number of CQ entries on create CQ + +From: Sindhu Devale + +[ Upstream commit f4475f249445b3c1fb99919b0514a075b6d6b3d4 ] + +Add lower bound check for CQ entries at creation time. + +Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs") +Link: https://lore.kernel.org/r/20210916191222.824-3-shiraz.saleem@intel.com +Signed-off-by: Sindhu Devale +Signed-off-by: Shiraz Saleem +Signed-off-by: Jason Gunthorpe +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/irdma/verbs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c +index 6107f37321d2..6c3d28f744cb 100644 +--- a/drivers/infiniband/hw/irdma/verbs.c ++++ b/drivers/infiniband/hw/irdma/verbs.c +@@ -2040,7 +2040,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, + /* Kmode allocations */ + int rsize; + +- if (entries > rf->max_cqe) { ++ if (entries < 1 || entries > rf->max_cqe) { + err_code = -EINVAL; + goto cq_free_rsrc; + } +-- +2.33.0 + diff --git a/queue-5.14/revert-block-bfq-honor-already-setup-queue-merges.patch b/queue-5.14/revert-block-bfq-honor-already-setup-queue-merges.patch new file mode 100644 index 00000000000..aa750bba2ca --- /dev/null +++ b/queue-5.14/revert-block-bfq-honor-already-setup-queue-merges.patch @@ -0,0 +1,68 @@ +From d2b71e55565b72888d766bbbd553f3cb25f59012 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 28 Sep 2021 06:33:15 -0600 +Subject: Revert "block, bfq: honor already-setup queue merges" + +From: Jens Axboe + +[ Upstream commit ebc69e897e17373fbe1daaff1debaa77583a5284 ] + +This reverts commit 2d52c58b9c9bdae0ca3df6a1eab5745ab3f7d80b. + +We have had several folks complain that this causes hangs for them, which +is especially problematic as the commit has also hit stable already. 
+ +As no resolution seems to be forthcoming right now, revert the patch. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=214503 +Fixes: 2d52c58b9c9b ("block, bfq: honor already-setup queue merges") +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + block/bfq-iosched.c | 16 +++------------- + 1 file changed, 3 insertions(+), 13 deletions(-) + +diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c +index 3a1038b6eeb3..9360c65169ff 100644 +--- a/block/bfq-iosched.c ++++ b/block/bfq-iosched.c +@@ -2662,15 +2662,6 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) + * are likely to increase the throughput. + */ + bfqq->new_bfqq = new_bfqq; +- /* +- * The above assignment schedules the following redirections: +- * each time some I/O for bfqq arrives, the process that +- * generated that I/O is disassociated from bfqq and +- * associated with new_bfqq. Here we increases new_bfqq->ref +- * in advance, adding the number of processes that are +- * expected to be associated with new_bfqq as they happen to +- * issue I/O. +- */ + new_bfqq->ref += process_refs; + return new_bfqq; + } +@@ -2733,10 +2724,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + { + struct bfq_queue *in_service_bfqq, *new_bfqq; + +- /* if a merge has already been setup, then proceed with that first */ +- if (bfqq->new_bfqq) +- return bfqq->new_bfqq; +- + /* + * Check delayed stable merge for rotational or non-queueing + * devs. For this branch to be executed, bfqq must not be +@@ -2838,6 +2825,9 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, + if (bfq_too_late_for_merging(bfqq)) + return NULL; + ++ if (bfqq->new_bfqq) ++ return bfqq->new_bfqq; ++ + if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) + return NULL; + +-- +2.33.0 + diff --git a/queue-5.14/revert-mac80211-do-not-use-low-data-rates-for-data-f.patch b/queue-5.14/revert-mac80211-do-not-use-low-data-rates-for-data-f.patch new file mode 100644 index 00000000000..e277c3e6f0a --- /dev/null +++ b/queue-5.14/revert-mac80211-do-not-use-low-data-rates-for-data-f.patch @@ -0,0 +1,48 @@ +From 435860ed31fd37636ac37a7f6eda2033f1d11106 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 6 Sep 2021 10:35:59 +0200 +Subject: Revert "mac80211: do not use low data rates for data frames with no + ack flag" + +From: Felix Fietkau + +[ Upstream commit 98d46b021f6ee246c7a73f9d490d4cddb4511a3b ] + +This reverts commit d333322361e7 ("mac80211: do not use low data rates for +data frames with no ack flag"). + +Returning false early in rate_control_send_low breaks sending broadcast +packets, since rate control will not select a rate for it. + +Before re-introducing a fixed version of this patch, we should probably also +make some changes to rate control to be more conservative in selecting rates +for no-ack packets and also prevent using probing rates on them, since we won't +get any feedback. 
+ +Fixes: d333322361e7 ("mac80211: do not use low data rates for data frames with no ack flag") +Signed-off-by: Felix Fietkau +Link: https://lore.kernel.org/r/20210906083559.9109-1-nbd@nbd.name +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/rate.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c +index e5935e3d7a07..8c6416129d5b 100644 +--- a/net/mac80211/rate.c ++++ b/net/mac80211/rate.c +@@ -392,10 +392,6 @@ static bool rate_control_send_low(struct ieee80211_sta *pubsta, + int mcast_rate; + bool use_basicrate = false; + +- if (ieee80211_is_tx_data(txrc->skb) && +- info->flags & IEEE80211_TX_CTL_NO_ACK) +- return false; +- + if (!pubsta || rc_no_data_or_no_ack_use_min(txrc)) { + __rate_control_send_low(txrc->hw, sband, pubsta, info, + txrc->rate_idx_mask); +-- +2.33.0 + diff --git a/queue-5.14/sched-fair-add-ancestors-of-unthrottled-undecayed-cf.patch b/queue-5.14/sched-fair-add-ancestors-of-unthrottled-undecayed-cf.patch new file mode 100644 index 00000000000..b9e8494d0ea --- /dev/null +++ b/queue-5.14/sched-fair-add-ancestors-of-unthrottled-undecayed-cf.patch @@ -0,0 +1,59 @@ +From 829b2d95a87e9c0327337b33387bf842ca207a38 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 Sep 2021 17:30:37 +0200 +Subject: sched/fair: Add ancestors of unthrottled undecayed cfs_rq +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Michal Koutný + +[ Upstream commit 2630cde26711dab0d0b56a8be1616475be646d13 ] + +Since commit a7b359fc6a37 ("sched/fair: Correctly insert cfs_rq's to +list on unthrottle") we add cfs_rqs with no runnable tasks but not fully +decayed into the load (leaf) list. We may ignore adding some ancestors +and therefore breaking tmp_alone_branch invariant. This broke LTP test +cfs_bandwidth01 and it was partially fixed in commit fdaba61ef8a2 +("sched/fair: Ensure that the CFS parent is added after unthrottling"). + +I noticed the named test still fails even with the fix (but with low +probability, 1 in ~1000 executions of the test). The reason is when +bailing out of unthrottle_cfs_rq early, we may miss adding ancestors of +the unthrottled cfs_rq, thus, not joining tmp_alone_branch properly. + +Fix this by adding ancestors if we notice the unthrottled cfs_rq was +added to the load list. + +Fixes: a7b359fc6a37 ("sched/fair: Correctly insert cfs_rq's to list on unthrottle") +Signed-off-by: Michal Koutný +Signed-off-by: Peter Zijlstra (Intel) +Reviewed-by: Vincent Guittot +Reviewed-by: Odin Ugedal +Link: https://lore.kernel.org/r/20210917153037.11176-1-mkoutny@suse.com +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 30a6984a58f7..423ec671a306 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -4898,8 +4898,12 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq) + /* update hierarchical throttle state */ + walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq); + +- if (!cfs_rq->load.weight) ++ /* Nothing to run but something to decay (on_list)? 
Complete the branch */ ++ if (!cfs_rq->load.weight) { ++ if (cfs_rq->on_list) ++ goto unthrottle_throttle; + return; ++ } + + task_delta = cfs_rq->h_nr_running; + idle_task_delta = cfs_rq->idle_h_nr_running; +-- +2.33.0 + diff --git a/queue-5.14/sched-fair-null-terminate-buffer-when-updating-tunab.patch b/queue-5.14/sched-fair-null-terminate-buffer-when-updating-tunab.patch new file mode 100644 index 00000000000..01bfbb0850f --- /dev/null +++ b/queue-5.14/sched-fair-null-terminate-buffer-when-updating-tunab.patch @@ -0,0 +1,70 @@ +From 241f7fa422b24c4b9267acb8f620a971c7703310 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Sep 2021 12:46:35 +0100 +Subject: sched/fair: Null terminate buffer when updating tunable_scaling + +From: Mel Gorman + +[ Upstream commit 703066188f63d66cc6b9d678e5b5ef1213c5938e ] + +This patch null-terminates the temporary buffer in sched_scaling_write() +so kstrtouint() does not return failure and checks the value is valid. + +Before: + $ cat /sys/kernel/debug/sched/tunable_scaling + 1 + $ echo 0 > /sys/kernel/debug/sched/tunable_scaling + -bash: echo: write error: Invalid argument + $ cat /sys/kernel/debug/sched/tunable_scaling + 1 + +After: + $ cat /sys/kernel/debug/sched/tunable_scaling + 1 + $ echo 0 > /sys/kernel/debug/sched/tunable_scaling + $ cat /sys/kernel/debug/sched/tunable_scaling + 0 + $ echo 3 > /sys/kernel/debug/sched/tunable_scaling + -bash: echo: write error: Invalid argument + +Fixes: 8a99b6833c88 ("sched: Move SCHED_DEBUG sysctl to debugfs") +Signed-off-by: Mel Gorman +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Vincent Guittot +Link: https://lore.kernel.org/r/20210927114635.GH3959@techsingularity.net +Signed-off-by: Sasha Levin +--- + kernel/sched/debug.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c +index 7e08e3d947c2..2c879cd02a5f 100644 +--- a/kernel/sched/debug.c ++++ b/kernel/sched/debug.c +@@ -173,16 +173,22 @@ static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) + { + char buf[16]; ++ unsigned int scaling; + + if (cnt > 15) + cnt = 15; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; ++ buf[cnt] = '\0'; + +- if (kstrtouint(buf, 10, &sysctl_sched_tunable_scaling)) ++ if (kstrtouint(buf, 10, &scaling)) + return -EINVAL; + ++ if (scaling >= SCHED_TUNABLESCALING_END) ++ return -EINVAL; ++ ++ sysctl_sched_tunable_scaling = scaling; + if (sched_update_scaling()) + return -EINVAL; + +-- +2.33.0 + diff --git a/queue-5.14/scsi-csiostor-add-module-softdep-on-cxgb4.patch b/queue-5.14/scsi-csiostor-add-module-softdep-on-cxgb4.patch new file mode 100644 index 00000000000..93e19f0913e --- /dev/null +++ b/queue-5.14/scsi-csiostor-add-module-softdep-on-cxgb4.patch @@ -0,0 +1,44 @@ +From 5464d0aa372c8eae8fbbf165063823c9c8b6cf8d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Sep 2021 21:44:08 +0530 +Subject: scsi: csiostor: Add module softdep on cxgb4 + +From: Rahul Lakkireddy + +[ Upstream commit 79a7482249a7353bc86aff8127954d5febf02472 ] + +Both cxgb4 and csiostor drivers run on their own independent Physical +Function. But when cxgb4 and csiostor are both being loaded in parallel via +modprobe, there is a race when firmware upgrade is attempted by both the +drivers. + +When the cxgb4 driver initiates the firmware upgrade, it halts the firmware +and the chip until upgrade is complete. 
When the csiostor driver is coming +up in parallel, the firmware mailbox communication fails with timeouts and +the csiostor driver probe fails. + +Add a module soft dependency on cxgb4 driver to ensure loading csiostor +triggers cxgb4 to load first when available to avoid the firmware upgrade +race. + +Link: https://lore.kernel.org/r/1632759248-15382-1-git-send-email-rahul.lakkireddy@chelsio.com +Fixes: a3667aaed569 ("[SCSI] csiostor: Chelsio FCoE offload driver") +Signed-off-by: Rahul Lakkireddy +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/csiostor/csio_init.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c +index 390b07bf92b9..ccbded3353bd 100644 +--- a/drivers/scsi/csiostor/csio_init.c ++++ b/drivers/scsi/csiostor/csio_init.c +@@ -1254,3 +1254,4 @@ MODULE_DEVICE_TABLE(pci, csio_pci_tbl); + MODULE_VERSION(CSIO_DRV_VERSION); + MODULE_FIRMWARE(FW_FNAME_T5); + MODULE_FIRMWARE(FW_FNAME_T6); ++MODULE_SOFTDEP("pre: cxgb4"); +-- +2.33.0 + diff --git a/queue-5.14/sctp-break-out-if-skb_header_pointer-returns-null-in.patch b/queue-5.14/sctp-break-out-if-skb_header_pointer-returns-null-in.patch new file mode 100644 index 00000000000..51869348776 --- /dev/null +++ b/queue-5.14/sctp-break-out-if-skb_header_pointer-returns-null-in.patch @@ -0,0 +1,55 @@ +From 3a330d33c4f614b88bfc2731a6413f4b28154328 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Sep 2021 00:05:04 -0400 +Subject: sctp: break out if skb_header_pointer returns NULL in sctp_rcv_ootb + +From: Xin Long + +[ Upstream commit f7e745f8e94492a8ac0b0a26e25f2b19d342918f ] + +We should always check if skb_header_pointer's return is NULL before +using it, otherwise it may cause null-ptr-deref, as syzbot reported: + + KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] + RIP: 0010:sctp_rcv_ootb net/sctp/input.c:705 [inline] + RIP: 0010:sctp_rcv+0x1d84/0x3220 net/sctp/input.c:196 + Call Trace: + + sctp6_rcv+0x38/0x60 net/sctp/ipv6.c:1109 + ip6_protocol_deliver_rcu+0x2e9/0x1ca0 net/ipv6/ip6_input.c:422 + ip6_input_finish+0x62/0x170 net/ipv6/ip6_input.c:463 + NF_HOOK include/linux/netfilter.h:307 [inline] + NF_HOOK include/linux/netfilter.h:301 [inline] + ip6_input+0x9c/0xd0 net/ipv6/ip6_input.c:472 + dst_input include/net/dst.h:460 [inline] + ip6_rcv_finish net/ipv6/ip6_input.c:76 [inline] + NF_HOOK include/linux/netfilter.h:307 [inline] + NF_HOOK include/linux/netfilter.h:301 [inline] + ipv6_rcv+0x28c/0x3c0 net/ipv6/ip6_input.c:297 + +Fixes: 3acb50c18d8d ("sctp: delay as much as possible skb_linearize") +Reported-by: syzbot+581aff2ae6b860625116@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/sctp/input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/sctp/input.c b/net/sctp/input.c +index 5ef86fdb1176..1f1786021d9c 100644 +--- a/net/sctp/input.c ++++ b/net/sctp/input.c +@@ -702,7 +702,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb) + ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch); + + /* Break out if chunk length is less then minimal. 
*/ +- if (ntohs(ch->length) < sizeof(_ch)) ++ if (!ch || ntohs(ch->length) < sizeof(_ch)) + break; + + ch_end = offset + SCTP_PAD4(ntohs(ch->length)); +-- +2.33.0 + diff --git a/queue-5.14/selftests-bpf-fix-makefile-dependencies-on-libbpf.patch b/queue-5.14/selftests-bpf-fix-makefile-dependencies-on-libbpf.patch new file mode 100644 index 00000000000..59c01a3c631 --- /dev/null +++ b/queue-5.14/selftests-bpf-fix-makefile-dependencies-on-libbpf.patch @@ -0,0 +1,57 @@ +From 491f4f9b5381974f5b920ee50da3435970a46cd3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Sep 2021 18:01:36 +0200 +Subject: selftests, bpf: Fix makefile dependencies on libbpf + +From: Jiri Benc + +[ Upstream commit d888eaac4fb1df30320bb1305a8f78efe86524c6 ] + +When building bpf selftest with make -j, I'm randomly getting build failures +such as this one: + + In file included from progs/bpf_flow.c:19: + [...]/tools/testing/selftests/bpf/tools/include/bpf/bpf_helpers.h:11:10: fatal error: 'bpf_helper_defs.h' file not found + #include "bpf_helper_defs.h" + ^~~~~~~~~~~~~~~~~~~ + +The file that fails the build varies between runs but it's always in the +progs/ subdir. + +The reason is a missing make dependency on libbpf for the .o files in +progs/. There was a dependency before commit 3ac2e20fba07e but that commit +removed it to prevent unneeded rebuilds. However, that only works if libbpf +has been built already; the 'wildcard' prerequisite does not trigger when +there's no bpf_helper_defs.h generated yet. + +Keep the libbpf as an order-only prerequisite to satisfy both goals. It is +always built before the progs/ objects but it does not trigger unnecessary +rebuilds by itself. + +Fixes: 3ac2e20fba07e ("selftests/bpf: BPF object files should depend only on libbpf headers") +Signed-off-by: Jiri Benc +Signed-off-by: Andrii Nakryiko +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/ee84ab66436fba05a197f952af23c98d90eb6243.1632758415.git.jbenc@redhat.com +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/bpf/Makefile | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile +index f405b20c1e6c..93f1f124ef89 100644 +--- a/tools/testing/selftests/bpf/Makefile ++++ b/tools/testing/selftests/bpf/Makefile +@@ -374,7 +374,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ + $(TRUNNER_BPF_PROGS_DIR)/%.c \ + $(TRUNNER_BPF_PROGS_DIR)/*.h \ + $$(INCLUDE_DIR)/vmlinux.h \ +- $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT) ++ $(wildcard $(BPFDIR)/bpf_*.h) \ ++ | $(TRUNNER_OUTPUT) $$(BPFOBJ) + $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ + $(TRUNNER_BPF_CFLAGS)) + +-- +2.33.0 + diff --git a/queue-5.14/selftests-bpf-test_lwt_ip_encap-really-disable-rp_fi.patch b/queue-5.14/selftests-bpf-test_lwt_ip_encap-really-disable-rp_fi.patch new file mode 100644 index 00000000000..c1d3bed5d5a --- /dev/null +++ b/queue-5.14/selftests-bpf-test_lwt_ip_encap-really-disable-rp_fi.patch @@ -0,0 +1,57 @@ +From 792f075a1da9b8568e16b8e01882303b840ad988 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Sep 2021 10:40:22 +0200 +Subject: selftests, bpf: test_lwt_ip_encap: Really disable rp_filter + +From: Jiri Benc + +[ Upstream commit 79e2c306667542b8ee2d9a9d947eadc7039f0a3c ] + +It's not enough to set net.ipv4.conf.all.rp_filter=0, that does not override +a greater rp_filter value on the individual interfaces. We also need to set +net.ipv4.conf.default.rp_filter=0 before creating the interfaces. 
That way, +they'll also get their own rp_filter value of zero. + +Fixes: 0fde56e4385b0 ("selftests: bpf: add test_lwt_ip_encap selftest") +Signed-off-by: Jiri Benc +Signed-off-by: Daniel Borkmann +Link: https://lore.kernel.org/bpf/b1cdd9d469f09ea6e01e9c89a6071c79b7380f89.1632386362.git.jbenc@redhat.com +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/bpf/test_lwt_ip_encap.sh | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +index 59ea56945e6c..b497bb85b667 100755 +--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh ++++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +@@ -112,6 +112,14 @@ setup() + ip netns add "${NS2}" + ip netns add "${NS3}" + ++ # rp_filter gets confused by what these tests are doing, so disable it ++ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 ++ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 ++ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 ++ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0 ++ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0 ++ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0 ++ + ip link add veth1 type veth peer name veth2 + ip link add veth3 type veth peer name veth4 + ip link add veth5 type veth peer name veth6 +@@ -236,11 +244,6 @@ setup() + ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF} + ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF} + +- # rp_filter gets confused by what these tests are doing, so disable it +- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 +- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 +- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 +- + TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX) + + sleep 1 # reduce flakiness +-- +2.33.0 + diff --git a/queue-5.14/series b/queue-5.14/series index 6997e2adba0..53c961d9587 100644 --- a/queue-5.14/series +++ b/queue-5.14/series @@ -71,3 +71,74 @@ drm-amd-display-fix-display-flicker-on-embedded-panels.patch drm-amdgpu-force-exit-gfxoff-on-sdma-resume-for-rmb-s0ix.patch drm-amdgpu-check-tiling-flags-when-creating-fb-on-gfx8.patch drm-amdgpu-correct-initial-cp_hqd_quantum-for-gfx9.patch +interconnect-qcom-sdm660-fix-id-of-slv_cnoc_mnoc_cfg.patch +interconnect-qcom-sdm660-correct-noc_qos_priority-sh.patch +drm-i915-gvt-fix-the-usage-of-ww-lock-in-gvt-schedul.patch +ipvs-check-that-ip_vs_conn_tab_bits-is-between-8-and.patch +bpf-handle-return-value-of-bpf_prog_type_struct_ops-.patch +ib-cma-do-not-send-igmp-leaves-for-sendonly-multicas.patch +rdma-cma-fix-listener-leak-in-rdma_cma_listen_on_all.patch +bpf-mips-validate-conditional-branch-offsets.patch +hwmon-mlxreg-fan-return-non-zero-value-when-fan-curr.patch +rdma-irdma-skip-cqp-ring-during-a-reset.patch +rdma-irdma-validate-number-of-cq-entries-on-create-c.patch +rdma-irdma-report-correct-wc-error-when-transport-re.patch +rdma-irdma-report-correct-wc-error-when-there-are-mw.patch +netfilter-nf_tables-unlink-table-before-deleting-it.patch +netfilter-log-work-around-missing-softdep-backend-mo.patch +revert-mac80211-do-not-use-low-data-rates-for-data-f.patch +mac80211-fix-ieee80211_amsdu_aggregate-frag_tail-bug.patch +mac80211-limit-injected-vht-mcs-nss-in-ieee80211_par.patch +mac80211-mesh-fix-potentially-unaligned-access.patch +mac80211-hwsim-fix-late-beacon-hrtimer-handling.patch 
+driver-core-fw_devlink-add-support-for-fwnode_flag_n.patch +net-mdiobus-set-fwnode_flag_needs_child_bound_on_add.patch +sctp-break-out-if-skb_header_pointer-returns-null-in.patch +mptcp-don-t-return-sockets-in-foreign-netns.patch +mptcp-allow-changing-the-backup-bit-when-no-sockets-.patch +rdma-hns-work-around-broken-constant-propagation-in-.patch +hwmon-tmp421-report-pvld-condition-as-fault.patch +hwmon-tmp421-fix-rounding-for-negative-values.patch +net-enetc-fix-the-incorrect-clearing-of-if_mode-bits.patch +net-ipv4-fix-rtnexthop-len-when-rta_flow-is-present.patch +smsc95xx-fix-stalled-rx-after-link-change.patch +drm-i915-request-fix-early-tracepoints.patch +drm-i915-remove-warning-from-the-rps-worker.patch +dsa-mv88e6xxx-6161-use-chip-wide-max-mtu.patch +dsa-mv88e6xxx-fix-mtu-definition.patch +dsa-mv88e6xxx-include-tagger-overhead-when-setting-m.patch +e100-fix-length-calculation-in-e100_get_regs_len.patch +e100-fix-buffer-overrun-in-e100_get_regs.patch +rdma-hfi1-fix-kernel-pointer-leak.patch +rdma-hns-fix-the-size-setting-error-when-copying-cqe.patch +rdma-hns-add-the-check-of-the-cqe-size-of-the-user-s.patch +bpf-exempt-cap_bpf-from-checks-against-bpf_jit_limit.patch +libbpf-fix-segfault-in-static-linker-for-objects-wit.patch +selftests-bpf-fix-makefile-dependencies-on-libbpf.patch +selftests-bpf-test_lwt_ip_encap-really-disable-rp_fi.patch +bpf-x86-fix-bpf-mapping-of-atomic-fetch-implementati.patch +net-ks8851-fix-link-error.patch +ionic-fix-gathering-of-debug-stats.patch +revert-block-bfq-honor-already-setup-queue-merges.patch +scsi-csiostor-add-module-softdep-on-cxgb4.patch +ixgbe-fix-null-pointer-dereference-in-ixgbe_xdp_setu.patch +net-hns3-do-not-allow-call-hns3_nic_net_open-repeate.patch +net-hns3-remove-tc-enable-checking.patch +net-hns3-don-t-rollback-when-destroy-mqprio-fail.patch +net-hns3-fix-mixed-flag-hclge_flag_mqprio_enable-and.patch +net-hns3-fix-show-wrong-state-when-add-existing-uc-m.patch +net-hns3-reconstruct-function-hns3_self_test.patch +net-hns3-fix-always-enable-rx-vlan-filter-problem-af.patch +net-hns3-disable-firmware-compatible-features-when-u.patch +net-phy-bcm7xxx-fixed-indirect-mmd-operations.patch +net-introduce-and-use-lock_sock_fast_nested.patch +net-sched-flower-protect-fl_walk-with-rcu.patch +net-stmmac-fix-eee-init-issue-when-paired-with-eee-c.patch +af_unix-fix-races-in-sk_peer_pid-and-sk_peer_cred-ac.patch +objtool-teach-get_alt_entry-about-more-relocation-ty.patch +perf-x86-intel-update-event-constraints-for-icx.patch +sched-fair-add-ancestors-of-unthrottled-undecayed-cf.patch +sched-fair-null-terminate-buffer-when-updating-tunab.patch +hwmon-occ-fix-p10-vrm-temp-sensors.patch +hwmon-pmbus-mp2975-add-missed-pout-attribute-for-pag.patch +kvm-fix-objtool-relocation-warning.patch diff --git a/queue-5.14/smsc95xx-fix-stalled-rx-after-link-change.patch b/queue-5.14/smsc95xx-fix-stalled-rx-after-link-change.patch new file mode 100644 index 00000000000..c611d1c89b4 --- /dev/null +++ b/queue-5.14/smsc95xx-fix-stalled-rx-after-link-change.patch @@ -0,0 +1,41 @@ +From fbeaed384acc580a09bb68de923c0dcadf96380e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Sep 2021 01:00:16 +0300 +Subject: smsc95xx: fix stalled rx after link change + +From: Aaro Koskinen + +[ Upstream commit 5ab8a447bcfee1ded709e7ff5dc7608ca9f66ae2 ] + +After commit 05b35e7eb9a1 ("smsc95xx: add phylib support"), link changes +are no longer propagated to usbnet. As a result, rx URB allocation won't +happen until there is a packet sent out first (this might never happen, +e.g. 
running just ssh server with a static IP). Fix by triggering usbnet +EVENT_LINK_CHANGE. + +Fixes: 05b35e7eb9a1 ("smsc95xx: add phylib support") +Signed-off-by: Aaro Koskinen +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/usb/smsc95xx.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c +index 4c8ee1cff4d4..4cb71dd1998c 100644 +--- a/drivers/net/usb/smsc95xx.c ++++ b/drivers/net/usb/smsc95xx.c +@@ -1178,7 +1178,10 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf) + + static void smsc95xx_handle_link_change(struct net_device *net) + { ++ struct usbnet *dev = netdev_priv(net); ++ + phy_print_status(net->phydev); ++ usbnet_defer_kevent(dev, EVENT_LINK_CHANGE); + } + + static int smsc95xx_start_phy(struct usbnet *dev) +-- +2.33.0 +