From: Greg Kroah-Hartman Date: Mon, 10 Aug 2020 12:07:31 +0000 (+0200) Subject: 5.7-stable patches X-Git-Tag: v4.19.139~21 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=339b401a6db1de5a23481e27c18490a7950d94a3;p=thirdparty%2Fkernel%2Fstable-queue.git 5.7-stable patches added patches: appletalk-fix-atalk_proc_init-return-path.patch devlink-ignore-eopnotsupp-errors-on-dumpit.patch dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch hv_netvsc-do-not-use-vf-device-if-link-is-down.patch ipv4-silence-suspicious-rcu-usage-warning.patch ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch mptcp-be-careful-on-subflow-creation.patch mptcp-fix-bogus-sendmsg-return-code-under-pressure.patch net-bridge-clear-bridge-s-private-skb-space-on-xmit.patch net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch net-lan78xx-replace-bogus-endpoint-lookup.patch net-macb-properly-handle-phylink-on-at91sam9x.patch net-mvpp2-fix-memory-leak-in-mvpp2_rx.patch net-sched-act_ct-fix-miss-set-mru-for-ovs-after-defrag-in-act_ct.patch net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch pci-tegra-revert-tegra124-raw_violation_fixup.patch revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch revert-vxlan-fix-tos-value-before-xmit.patch rhashtable-restore-rcu-marking-on-rhash_lock_head.patch rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch vxlan-ensure-fdb-dump-is-performed-under-rcu.patch xattr-break-delegations-in-set-remove-xattr.patch --- diff --git a/queue-5.7/appletalk-fix-atalk_proc_init-return-path.patch b/queue-5.7/appletalk-fix-atalk_proc_init-return-path.patch new file mode 100644 index 00000000000..0e4d16fca93 --- /dev/null +++ 
b/queue-5.7/appletalk-fix-atalk_proc_init-return-path.patch @@ -0,0 +1,39 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Vincent Duvert +Date: Sun, 2 Aug 2020 07:06:51 +0200 +Subject: appletalk: Fix atalk_proc_init() return path + +From: Vincent Duvert + +[ Upstream commit d0f6ba2ef2c1c95069509e71402e7d6d43452512 ] + +Add a missing return statement to atalk_proc_init so it doesn't return +-ENOMEM when successful. This allows the appletalk module to load +properly. + +Fixes: e2bcd8b0ce6e ("appletalk: use remove_proc_subtree to simplify procfs code") +Link: https://www.downtowndougbrown.com/2020/08/hacking-up-a-fix-for-the-broken-appletalk-kernel-module-in-linux-5-1-and-newer/ +Reported-by: Christopher KOBAYASHI +Reported-by: Doug Brown +Signed-off-by: Vincent Duvert +[lukas: add missing tags] +Signed-off-by: Lukas Wunner +Cc: stable@vger.kernel.org # v5.1+ +Cc: Yue Haibing +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/appletalk/atalk_proc.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/appletalk/atalk_proc.c ++++ b/net/appletalk/atalk_proc.c +@@ -229,6 +229,8 @@ int __init atalk_proc_init(void) + sizeof(struct aarp_iter_state), NULL)) + goto out; + ++ return 0; ++ + out: + remove_proc_subtree("atalk", init_net.proc_net); + return -ENOMEM; diff --git a/queue-5.7/devlink-ignore-eopnotsupp-errors-on-dumpit.patch b/queue-5.7/devlink-ignore-eopnotsupp-errors-on-dumpit.patch new file mode 100644 index 00000000000..472c5822855 --- /dev/null +++ b/queue-5.7/devlink-ignore-eopnotsupp-errors-on-dumpit.patch @@ -0,0 +1,101 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Jakub Kicinski +Date: Tue, 28 Jul 2020 16:15:07 -0700 +Subject: devlink: ignore -EOPNOTSUPP errors on dumpit + +From: Jakub Kicinski + +[ Upstream commit 82274d075536322368ce710b211c41c37c4740b9 ] + +Number of .dumpit functions try to ignore -EOPNOTSUPP errors. +Recent change missed that, and started reporting all errors +but -EMSGSIZE back from dumps. 
This leads to situation like +this: + +$ devlink dev info +devlink answers: Operation not supported + +Dump should not report an error just because the last device +to be queried could not provide an answer. + +To fix this and avoid similar confusion make sure we clear +err properly, and not leave it set to an error if we don't +terminate the iteration. + +Fixes: c62c2cfb801b ("net: devlink: don't ignore errors during dumpit") +Signed-off-by: Jakub Kicinski +Reviewed-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/devlink.c | 24 ++++++++++++++++++------ + 1 file changed, 18 insertions(+), 6 deletions(-) + +--- a/net/core/devlink.c ++++ b/net/core/devlink.c +@@ -1065,7 +1065,9 @@ static int devlink_nl_cmd_sb_pool_get_du + devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); +- if (err && err != -EOPNOTSUPP) { ++ if (err == -EOPNOTSUPP) { ++ err = 0; ++ } else if (err) { + mutex_unlock(&devlink->lock); + goto out; + } +@@ -1266,7 +1268,9 @@ static int devlink_nl_cmd_sb_port_pool_g + devlink, devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); +- if (err && err != -EOPNOTSUPP) { ++ if (err == -EOPNOTSUPP) { ++ err = 0; ++ } else if (err) { + mutex_unlock(&devlink->lock); + goto out; + } +@@ -1498,7 +1502,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpi + devlink_sb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq); +- if (err && err != -EOPNOTSUPP) { ++ if (err == -EOPNOTSUPP) { ++ err = 0; ++ } else if (err) { + mutex_unlock(&devlink->lock); + goto out; + } +@@ -3299,7 +3305,9 @@ static int devlink_nl_cmd_param_get_dump + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); +- if (err && err != -EOPNOTSUPP) { ++ if (err == -EOPNOTSUPP) { ++ err = 0; ++ } else if (err) { + mutex_unlock(&devlink->lock); + goto out; + } +@@ -3569,7 +3577,9 @@ static int devlink_nl_cmd_port_param_get + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); +- if (err && err != -EOPNOTSUPP) 
{ ++ if (err == -EOPNOTSUPP) { ++ err = 0; ++ } else if (err) { + mutex_unlock(&devlink->lock); + goto out; + } +@@ -4479,7 +4489,9 @@ static int devlink_nl_cmd_info_get_dumpi + cb->nlh->nlmsg_seq, NLM_F_MULTI, + cb->extack); + mutex_unlock(&devlink->lock); +- if (err && err != -EOPNOTSUPP) ++ if (err == -EOPNOTSUPP) ++ err = 0; ++ else if (err) + break; + idx++; + } diff --git a/queue-5.7/dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch b/queue-5.7/dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch new file mode 100644 index 00000000000..5bd10137c85 --- /dev/null +++ b/queue-5.7/dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch @@ -0,0 +1,47 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: YueHaibing +Date: Tue, 4 Aug 2020 21:26:43 +0800 +Subject: dpaa2-eth: Fix passing zero to 'PTR_ERR' warning + +From: YueHaibing + +[ Upstream commit 02afa9c66bb954c6959877c70d9e128dcf0adce7 ] + +Fix smatch warning: + +drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c:2419 + alloc_channel() warn: passing zero to 'ERR_PTR' + +setup_dpcon() should return ERR_PTR(err) instead of zero in error +handling case. + +Fixes: d7f5a9d89a55 ("dpaa2-eth: defer probe on object allocate") +Signed-off-by: YueHaibing +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c ++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +@@ -2120,7 +2120,7 @@ close: + free: + fsl_mc_object_free(dpcon); + +- return NULL; ++ return ERR_PTR(err); + } + + static void free_dpcon(struct dpaa2_eth_priv *priv, +@@ -2144,8 +2144,8 @@ alloc_channel(struct dpaa2_eth_priv *pri + return NULL; + + channel->dpcon = setup_dpcon(priv); +- if (IS_ERR_OR_NULL(channel->dpcon)) { +- err = PTR_ERR_OR_ZERO(channel->dpcon); ++ if (IS_ERR(channel->dpcon)) { ++ err = PTR_ERR(channel->dpcon); + goto err_setup; + } + diff --git a/queue-5.7/hv_netvsc-do-not-use-vf-device-if-link-is-down.patch b/queue-5.7/hv_netvsc-do-not-use-vf-device-if-link-is-down.patch new file mode 100644 index 00000000000..810209c40cb --- /dev/null +++ b/queue-5.7/hv_netvsc-do-not-use-vf-device-if-link-is-down.patch @@ -0,0 +1,45 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Stephen Hemminger +Date: Tue, 4 Aug 2020 09:54:15 -0700 +Subject: hv_netvsc: do not use VF device if link is down + +From: Stephen Hemminger + +[ Upstream commit 7c9864bbccc23e1812ac82966555d68c13ea4006 ] + +If the accelerated networking SRIOV VF device has lost carrier +use the synthetic network device which is available as backup +path. This is a rare case since if VF link goes down, normally +the VMBus device will also loose external connectivity as well. +But if the communication is between two VM's on the same host +the VMBus device will still work. + +Reported-by: "Shah, Ashish N" +Fixes: 0c195567a8f6 ("netvsc: transparent VF management") +Signed-off-by: Stephen Hemminger +Reviewed-by: Haiyang Zhang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -532,12 +532,13 @@ static int netvsc_xmit(struct sk_buff *s + u32 hash; + struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT]; + +- /* if VF is present and up then redirect packets +- * already called with rcu_read_lock_bh ++ /* If VF is present and up then redirect packets to it. ++ * Skip the VF if it is marked down or has no carrier. ++ * If netpoll is in uses, then VF can not be used either. + */ + vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev); + if (vf_netdev && netif_running(vf_netdev) && +- !netpoll_tx_running(net)) ++ netif_carrier_ok(vf_netdev) && !netpoll_tx_running(net)) + return netvsc_vf_xmit(net, vf_netdev, skb); + + /* We will atmost need two pages to describe the rndis diff --git a/queue-5.7/ipv4-silence-suspicious-rcu-usage-warning.patch b/queue-5.7/ipv4-silence-suspicious-rcu-usage-warning.patch new file mode 100644 index 00000000000..f08e294c4c3 --- /dev/null +++ b/queue-5.7/ipv4-silence-suspicious-rcu-usage-warning.patch @@ -0,0 +1,80 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Ido Schimmel +Date: Wed, 29 Jul 2020 11:37:13 +0300 +Subject: ipv4: Silence suspicious RCU usage warning + +From: Ido Schimmel + +[ Upstream commit 83f3522860f702748143e022f1a546547314c715 ] + +fib_trie_unmerge() is called with RTNL held, but not from an RCU +read-side critical section. This leads to the following warning [1] when +the FIB alias list in a leaf is traversed with +hlist_for_each_entry_rcu(). + +Since the function is always called with RTNL held and since +modification of the list is protected by RTNL, simply use +hlist_for_each_entry() and silence the warning. 
+ +[1] +WARNING: suspicious RCU usage +5.8.0-rc4-custom-01520-gc1f937f3f83b #30 Not tainted +----------------------------- +net/ipv4/fib_trie.c:1867 RCU-list traversed in non-reader section!! + +other info that might help us debug this: + +rcu_scheduler_active = 2, debug_locks = 1 +1 lock held by ip/164: + #0: ffffffff85a27850 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x49a/0xbd0 + +stack backtrace: +CPU: 0 PID: 164 Comm: ip Not tainted 5.8.0-rc4-custom-01520-gc1f937f3f83b #30 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 +Call Trace: + dump_stack+0x100/0x184 + lockdep_rcu_suspicious+0x153/0x15d + fib_trie_unmerge+0x608/0xdb0 + fib_unmerge+0x44/0x360 + fib4_rule_configure+0xc8/0xad0 + fib_nl_newrule+0x37a/0x1dd0 + rtnetlink_rcv_msg+0x4f7/0xbd0 + netlink_rcv_skb+0x17a/0x480 + rtnetlink_rcv+0x22/0x30 + netlink_unicast+0x5ae/0x890 + netlink_sendmsg+0x98a/0xf40 + ____sys_sendmsg+0x879/0xa00 + ___sys_sendmsg+0x122/0x190 + __sys_sendmsg+0x103/0x1d0 + __x64_sys_sendmsg+0x7d/0xb0 + do_syscall_64+0x54/0xa0 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +RIP: 0033:0x7fc80a234e97 +Code: Bad RIP value. +RSP: 002b:00007ffef8b66798 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc80a234e97 +RDX: 0000000000000000 RSI: 00007ffef8b66800 RDI: 0000000000000003 +RBP: 000000005f141b1c R08: 0000000000000001 R09: 0000000000000000 +R10: 00007fc80a2a8ac0 R11: 0000000000000246 R12: 0000000000000001 +R13: 0000000000000000 R14: 00007ffef8b67008 R15: 0000556fccb10020 + +Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse") +Signed-off-by: Ido Schimmel +Reviewed-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -1864,7 +1864,7 @@ struct fib_table *fib_trie_unmerge(struc + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { + struct key_vector *local_l = NULL, *local_tp; + +- hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { ++ hlist_for_each_entry(fa, &l->leaf, fa_list) { + struct fib_alias *new_fa; + + if (local_tb->tb_id != fa->tb_id) diff --git a/queue-5.7/ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch b/queue-5.7/ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch new file mode 100644 index 00000000000..98d6ec003b6 --- /dev/null +++ b/queue-5.7/ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch @@ -0,0 +1,115 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Cong Wang +Date: Sat, 25 Jul 2020 15:40:53 -0700 +Subject: ipv6: fix memory leaks on IPV6_ADDRFORM path + +From: Cong Wang + +[ Upstream commit 8c0de6e96c9794cb523a516c465991a70245da1c ] + +IPV6_ADDRFORM causes resource leaks when converting an IPv6 socket +to IPv4, particularly struct ipv6_ac_socklist. Similar to +struct ipv6_mc_socklist, we should just close it on this path. 
+ +This bug can be easily reproduced with the following C program: + + #include <stdio.h> + #include <string.h> + #include <sys/types.h> + #include <sys/socket.h> + #include <arpa/inet.h> + + int main() + { + int s, value; + struct sockaddr_in6 addr; + struct ipv6_mreq m6; + + s = socket(AF_INET6, SOCK_DGRAM, 0); + addr.sin6_family = AF_INET6; + addr.sin6_port = htons(5000); + inet_pton(AF_INET6, "::ffff:192.168.122.194", &addr.sin6_addr); + connect(s, (struct sockaddr *)&addr, sizeof(addr)); + + inet_pton(AF_INET6, "fe80::AAAA", &m6.ipv6mr_multiaddr); + m6.ipv6mr_interface = 5; + setsockopt(s, SOL_IPV6, IPV6_JOIN_ANYCAST, &m6, sizeof(m6)); + + value = AF_INET; + setsockopt(s, SOL_IPV6, IPV6_ADDRFORM, &value, sizeof(value)); + + close(s); + return 0; + } + +Reported-by: ch3332xr@gmail.com +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/addrconf.h | 1 + + net/ipv6/anycast.c | 17 ++++++++++++----- + net/ipv6/ipv6_sockglue.c | 1 + + 3 files changed, 14 insertions(+), 5 deletions(-) + +--- a/include/net/addrconf.h ++++ b/include/net/addrconf.h +@@ -276,6 +276,7 @@ int ipv6_sock_ac_join(struct sock *sk, i + const struct in6_addr *addr); + int ipv6_sock_ac_drop(struct sock *sk, int ifindex, + const struct in6_addr *addr); ++void __ipv6_sock_ac_close(struct sock *sk); + void ipv6_sock_ac_close(struct sock *sk); + + int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr); +--- a/net/ipv6/anycast.c ++++ b/net/ipv6/anycast.c +@@ -183,7 +183,7 @@ int ipv6_sock_ac_drop(struct sock *sk, i + return 0; + } + +-void ipv6_sock_ac_close(struct sock *sk) ++void __ipv6_sock_ac_close(struct sock *sk) + { + struct ipv6_pinfo *np = inet6_sk(sk); + struct net_device *dev = NULL; +@@ -191,10 +191,7 @@ void ipv6_sock_ac_close(struct sock *sk) + struct net *net = sock_net(sk); + int prev_index; + +- if (!np->ipv6_ac_list) +- return; +- +- rtnl_lock(); ++ ASSERT_RTNL(); + pac = np->ipv6_ac_list; + np->ipv6_ac_list = NULL; + +@@
-211,6 +208,16 @@ void ipv6_sock_ac_close(struct sock *sk) + sock_kfree_s(sk, pac, sizeof(*pac)); + pac = next; + } ++} ++ ++void ipv6_sock_ac_close(struct sock *sk) ++{ ++ struct ipv6_pinfo *np = inet6_sk(sk); ++ ++ if (!np->ipv6_ac_list) ++ return; ++ rtnl_lock(); ++ __ipv6_sock_ac_close(sk); + rtnl_unlock(); + } + +--- a/net/ipv6/ipv6_sockglue.c ++++ b/net/ipv6/ipv6_sockglue.c +@@ -205,6 +205,7 @@ static int do_ipv6_setsockopt(struct soc + + fl6_free_socklist(sk); + __ipv6_sock_mc_close(sk); ++ __ipv6_sock_ac_close(sk); + + /* + * Sock is moving from IPv6 to IPv4 (sk_prot), so diff --git a/queue-5.7/ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch b/queue-5.7/ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch new file mode 100644 index 00000000000..c551ed267fd --- /dev/null +++ b/queue-5.7/ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch @@ -0,0 +1,54 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Xiyu Yang +Date: Sat, 25 Jul 2020 16:02:18 +0800 +Subject: ipv6: Fix nexthop refcnt leak when creating ipv6 route info + +From: Xiyu Yang + +[ Upstream commit 706ec919164622ff5ce822065472d0f30a9e9dd2 ] + +ip6_route_info_create() invokes nexthop_get(), which increases the +refcount of the "nh". + +When ip6_route_info_create() returns, local variable "nh" becomes +invalid, so the refcount should be decreased to keep refcount balanced. + +The reference counting issue happens in one exception handling path of +ip6_route_info_create(). When nexthops can not be used with source +routing, the function forgets to decrease the refcnt increased by +nexthop_get(), causing a refcnt leak. + +Fix this issue by pulling up the error source routing handling when +nexthops can not be used with source routing. + +Fixes: f88d8ea67fbd ("ipv6: Plumb support for nexthop object in a fib6_info") +Signed-off-by: Xiyu Yang +Signed-off-by: Xin Tan +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -3677,14 +3677,14 @@ static struct fib6_info *ip6_route_info_ + rt->fib6_src.plen = cfg->fc_src_len; + #endif + if (nh) { +- if (!nexthop_get(nh)) { +- NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); +- goto out; +- } + if (rt->fib6_src.plen) { + NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); + goto out; + } ++ if (!nexthop_get(nh)) { ++ NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); ++ goto out; ++ } + rt->nh = nh; + fib6_nh = nexthop_fib6_nh(rt->nh); + } else { diff --git a/queue-5.7/mptcp-be-careful-on-subflow-creation.patch b/queue-5.7/mptcp-be-careful-on-subflow-creation.patch new file mode 100644 index 00000000000..41af3b658b1 --- /dev/null +++ b/queue-5.7/mptcp-be-careful-on-subflow-creation.patch @@ -0,0 +1,72 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Paolo Abeni +Date: Tue, 4 Aug 2020 18:31:06 +0200 +Subject: mptcp: be careful on subflow creation + +From: Paolo Abeni + +[ Upstream commit adf7341064982de923a1f8a11bcdec48be6b3004 ] + +Nicolas reported the following oops: + +[ 1521.392541] BUG: kernel NULL pointer dereference, address: 00000000000000c0 +[ 1521.394189] #PF: supervisor read access in kernel mode +[ 1521.395376] #PF: error_code(0x0000) - not-present page +[ 1521.396607] PGD 0 P4D 0 +[ 1521.397156] Oops: 0000 [#1] SMP PTI +[ 1521.398020] CPU: 0 PID: 22986 Comm: kworker/0:2 Not tainted 5.8.0-rc4+ #109 +[ 1521.399618] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 +[ 1521.401728] Workqueue: events mptcp_worker +[ 1521.402651] RIP: 0010:mptcp_subflow_create_socket+0xf1/0x1c0 +[ 1521.403954] Code: 24 08 89 44 24 04 48 8b 7a 18 e8 2a 48 d4 ff 8b 44 24 04 85 c0 75 7a 48 8b 8b 78 02 00 00 48 8b 54 24 08 48 8d bb 80 00 00 00 <48> 8b 89 c0 00 00 00 48 89 8a c0 00 00 00 48 8b 8b 78 02 00 
00 8b +[ 1521.408201] RSP: 0000:ffffabc4002d3c60 EFLAGS: 00010246 +[ 1521.409433] RAX: 0000000000000000 RBX: ffffa0b9ad8c9a00 RCX: 0000000000000000 +[ 1521.411096] RDX: ffffa0b9ae78a300 RSI: 00000000fffffe01 RDI: ffffa0b9ad8c9a80 +[ 1521.412734] RBP: ffffa0b9adff2e80 R08: ffffa0b9af02d640 R09: ffffa0b9ad923a00 +[ 1521.414333] R10: ffffabc4007139f8 R11: fefefefefefefeff R12: ffffabc4002d3cb0 +[ 1521.415918] R13: ffffa0b9ad91fa58 R14: ffffa0b9ad8c9f9c R15: 0000000000000000 +[ 1521.417592] FS: 0000000000000000(0000) GS:ffffa0b9af000000(0000) knlGS:0000000000000000 +[ 1521.419490] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 1521.420839] CR2: 00000000000000c0 CR3: 000000002951e006 CR4: 0000000000160ef0 +[ 1521.422511] Call Trace: +[ 1521.423103] __mptcp_subflow_connect+0x94/0x1f0 +[ 1521.425376] mptcp_pm_create_subflow_or_signal_addr+0x200/0x2a0 +[ 1521.426736] mptcp_worker+0x31b/0x390 +[ 1521.431324] process_one_work+0x1fc/0x3f0 +[ 1521.432268] worker_thread+0x2d/0x3b0 +[ 1521.434197] kthread+0x117/0x130 +[ 1521.435783] ret_from_fork+0x22/0x30 + +on some unconventional configuration. + +The MPTCP protocol is trying to create a subflow for an +unaccepted server socket. That is allowed by the RFC, even +if subflow creation will likely fail. +Unaccepted sockets have still a NULL sk_socket field, +avoid the issue by failing earlier. + +Reported-and-tested-by: Nicolas Rybowski +Fixes: 7d14b0d2b9b3 ("mptcp: set correct vfs info for subflows") +Signed-off-by: Paolo Abeni +Reviewed-by: Matthieu Baerts +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/subflow.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/mptcp/subflow.c ++++ b/net/mptcp/subflow.c +@@ -999,6 +999,12 @@ int mptcp_subflow_create_socket(struct s + struct socket *sf; + int err; + ++ /* un-accepted server sockets can reach here - on bad configuration ++ * bail early to avoid greater trouble later ++ */ ++ if (unlikely(!sk->sk_socket)) ++ return -EINVAL; ++ + err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP, + &sf); + if (err) diff --git a/queue-5.7/mptcp-fix-bogus-sendmsg-return-code-under-pressure.patch b/queue-5.7/mptcp-fix-bogus-sendmsg-return-code-under-pressure.patch new file mode 100644 index 00000000000..fb37dccf95b --- /dev/null +++ b/queue-5.7/mptcp-fix-bogus-sendmsg-return-code-under-pressure.patch @@ -0,0 +1,45 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Paolo Abeni +Date: Mon, 3 Aug 2020 18:40:39 +0200 +Subject: mptcp: fix bogus sendmsg() return code under pressure + +From: Paolo Abeni + +[ Upstream commit 8555c6bfd5fddb1cf363d3cd157d70a1bb27f718 ] + +In case of memory pressure, mptcp_sendmsg() may call +sk_stream_wait_memory() after succesfully xmitting some +bytes. If the latter fails we currently return to the +user-space the error code, ignoring the succeful xmit. + +Address the issue always checking for the xmitted bytes +before mptcp_sendmsg() completes. + +Fixes: f296234c98a8 ("mptcp: Add handling of incoming MP_JOIN requests") +Reviewed-by: Matthieu Baerts +Signed-off-by: Paolo Abeni +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/mptcp/protocol.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/mptcp/protocol.c ++++ b/net/mptcp/protocol.c +@@ -802,7 +802,6 @@ fallback: + + mptcp_set_timeout(sk, ssk); + if (copied) { +- ret = copied; + tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, + size_goal); + +@@ -815,7 +814,7 @@ fallback: + release_sock(ssk); + out: + release_sock(sk); +- return ret; ++ return copied ? : ret; + } + + static void mptcp_wait_data(struct sock *sk, long *timeo) diff --git a/queue-5.7/net-bridge-clear-bridge-s-private-skb-space-on-xmit.patch b/queue-5.7/net-bridge-clear-bridge-s-private-skb-space-on-xmit.patch new file mode 100644 index 00000000000..928a5219821 --- /dev/null +++ b/queue-5.7/net-bridge-clear-bridge-s-private-skb-space-on-xmit.patch @@ -0,0 +1,36 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Nikolay Aleksandrov +Date: Fri, 31 Jul 2020 19:26:16 +0300 +Subject: net: bridge: clear bridge's private skb space on xmit + +From: Nikolay Aleksandrov + +[ Upstream commit fd65e5a95d08389444e8591a20538b3edece0e15 ] + +We need to clear all of the bridge private skb variables as they can be +stale due to the packet being recirculated through the stack and then +transmitted through the bridge device. Similar memset is already done on +bridge's input. We've seen cases where proxyarp_replied was 1 on routed +multicast packets transmitted through the bridge to ports with neigh +suppress which were getting dropped. Same thing can in theory happen with +the port isolation bit as well. + +Fixes: 821f1b21cabb ("bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd flood") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_device.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/bridge/br_device.c ++++ b/net/bridge/br_device.c +@@ -36,6 +36,8 @@ netdev_tx_t br_dev_xmit(struct sk_buff * + const unsigned char *dest; + u16 vid = 0; + ++ memset(skb->cb, 0, sizeof(struct br_input_skb_cb)); ++ + rcu_read_lock(); + nf_ops = rcu_dereference(nf_br_ops); + if (nf_ops && nf_ops->br_dev_xmit_hook(skb)) { diff --git a/queue-5.7/net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch b/queue-5.7/net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch new file mode 100644 index 00000000000..7b49b9f93d9 --- /dev/null +++ b/queue-5.7/net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch @@ -0,0 +1,69 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Lorenzo Bianconi +Date: Fri, 31 Jul 2020 20:12:05 +0200 +Subject: net: gre: recompute gre csum for sctp over gre tunnels + +From: Lorenzo Bianconi + +[ Upstream commit 622e32b7d4a6492cf5c1f759ef833f817418f7b3 ] + +The GRE tunnel can be used to transport traffic that does not rely on a +Internet checksum (e.g. SCTP). The issue can be triggered creating a GRE +or GRETAP tunnel and transmitting SCTP traffic ontop of it where CRC +offload has been disabled. In order to fix the issue we need to +recompute the GRE csum in gre_gso_segment() not relying on the inner +checksum. +The issue is still present when we have the CRC offload enabled. +In this case we need to disable the CRC offload if we require GRE +checksum since otherwise skb_checksum() will report a wrong value. + +Fixes: 90017accff61 ("sctp: Add GSO support") +Signed-off-by: Lorenzo Bianconi +Reviewed-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/gre_offload.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +--- a/net/ipv4/gre_offload.c ++++ b/net/ipv4/gre_offload.c +@@ -15,12 +15,12 @@ static struct sk_buff *gre_gso_segment(s + netdev_features_t features) + { + int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); ++ bool need_csum, need_recompute_csum, gso_partial; + struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; + __be16 protocol = skb->protocol; + u16 mac_len = skb->mac_len; + int gre_offset, outer_hlen; +- bool need_csum, gso_partial; + + if (!skb->encapsulation) + goto out; +@@ -41,6 +41,7 @@ static struct sk_buff *gre_gso_segment(s + skb->protocol = skb->inner_protocol; + + need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); ++ need_recompute_csum = skb->csum_not_inet; + skb->encap_hdr_csum = need_csum; + + features &= skb->dev->hw_enc_features; +@@ -98,7 +99,15 @@ static struct sk_buff *gre_gso_segment(s + } + + *(pcsum + 1) = 0; +- *pcsum = gso_make_checksum(skb, 0); ++ if (need_recompute_csum && !skb_is_gso(skb)) { ++ __wsum csum; ++ ++ csum = skb_checksum(skb, gre_offset, ++ skb->len - gre_offset, 0); ++ *pcsum = csum_fold(csum); ++ } else { ++ *pcsum = gso_make_checksum(skb, 0); ++ } + } while ((skb = skb->next)); + out: + return segs; diff --git a/queue-5.7/net-lan78xx-replace-bogus-endpoint-lookup.patch b/queue-5.7/net-lan78xx-replace-bogus-endpoint-lookup.patch new file mode 100644 index 00000000000..1de0be92405 --- /dev/null +++ b/queue-5.7/net-lan78xx-replace-bogus-endpoint-lookup.patch @@ -0,0 +1,189 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Johan Hovold +Date: Tue, 28 Jul 2020 14:10:31 +0200 +Subject: net: lan78xx: replace bogus endpoint lookup + +From: Johan Hovold + +[ Upstream commit ea060b352654a8de1e070140d25fe1b7e4d50310 ] + +Drop the bogus endpoint-lookup helper which could end up accepting +interfaces based on endpoints belonging 
to unrelated altsettings. + +Note that the returned bulk pipes and interrupt endpoint descriptor +were never actually used. Instead the bulk-endpoint numbers are +hardcoded to 1 and 2 (matching the specification), while the interrupt- +endpoint descriptor was assumed to be the third descriptor created by +USB core. + +Try to bring some order to this by dropping the bogus lookup helper and +adding the missing endpoint sanity checks while keeping the interrupt- +descriptor assumption for now. + +Signed-off-by: Johan Hovold +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/lan78xx.c | 117 +++++++++++----------------------------------- + 1 file changed, 30 insertions(+), 87 deletions(-) + +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -377,10 +377,6 @@ struct lan78xx_net { + struct tasklet_struct bh; + struct delayed_work wq; + +- struct usb_host_endpoint *ep_blkin; +- struct usb_host_endpoint *ep_blkout; +- struct usb_host_endpoint *ep_intr; +- + int msg_enable; + + struct urb *urb_intr; +@@ -2860,78 +2856,12 @@ lan78xx_start_xmit(struct sk_buff *skb, + return NETDEV_TX_OK; + } + +-static int +-lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf) +-{ +- int tmp; +- struct usb_host_interface *alt = NULL; +- struct usb_host_endpoint *in = NULL, *out = NULL; +- struct usb_host_endpoint *status = NULL; +- +- for (tmp = 0; tmp < intf->num_altsetting; tmp++) { +- unsigned ep; +- +- in = NULL; +- out = NULL; +- status = NULL; +- alt = intf->altsetting + tmp; +- +- for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) { +- struct usb_host_endpoint *e; +- int intr = 0; +- +- e = alt->endpoint + ep; +- switch (e->desc.bmAttributes) { +- case USB_ENDPOINT_XFER_INT: +- if (!usb_endpoint_dir_in(&e->desc)) +- continue; +- intr = 1; +- /* FALLTHROUGH */ +- case USB_ENDPOINT_XFER_BULK: +- break; +- default: +- continue; +- } +- if (usb_endpoint_dir_in(&e->desc)) { +- if (!intr && !in) +- in = e; +- else 
if (intr && !status) +- status = e; +- } else { +- if (!out) +- out = e; +- } +- } +- if (in && out) +- break; +- } +- if (!alt || !in || !out) +- return -EINVAL; +- +- dev->pipe_in = usb_rcvbulkpipe(dev->udev, +- in->desc.bEndpointAddress & +- USB_ENDPOINT_NUMBER_MASK); +- dev->pipe_out = usb_sndbulkpipe(dev->udev, +- out->desc.bEndpointAddress & +- USB_ENDPOINT_NUMBER_MASK); +- dev->ep_intr = status; +- +- return 0; +-} +- + static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf) + { + struct lan78xx_priv *pdata = NULL; + int ret; + int i; + +- ret = lan78xx_get_endpoints(dev, intf); +- if (ret) { +- netdev_warn(dev->net, "lan78xx_get_endpoints failed: %d\n", +- ret); +- return ret; +- } +- + dev->data[0] = (unsigned long)kzalloc(sizeof(*pdata), GFP_KERNEL); + + pdata = (struct lan78xx_priv *)(dev->data[0]); +@@ -3700,6 +3630,7 @@ static void lan78xx_stat_monitor(struct + static int lan78xx_probe(struct usb_interface *intf, + const struct usb_device_id *id) + { ++ struct usb_host_endpoint *ep_blkin, *ep_blkout, *ep_intr; + struct lan78xx_net *dev; + struct net_device *netdev; + struct usb_device *udev; +@@ -3748,6 +3679,34 @@ static int lan78xx_probe(struct usb_inte + + mutex_init(&dev->stats.access_lock); + ++ if (intf->cur_altsetting->desc.bNumEndpoints < 3) { ++ ret = -ENODEV; ++ goto out2; ++ } ++ ++ dev->pipe_in = usb_rcvbulkpipe(udev, BULK_IN_PIPE); ++ ep_blkin = usb_pipe_endpoint(udev, dev->pipe_in); ++ if (!ep_blkin || !usb_endpoint_is_bulk_in(&ep_blkin->desc)) { ++ ret = -ENODEV; ++ goto out2; ++ } ++ ++ dev->pipe_out = usb_sndbulkpipe(udev, BULK_OUT_PIPE); ++ ep_blkout = usb_pipe_endpoint(udev, dev->pipe_out); ++ if (!ep_blkout || !usb_endpoint_is_bulk_out(&ep_blkout->desc)) { ++ ret = -ENODEV; ++ goto out2; ++ } ++ ++ ep_intr = &intf->cur_altsetting->endpoint[2]; ++ if (!usb_endpoint_is_int_in(&ep_intr->desc)) { ++ ret = -ENODEV; ++ goto out2; ++ } ++ ++ dev->pipe_intr = usb_rcvintpipe(dev->udev, ++ 
usb_endpoint_num(&ep_intr->desc)); ++ + ret = lan78xx_bind(dev, intf); + if (ret < 0) + goto out2; +@@ -3759,23 +3718,7 @@ static int lan78xx_probe(struct usb_inte + netdev->max_mtu = MAX_SINGLE_PACKET_SIZE; + netif_set_gso_max_size(netdev, MAX_SINGLE_PACKET_SIZE - MAX_HEADER); + +- if (intf->cur_altsetting->desc.bNumEndpoints < 3) { +- ret = -ENODEV; +- goto out3; +- } +- +- dev->ep_blkin = (intf->cur_altsetting)->endpoint + 0; +- dev->ep_blkout = (intf->cur_altsetting)->endpoint + 1; +- dev->ep_intr = (intf->cur_altsetting)->endpoint + 2; +- +- dev->pipe_in = usb_rcvbulkpipe(udev, BULK_IN_PIPE); +- dev->pipe_out = usb_sndbulkpipe(udev, BULK_OUT_PIPE); +- +- dev->pipe_intr = usb_rcvintpipe(dev->udev, +- dev->ep_intr->desc.bEndpointAddress & +- USB_ENDPOINT_NUMBER_MASK); +- period = dev->ep_intr->desc.bInterval; +- ++ period = ep_intr->desc.bInterval; + maxp = usb_maxpacket(dev->udev, dev->pipe_intr, 0); + buf = kmalloc(maxp, GFP_KERNEL); + if (buf) { diff --git a/queue-5.7/net-macb-properly-handle-phylink-on-at91sam9x.patch b/queue-5.7/net-macb-properly-handle-phylink-on-at91sam9x.patch new file mode 100644 index 00000000000..44fa48ed608 --- /dev/null +++ b/queue-5.7/net-macb-properly-handle-phylink-on-at91sam9x.patch @@ -0,0 +1,60 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Stefan Roese +Date: Tue, 4 Aug 2020 14:17:16 +0200 +Subject: net: macb: Properly handle phylink on at91sam9x + +From: Stefan Roese + +[ Upstream commit f7ba7dbf4f7af67b5936ff1cbd40a3254b409ebf ] + +I just recently noticed that ethernet does not work anymore since v5.5 +on the GARDENA smart Gateway, which is based on the AT91SAM9G25. +Debugging showed that the "GEM bits" in the NCFGR register are now +unconditionally accessed, which is incorrect for the !macb_is_gem() +case. + +This patch adds the macb_is_gem() checks back to the code +(in macb_mac_config() & macb_mac_link_up()), so that the GEM register +bits are not accessed in this case any more. 
+ +Fixes: 7897b071ac3b ("net: macb: convert to phylink") +Signed-off-by: Stefan Roese +Cc: Reto Schneider +Cc: Alexandre Belloni +Cc: Nicolas Ferre +Cc: David S. Miller +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cadence/macb_main.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/cadence/macb_main.c ++++ b/drivers/net/ethernet/cadence/macb_main.c +@@ -578,7 +578,7 @@ static void macb_mac_config(struct phyli + if (bp->caps & MACB_CAPS_MACB_IS_EMAC) { + if (state->interface == PHY_INTERFACE_MODE_RMII) + ctrl |= MACB_BIT(RM9200_RMII); +- } else { ++ } else if (macb_is_gem(bp)) { + ctrl &= ~(GEM_BIT(SGMIIEN) | GEM_BIT(PCSSEL)); + + if (state->interface == PHY_INTERFACE_MODE_SGMII) +@@ -639,10 +639,13 @@ static void macb_mac_link_up(struct phyl + ctrl |= MACB_BIT(FD); + + if (!(bp->caps & MACB_CAPS_MACB_IS_EMAC)) { +- ctrl &= ~(GEM_BIT(GBE) | MACB_BIT(PAE)); +- +- if (speed == SPEED_1000) +- ctrl |= GEM_BIT(GBE); ++ ctrl &= ~MACB_BIT(PAE); ++ if (macb_is_gem(bp)) { ++ ctrl &= ~GEM_BIT(GBE); ++ ++ if (speed == SPEED_1000) ++ ctrl |= GEM_BIT(GBE); ++ } + + /* We do not support MLO_PAUSE_RX yet */ + if (tx_pause) diff --git a/queue-5.7/net-mvpp2-fix-memory-leak-in-mvpp2_rx.patch b/queue-5.7/net-mvpp2-fix-memory-leak-in-mvpp2_rx.patch new file mode 100644 index 00000000000..c5795cd26c0 --- /dev/null +++ b/queue-5.7/net-mvpp2-fix-memory-leak-in-mvpp2_rx.patch @@ -0,0 +1,30 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Lorenzo Bianconi +Date: Fri, 31 Jul 2020 10:38:32 +0200 +Subject: net: mvpp2: fix memory leak in mvpp2_rx + +From: Lorenzo Bianconi + +[ Upstream commit d6526926de7397a97308780911565e31a6b67b59 ] + +Release skb memory in mvpp2_rx() if mvpp2_rx_refill routine fails + +Fixes: b5015854674b ("net: mvpp2: fix refilling BM pools in RX path") +Signed-off-by: Lorenzo Bianconi +Acked-by: Matteo Croce +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c ++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +@@ -2981,6 +2981,7 @@ static int mvpp2_rx(struct mvpp2_port *p + err = mvpp2_rx_refill(port, bm_pool, pool); + if (err) { + netdev_err(port->dev, "failed to refill BM pools\n"); ++ dev_kfree_skb_any(skb); + goto err_drop_frame; + } + diff --git a/queue-5.7/net-sched-act_ct-fix-miss-set-mru-for-ovs-after-defrag-in-act_ct.patch b/queue-5.7/net-sched-act_ct-fix-miss-set-mru-for-ovs-after-defrag-in-act_ct.patch new file mode 100644 index 00000000000..e41d03f83c4 --- /dev/null +++ b/queue-5.7/net-sched-act_ct-fix-miss-set-mru-for-ovs-after-defrag-in-act_ct.patch @@ -0,0 +1,109 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: wenxu +Date: Fri, 31 Jul 2020 10:45:01 +0800 +Subject: net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct + +From: wenxu + +[ Upstream commit 038ebb1a713d114d54dbf14868a73181c0c92758 ] + +When openvswitch conntrack offload with act_ct action. Fragment packets +defrag in the ingress tc act_ct action and miss the next chain. Then the +packet pass to the openvswitch datapath without the mru. The over +mtu packet will be dropped in output action in openvswitch for over mtu. + +"kernel: net2: dropped over-mtu packet: 1528 > 1500" + +This patch adds mru in the tc_skb_ext for a defrag and miss next chain +situation. And also adds mru in the qdisc_skb_cb. The act_ct set the mru +to the qdisc_skb_cb when the packet defrag. And when the chain miss, +the mru is set to tc_skb_ext which can be got by ovs datapath. + +Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct") +Signed-off-by: wenxu +Reviewed-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/skbuff.h | 1 + + include/net/sch_generic.h | 3 ++- + net/openvswitch/flow.c | 1 + + net/sched/act_ct.c | 8 ++++++-- + net/sched/cls_api.c | 1 + + 5 files changed, 11 insertions(+), 3 deletions(-) + +--- a/include/linux/skbuff.h ++++ b/include/linux/skbuff.h +@@ -283,6 +283,7 @@ struct nf_bridge_info { + */ + struct tc_skb_ext { + __u32 chain; ++ __u16 mru; + }; + #endif + +--- a/include/net/sch_generic.h ++++ b/include/net/sch_generic.h +@@ -380,6 +380,7 @@ struct qdisc_skb_cb { + }; + #define QDISC_CB_PRIV_LEN 20 + unsigned char data[QDISC_CB_PRIV_LEN]; ++ u16 mru; + }; + + typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv); +@@ -459,7 +460,7 @@ static inline void qdisc_cb_private_vali + { + struct qdisc_skb_cb *qcb; + +- BUILD_BUG_ON(sizeof(skb->cb) < offsetof(struct qdisc_skb_cb, data) + sz); ++ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(*qcb)); + BUILD_BUG_ON(sizeof(qcb->data) < sz); + } + +--- a/net/openvswitch/flow.c ++++ b/net/openvswitch/flow.c +@@ -890,6 +890,7 @@ int ovs_flow_key_extract(const struct ip + if (static_branch_unlikely(&tc_recirc_sharing_support)) { + tc_ext = skb_ext_find(skb, TC_SKB_EXT); + key->recirc_id = tc_ext ? tc_ext->chain : 0; ++ OVS_CB(skb)->mru = tc_ext ? 
tc_ext->mru : 0; + } else { + key->recirc_id = 0; + } +--- a/net/sched/act_ct.c ++++ b/net/sched/act_ct.c +@@ -704,8 +704,10 @@ static int tcf_ct_handle_fragments(struc + if (err && err != -EINPROGRESS) + goto out_free; + +- if (!err) ++ if (!err) { + *defrag = true; ++ cb.mru = IPCB(skb)->frag_max_size; ++ } + } else { /* NFPROTO_IPV6 */ + #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; +@@ -715,8 +717,10 @@ static int tcf_ct_handle_fragments(struc + if (err && err != -EINPROGRESS) + goto out_free; + +- if (!err) ++ if (!err) { + *defrag = true; ++ cb.mru = IP6CB(skb)->frag_max_size; ++ } + #else + err = -EOPNOTSUPP; + goto out_free; +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -1679,6 +1679,7 @@ int tcf_classify_ingress(struct sk_buff + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + ext->chain = last_executed_chain; ++ ext->mru = qdisc_skb_cb(skb)->mru; + } + + return ret; diff --git a/queue-5.7/net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch b/queue-5.7/net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch new file mode 100644 index 00000000000..9ab679724d9 --- /dev/null +++ b/queue-5.7/net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch @@ -0,0 +1,60 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Xin Long +Date: Tue, 4 Aug 2020 15:02:30 +0800 +Subject: net: thunderx: use spin_lock_bh in nicvf_set_rx_mode_task() + +From: Xin Long + +[ Upstream commit bab9693a9a8c6dd19f670408ec1e78e12a320682 ] + +A dead lock was triggered on thunderx driver: + + CPU0 CPU1 + ---- ---- + [01] lock(&(&nic->rx_mode_wq_lock)->rlock); + [11] lock(&(&mc->mca_lock)->rlock); + [12] lock(&(&nic->rx_mode_wq_lock)->rlock); + [02] lock(&(&mc->mca_lock)->rlock); + +The path for each is: + + [01] worker_thread() -> process_one_work() -> nicvf_set_rx_mode_task() + [02] mld_ifc_timer_expire() + [11] ipv6_add_dev() -> ipv6_dev_mc_inc() -> igmp6_group_added() -> + [12] dev_mc_add() -> 
__dev_set_rx_mode() -> nicvf_set_rx_mode() + +To fix it, it needs to disable bh on [1], so that the timer on [2] +wouldn't be triggered until rx_mode_wq_lock is released. So change +to use spin_lock_bh() instead of spin_lock(). + +Thanks to Paolo for helping with this. + +v1->v2: + - post to netdev. + +Reported-by: Rafael P. +Tested-by: Dean Nelson +Fixes: 469998c861fa ("net: thunderx: prevent concurrent data re-writing by nicvf_set_rx_mode") +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/cavium/thunder/nicvf_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c ++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c +@@ -2041,11 +2041,11 @@ static void nicvf_set_rx_mode_task(struc + /* Save message data locally to prevent them from + * being overwritten by next ndo_set_rx_mode call(). + */ +- spin_lock(&nic->rx_mode_wq_lock); ++ spin_lock_bh(&nic->rx_mode_wq_lock); + mode = vf_work->mode; + mc = vf_work->mc; + vf_work->mc = NULL; +- spin_unlock(&nic->rx_mode_wq_lock); ++ spin_unlock_bh(&nic->rx_mode_wq_lock); + + __nicvf_set_rx_mode_task(mode, mc, nic); + } diff --git a/queue-5.7/openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch b/queue-5.7/openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch new file mode 100644 index 00000000000..dd16d3b1234 --- /dev/null +++ b/queue-5.7/openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch @@ -0,0 +1,81 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Peilin Ye +Date: Fri, 31 Jul 2020 00:48:38 -0400 +Subject: openvswitch: Prevent kernel-infoleak in ovs_ct_put_key() + +From: Peilin Ye + +[ Upstream commit 9aba6c5b49254d5bee927d81593ed4429e91d4ae ] + +ovs_ct_put_key() is potentially copying uninitialized kernel stack memory +into socket buffers, since the compiler may leave a 3-byte hole at the end +of `struct ovs_key_ct_tuple_ipv4` and `struct 
ovs_key_ct_tuple_ipv6`. Fix +it by initializing `orig` with memset(). + +Fixes: 9dd7f8907c37 ("openvswitch: Add original direction conntrack tuple to sw_flow_key.") +Suggested-by: Dan Carpenter +Signed-off-by: Peilin Ye +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/conntrack.c | 38 ++++++++++++++++++++------------------ + 1 file changed, 20 insertions(+), 18 deletions(-) + +--- a/net/openvswitch/conntrack.c ++++ b/net/openvswitch/conntrack.c +@@ -276,10 +276,6 @@ void ovs_ct_fill_key(const struct sk_buf + ovs_ct_update_key(skb, NULL, key, false, false); + } + +-#define IN6_ADDR_INITIALIZER(ADDR) \ +- { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \ +- (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] } +- + int ovs_ct_put_key(const struct sw_flow_key *swkey, + const struct sw_flow_key *output, struct sk_buff *skb) + { +@@ -301,24 +297,30 @@ int ovs_ct_put_key(const struct sw_flow_ + + if (swkey->ct_orig_proto) { + if (swkey->eth.type == htons(ETH_P_IP)) { +- struct ovs_key_ct_tuple_ipv4 orig = { +- output->ipv4.ct_orig.src, +- output->ipv4.ct_orig.dst, +- output->ct.orig_tp.src, +- output->ct.orig_tp.dst, +- output->ct_orig_proto, +- }; ++ struct ovs_key_ct_tuple_ipv4 orig; ++ ++ memset(&orig, 0, sizeof(orig)); ++ orig.ipv4_src = output->ipv4.ct_orig.src; ++ orig.ipv4_dst = output->ipv4.ct_orig.dst; ++ orig.src_port = output->ct.orig_tp.src; ++ orig.dst_port = output->ct.orig_tp.dst; ++ orig.ipv4_proto = output->ct_orig_proto; ++ + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4, + sizeof(orig), &orig)) + return -EMSGSIZE; + } else if (swkey->eth.type == htons(ETH_P_IPV6)) { +- struct ovs_key_ct_tuple_ipv6 orig = { +- IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src), +- IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst), +- output->ct.orig_tp.src, +- output->ct.orig_tp.dst, +- output->ct_orig_proto, +- }; ++ struct ovs_key_ct_tuple_ipv6 orig; ++ ++ memset(&orig, 0, sizeof(orig)); ++ memcpy(orig.ipv6_src, 
output->ipv6.ct_orig.src.s6_addr32, ++ sizeof(orig.ipv6_src)); ++ memcpy(orig.ipv6_dst, output->ipv6.ct_orig.dst.s6_addr32, ++ sizeof(orig.ipv6_dst)); ++ orig.src_port = output->ct.orig_tp.src; ++ orig.dst_port = output->ct.orig_tp.dst; ++ orig.ipv6_proto = output->ct_orig_proto; ++ + if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6, + sizeof(orig), &orig)) + return -EMSGSIZE; diff --git a/queue-5.7/pci-tegra-revert-tegra124-raw_violation_fixup.patch b/queue-5.7/pci-tegra-revert-tegra124-raw_violation_fixup.patch new file mode 100644 index 00000000000..97829839fbd --- /dev/null +++ b/queue-5.7/pci-tegra-revert-tegra124-raw_violation_fixup.patch @@ -0,0 +1,159 @@ +From e7b856dfcec6d3bf028adee8c65342d7035914a1 Mon Sep 17 00:00:00 2001 +From: Nicolas Chauvet +Date: Sat, 18 Jul 2020 12:07:10 +0200 +Subject: PCI: tegra: Revert tegra124 raw_violation_fixup + +From: Nicolas Chauvet + +commit e7b856dfcec6d3bf028adee8c65342d7035914a1 upstream. + +As reported in https://bugzilla.kernel.org/206217 , raw_violation_fixup +is causing more harm than good in some common use-cases. + +This patch is a partial revert of commit: + +191cd6fb5d2c ("PCI: tegra: Add SW fixup for RAW violations") + +and fixes the following regression since then. + +* Description: + +When both the NIC and MMC are used one can see the following message: + + NETDEV WATCHDOG: enp1s0 (r8169): transmit queue 0 timed out + +and + + pcieport 0000:00:02.0: AER: Uncorrected (Non-Fatal) error received: 0000:01:00.0 + r8169 0000:01:00.0: AER: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Requester ID) + r8169 0000:01:00.0: AER: device [10ec:8168] error status/mask=00004000/00400000 + r8169 0000:01:00.0: AER: [14] CmpltTO (First) + r8169 0000:01:00.0: AER: can't recover (no error_detected callback) + pcieport 0000:00:02.0: AER: device recovery failed + +After that, the ethernet NIC is not functional anymore even after +reloading the r8169 module. 
After a reboot, this is reproducible by +copying a large file over the NIC to the MMC. + +For some reason this is not reproducible when files are copied to a tmpfs. + +* Little background on the fixup, by Manikanta Maddireddy: + "In the internal testing with dGPU on Tegra124, CmplTO is reported by +dGPU. This happened because FIFO queue in AFI(AXI to PCIe) module +get full by upstream posted writes. Back to back upstream writes +interleaved with infrequent reads, triggers RAW violation and CmpltTO. +This is fixed by reducing the posted write credits and by changing +updateFC timer frequency. These settings are fixed after stress test. + +In the current case, RTL NIC is also reporting CmplTO. These settings +seems to be aggravating the issue instead of fixing it." + +Link: https://lore.kernel.org/r/20200718100710.15398-1-kwizart@gmail.com +Fixes: 191cd6fb5d2c ("PCI: tegra: Add SW fixup for RAW violations") +Signed-off-by: Nicolas Chauvet +Signed-off-by: Lorenzo Pieralisi +Reviewed-by: Manikanta Maddireddy +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman + +--- + drivers/pci/controller/pci-tegra.c | 32 -------------------------------- + 1 file changed, 32 deletions(-) + +--- a/drivers/pci/controller/pci-tegra.c ++++ b/drivers/pci/controller/pci-tegra.c +@@ -181,13 +181,6 @@ + + #define AFI_PEXBIAS_CTRL_0 0x168 + +-#define RP_PRIV_XP_DL 0x00000494 +-#define RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD (0x1ff << 1) +- +-#define RP_RX_HDR_LIMIT 0x00000e00 +-#define RP_RX_HDR_LIMIT_PW_MASK (0xff << 8) +-#define RP_RX_HDR_LIMIT_PW (0x0e << 8) +- + #define RP_ECTL_2_R1 0x00000e84 + #define RP_ECTL_2_R1_RX_CTLE_1C_MASK 0xffff + +@@ -323,7 +316,6 @@ struct tegra_pcie_soc { + bool program_uphy; + bool update_clamp_threshold; + bool program_deskew_time; +- bool raw_violation_fixup; + bool update_fc_timer; + bool has_cache_bars; + struct { +@@ -659,23 +651,6 @@ static void tegra_pcie_apply_sw_fixup(st + writel(value, port->base + RP_VEND_CTL0); + } + +- /* Fixup for read 
after write violation. */ +- if (soc->raw_violation_fixup) { +- value = readl(port->base + RP_RX_HDR_LIMIT); +- value &= ~RP_RX_HDR_LIMIT_PW_MASK; +- value |= RP_RX_HDR_LIMIT_PW; +- writel(value, port->base + RP_RX_HDR_LIMIT); +- +- value = readl(port->base + RP_PRIV_XP_DL); +- value |= RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD; +- writel(value, port->base + RP_PRIV_XP_DL); +- +- value = readl(port->base + RP_VEND_XP); +- value &= ~RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK; +- value |= soc->update_fc_threshold; +- writel(value, port->base + RP_VEND_XP); +- } +- + if (soc->update_fc_timer) { + value = readl(port->base + RP_VEND_XP); + value &= ~RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK; +@@ -2416,7 +2391,6 @@ static const struct tegra_pcie_soc tegra + .program_uphy = true, + .update_clamp_threshold = false, + .program_deskew_time = false, +- .raw_violation_fixup = false, + .update_fc_timer = false, + .has_cache_bars = true, + .ectl.enable = false, +@@ -2446,7 +2420,6 @@ static const struct tegra_pcie_soc tegra + .program_uphy = true, + .update_clamp_threshold = false, + .program_deskew_time = false, +- .raw_violation_fixup = false, + .update_fc_timer = false, + .has_cache_bars = false, + .ectl.enable = false, +@@ -2459,8 +2432,6 @@ static const struct tegra_pcie_soc tegra + .pads_pll_ctl = PADS_PLL_CTL_TEGRA30, + .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN, + .pads_refclk_cfg0 = 0x44ac44ac, +- /* FC threshold is bit[25:18] */ +- .update_fc_threshold = 0x03fc0000, + .has_pex_clkreq_en = true, + .has_pex_bias_ctrl = true, + .has_intr_prsnt_sense = true, +@@ -2470,7 +2441,6 @@ static const struct tegra_pcie_soc tegra + .program_uphy = true, + .update_clamp_threshold = true, + .program_deskew_time = false, +- .raw_violation_fixup = true, + .update_fc_timer = false, + .has_cache_bars = false, + .ectl.enable = false, +@@ -2494,7 +2464,6 @@ static const struct tegra_pcie_soc tegra + .program_uphy = true, + .update_clamp_threshold = true, + .program_deskew_time = true, +- .raw_violation_fixup = 
false, + .update_fc_timer = true, + .has_cache_bars = false, + .ectl = { +@@ -2536,7 +2505,6 @@ static const struct tegra_pcie_soc tegra + .program_uphy = false, + .update_clamp_threshold = false, + .program_deskew_time = false, +- .raw_violation_fixup = false, + .update_fc_timer = false, + .has_cache_bars = false, + .ectl.enable = false, diff --git a/queue-5.7/revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch b/queue-5.7/revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch new file mode 100644 index 00000000000..c3318cf7df4 --- /dev/null +++ b/queue-5.7/revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch @@ -0,0 +1,80 @@ +From b506923ee44ae87fc9f4de16b53feb313623e146 Mon Sep 17 00:00:00 2001 +From: Christophe Leroy +Date: Thu, 2 Jul 2020 11:52:02 +0000 +Subject: Revert "powerpc/kasan: Fix shadow pages allocation failure" + +From: Christophe Leroy + +commit b506923ee44ae87fc9f4de16b53feb313623e146 upstream. + +This reverts commit d2a91cef9bbdeb87b7449fdab1a6be6000930210. + +This commit moved too much work into kasan_init(). The allocation +of shadow pages has to be moved for the reason explained in that +patch, but the allocation of page tables still needs to be done +before switching to the final hash table. + +First revert the incorrect commit; a following patch redoes it +properly. + +Fixes: d2a91cef9bbd ("powerpc/kasan: Fix shadow pages allocation failure") +Cc: stable@vger.kernel.org +Reported-by: Erhard F. 
+Signed-off-by: Christophe Leroy +Signed-off-by: Michael Ellerman +Link: https://bugzilla.kernel.org/show_bug.cgi?id=208181 +Link: https://lore.kernel.org/r/3667deb0911affbf999b99f87c31c77d5e870cd2.1593690707.git.christophe.leroy@csgroup.eu +Signed-off-by: Greg Kroah-Hartman + +--- + arch/powerpc/include/asm/kasan.h | 2 ++ + arch/powerpc/mm/init_32.c | 2 ++ + arch/powerpc/mm/kasan/kasan_init_32.c | 4 +--- + 3 files changed, 5 insertions(+), 3 deletions(-) + +--- a/arch/powerpc/include/asm/kasan.h ++++ b/arch/powerpc/include/asm/kasan.h +@@ -27,10 +27,12 @@ + + #ifdef CONFIG_KASAN + void kasan_early_init(void); ++void kasan_mmu_init(void); + void kasan_init(void); + void kasan_late_init(void); + #else + static inline void kasan_init(void) { } ++static inline void kasan_mmu_init(void) { } + static inline void kasan_late_init(void) { } + #endif + +--- a/arch/powerpc/mm/init_32.c ++++ b/arch/powerpc/mm/init_32.c +@@ -170,6 +170,8 @@ void __init MMU_init(void) + btext_unmap(); + #endif + ++ kasan_mmu_init(); ++ + setup_kup(); + + /* Shortly after that, the entire linear mapping will be available */ +--- a/arch/powerpc/mm/kasan/kasan_init_32.c ++++ b/arch/powerpc/mm/kasan/kasan_init_32.c +@@ -131,7 +131,7 @@ static void __init kasan_unmap_early_sha + flush_tlb_kernel_range(k_start, k_end); + } + +-static void __init kasan_mmu_init(void) ++void __init kasan_mmu_init(void) + { + int ret; + struct memblock_region *reg; +@@ -159,8 +159,6 @@ static void __init kasan_mmu_init(void) + + void __init kasan_init(void) + { +- kasan_mmu_init(); +- + kasan_remap_early_shadow_ro(); + + clear_page(kasan_early_shadow_page); diff --git a/queue-5.7/revert-vxlan-fix-tos-value-before-xmit.patch b/queue-5.7/revert-vxlan-fix-tos-value-before-xmit.patch new file mode 100644 index 00000000000..b0903c0011b --- /dev/null +++ b/queue-5.7/revert-vxlan-fix-tos-value-before-xmit.patch @@ -0,0 +1,65 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Hangbin Liu +Date: Wed, 5 Aug 2020 10:41:31 
+0800 +Subject: Revert "vxlan: fix tos value before xmit" + +From: Hangbin Liu + +[ Upstream commit a0dced17ad9dc08b1b25e0065b54c97a318e6e8b ] + +This reverts commit 71130f29979c7c7956b040673e6b9d5643003176. + +In commit 71130f29979c ("vxlan: fix tos value before xmit") we want to +make sure the tos value is filtered by RT_TOS() based on RFC1349. + + 0 1 2 3 4 5 6 7 + +-----+-----+-----+-----+-----+-----+-----+-----+ + | PRECEDENCE | TOS | MBZ | + +-----+-----+-----+-----+-----+-----+-----+-----+ + +But RFC1349 has been obsoleted by RFC2474. The new DSCP field is defined like + + 0 1 2 3 4 5 6 7 + +-----+-----+-----+-----+-----+-----+-----+-----+ + | DS FIELD, DSCP | ECN FIELD | + +-----+-----+-----+-----+-----+-----+-----+-----+ + +So with + +IPTOS_TOS_MASK 0x1E +RT_TOS(tos) ((tos)&IPTOS_TOS_MASK) + +the first 3 bits DSCP info will get lost. + +To take all the DSCP info in xmit, we should revert the patch and just push +all tos bits to ip_tunnel_ecn_encap(), which will handle the ECN field later. + +Fixes: 71130f29979c ("vxlan: fix tos value before xmit") +Signed-off-by: Hangbin Liu +Acked-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -2550,7 +2550,7 @@ static void vxlan_xmit_one(struct sk_buf + ndst = &rt->dst; + skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM); + +- tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb); ++ tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr), + vni, md, flags, udp_sum); +@@ -2590,7 +2590,7 @@ static void vxlan_xmit_one(struct sk_buf + + skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM); + +- tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb); ++ tos = ip_tunnel_ecn_encap(tos, old_iph, skb); + ttl = ttl ? 
: ip6_dst_hoplimit(ndst); + skb_scrub_packet(skb, xnet); + err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), diff --git a/queue-5.7/rhashtable-restore-rcu-marking-on-rhash_lock_head.patch b/queue-5.7/rhashtable-restore-rcu-marking-on-rhash_lock_head.patch new file mode 100644 index 00000000000..cc9c5f34438 --- /dev/null +++ b/queue-5.7/rhashtable-restore-rcu-marking-on-rhash_lock_head.patch @@ -0,0 +1,297 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Herbert Xu +Date: Fri, 24 Jul 2020 20:14:34 +1000 +Subject: rhashtable: Restore RCU marking on rhash_lock_head + +From: Herbert Xu + +[ Upstream commit ce9b362bf6db51a083c4221ef0f93c16cfb1facf ] + +This patch restores the RCU marking on bucket_table->buckets as +it really does need RCU protection. Its removal had led to a fatal +bug. + +Signed-off-by: Herbert Xu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/rhashtable.h | 56 +++++++++++++++++++-------------------------- + lib/rhashtable.c | 35 ++++++++++++---------------- + 2 files changed, 40 insertions(+), 51 deletions(-) + +--- a/include/linux/rhashtable.h ++++ b/include/linux/rhashtable.h +@@ -84,7 +84,7 @@ struct bucket_table { + + struct lockdep_map dep_map; + +- struct rhash_lock_head *buckets[] ____cacheline_aligned_in_smp; ++ struct rhash_lock_head __rcu *buckets[] ____cacheline_aligned_in_smp; + }; + + /* +@@ -261,13 +261,12 @@ void rhashtable_free_and_destroy(struct + void *arg); + void rhashtable_destroy(struct rhashtable *ht); + +-struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, +- unsigned int hash); +-struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, +- unsigned int hash); +-struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, +- struct bucket_table *tbl, +- unsigned int hash); ++struct rhash_lock_head __rcu **rht_bucket_nested( ++ const struct bucket_table *tbl, unsigned int hash); ++struct rhash_lock_head __rcu 
**__rht_bucket_nested( ++ const struct bucket_table *tbl, unsigned int hash); ++struct rhash_lock_head __rcu **rht_bucket_nested_insert( ++ struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash); + + #define rht_dereference(p, ht) \ + rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) +@@ -284,21 +283,21 @@ struct rhash_lock_head **rht_bucket_nest + #define rht_entry(tpos, pos, member) \ + ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) + +-static inline struct rhash_lock_head *const *rht_bucket( ++static inline struct rhash_lock_head __rcu *const *rht_bucket( + const struct bucket_table *tbl, unsigned int hash) + { + return unlikely(tbl->nest) ? rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; + } + +-static inline struct rhash_lock_head **rht_bucket_var( ++static inline struct rhash_lock_head __rcu **rht_bucket_var( + struct bucket_table *tbl, unsigned int hash) + { + return unlikely(tbl->nest) ? __rht_bucket_nested(tbl, hash) : + &tbl->buckets[hash]; + } + +-static inline struct rhash_lock_head **rht_bucket_insert( ++static inline struct rhash_lock_head __rcu **rht_bucket_insert( + struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) + { + return unlikely(tbl->nest) ? 
rht_bucket_nested_insert(ht, tbl, hash) : +@@ -325,7 +324,7 @@ static inline struct rhash_lock_head **r + */ + + static inline void rht_lock(struct bucket_table *tbl, +- struct rhash_lock_head **bkt) ++ struct rhash_lock_head __rcu **bkt) + { + local_bh_disable(); + bit_spin_lock(0, (unsigned long *)bkt); +@@ -333,7 +332,7 @@ static inline void rht_lock(struct bucke + } + + static inline void rht_lock_nested(struct bucket_table *tbl, +- struct rhash_lock_head **bucket, ++ struct rhash_lock_head __rcu **bucket, + unsigned int subclass) + { + local_bh_disable(); +@@ -342,7 +341,7 @@ static inline void rht_lock_nested(struc + } + + static inline void rht_unlock(struct bucket_table *tbl, +- struct rhash_lock_head **bkt) ++ struct rhash_lock_head __rcu **bkt) + { + lock_map_release(&tbl->dep_map); + bit_spin_unlock(0, (unsigned long *)bkt); +@@ -365,48 +364,41 @@ static inline struct rhash_head *__rht_p + * access is guaranteed, such as when destroying the table. + */ + static inline struct rhash_head *rht_ptr_rcu( +- struct rhash_lock_head *const *p) ++ struct rhash_lock_head __rcu *const *bkt) + { +- struct rhash_lock_head __rcu *const *bkt = (void *)p; + return __rht_ptr(rcu_dereference(*bkt), bkt); + } + + static inline struct rhash_head *rht_ptr( +- struct rhash_lock_head *const *p, ++ struct rhash_lock_head __rcu *const *bkt, + struct bucket_table *tbl, + unsigned int hash) + { +- struct rhash_lock_head __rcu *const *bkt = (void *)p; + return __rht_ptr(rht_dereference_bucket(*bkt, tbl, hash), bkt); + } + + static inline struct rhash_head *rht_ptr_exclusive( +- struct rhash_lock_head *const *p) ++ struct rhash_lock_head __rcu *const *bkt) + { +- struct rhash_lock_head __rcu *const *bkt = (void *)p; + return __rht_ptr(rcu_dereference_protected(*bkt, 1), bkt); + } + +-static inline void rht_assign_locked(struct rhash_lock_head **bkt, ++static inline void rht_assign_locked(struct rhash_lock_head __rcu **bkt, + struct rhash_head *obj) + { +- struct rhash_head __rcu **p 
= (struct rhash_head __rcu **)bkt; +- + if (rht_is_a_nulls(obj)) + obj = NULL; +- rcu_assign_pointer(*p, (void *)((unsigned long)obj | BIT(0))); ++ rcu_assign_pointer(*bkt, (void *)((unsigned long)obj | BIT(0))); + } + + static inline void rht_assign_unlock(struct bucket_table *tbl, +- struct rhash_lock_head **bkt, ++ struct rhash_lock_head __rcu **bkt, + struct rhash_head *obj) + { +- struct rhash_head __rcu **p = (struct rhash_head __rcu **)bkt; +- + if (rht_is_a_nulls(obj)) + obj = NULL; + lock_map_release(&tbl->dep_map); +- rcu_assign_pointer(*p, obj); ++ rcu_assign_pointer(*bkt, (void *)obj); + preempt_enable(); + __release(bitlock); + local_bh_enable(); +@@ -594,7 +586,7 @@ static inline struct rhash_head *__rhash + .ht = ht, + .key = key, + }; +- struct rhash_lock_head *const *bkt; ++ struct rhash_lock_head __rcu *const *bkt; + struct bucket_table *tbl; + struct rhash_head *he; + unsigned int hash; +@@ -710,7 +702,7 @@ static inline void *__rhashtable_insert_ + .ht = ht, + .key = key, + }; +- struct rhash_lock_head **bkt; ++ struct rhash_lock_head __rcu **bkt; + struct rhash_head __rcu **pprev; + struct bucket_table *tbl; + struct rhash_head *head; +@@ -996,7 +988,7 @@ static inline int __rhashtable_remove_fa + struct rhash_head *obj, const struct rhashtable_params params, + bool rhlist) + { +- struct rhash_lock_head **bkt; ++ struct rhash_lock_head __rcu **bkt; + struct rhash_head __rcu **pprev; + struct rhash_head *he; + unsigned int hash; +@@ -1148,7 +1140,7 @@ static inline int __rhashtable_replace_f + struct rhash_head *obj_old, struct rhash_head *obj_new, + const struct rhashtable_params params) + { +- struct rhash_lock_head **bkt; ++ struct rhash_lock_head __rcu **bkt; + struct rhash_head __rcu **pprev; + struct rhash_head *he; + unsigned int hash; +--- a/lib/rhashtable.c ++++ b/lib/rhashtable.c +@@ -31,7 +31,7 @@ + + union nested_table { + union nested_table __rcu *table; +- struct rhash_lock_head *bucket; ++ struct rhash_lock_head __rcu *bucket; + 
}; + + static u32 head_hashfn(struct rhashtable *ht, +@@ -213,7 +213,7 @@ static struct bucket_table *rhashtable_l + } + + static int rhashtable_rehash_one(struct rhashtable *ht, +- struct rhash_lock_head **bkt, ++ struct rhash_lock_head __rcu **bkt, + unsigned int old_hash) + { + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); +@@ -266,7 +266,7 @@ static int rhashtable_rehash_chain(struc + unsigned int old_hash) + { + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); +- struct rhash_lock_head **bkt = rht_bucket_var(old_tbl, old_hash); ++ struct rhash_lock_head __rcu **bkt = rht_bucket_var(old_tbl, old_hash); + int err; + + if (!bkt) +@@ -476,7 +476,7 @@ fail: + } + + static void *rhashtable_lookup_one(struct rhashtable *ht, +- struct rhash_lock_head **bkt, ++ struct rhash_lock_head __rcu **bkt, + struct bucket_table *tbl, unsigned int hash, + const void *key, struct rhash_head *obj) + { +@@ -526,12 +526,10 @@ static void *rhashtable_lookup_one(struc + return ERR_PTR(-ENOENT); + } + +-static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, +- struct rhash_lock_head **bkt, +- struct bucket_table *tbl, +- unsigned int hash, +- struct rhash_head *obj, +- void *data) ++static struct bucket_table *rhashtable_insert_one( ++ struct rhashtable *ht, struct rhash_lock_head __rcu **bkt, ++ struct bucket_table *tbl, unsigned int hash, struct rhash_head *obj, ++ void *data) + { + struct bucket_table *new_tbl; + struct rhash_head *head; +@@ -582,7 +580,7 @@ static void *rhashtable_try_insert(struc + { + struct bucket_table *new_tbl; + struct bucket_table *tbl; +- struct rhash_lock_head **bkt; ++ struct rhash_lock_head __rcu **bkt; + unsigned int hash; + void *data; + +@@ -1164,8 +1162,8 @@ void rhashtable_destroy(struct rhashtabl + } + EXPORT_SYMBOL_GPL(rhashtable_destroy); + +-struct rhash_lock_head **__rht_bucket_nested(const struct bucket_table *tbl, +- unsigned int hash) ++struct rhash_lock_head __rcu **__rht_bucket_nested( ++ const 
struct bucket_table *tbl, unsigned int hash) + { + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + unsigned int index = hash & ((1 << tbl->nest) - 1); +@@ -1193,10 +1191,10 @@ struct rhash_lock_head **__rht_bucket_ne + } + EXPORT_SYMBOL_GPL(__rht_bucket_nested); + +-struct rhash_lock_head **rht_bucket_nested(const struct bucket_table *tbl, +- unsigned int hash) ++struct rhash_lock_head __rcu **rht_bucket_nested( ++ const struct bucket_table *tbl, unsigned int hash) + { +- static struct rhash_lock_head *rhnull; ++ static struct rhash_lock_head __rcu *rhnull; + + if (!rhnull) + INIT_RHT_NULLS_HEAD(rhnull); +@@ -1204,9 +1202,8 @@ struct rhash_lock_head **rht_bucket_nest + } + EXPORT_SYMBOL_GPL(rht_bucket_nested); + +-struct rhash_lock_head **rht_bucket_nested_insert(struct rhashtable *ht, +- struct bucket_table *tbl, +- unsigned int hash) ++struct rhash_lock_head __rcu **rht_bucket_nested_insert( ++ struct rhashtable *ht, struct bucket_table *tbl, unsigned int hash) + { + const unsigned int shift = PAGE_SHIFT - ilog2(sizeof(void *)); + unsigned int index = hash & ((1 << tbl->nest) - 1); diff --git a/queue-5.7/rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch b/queue-5.7/rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch new file mode 100644 index 00000000000..7f9b6aeb44c --- /dev/null +++ b/queue-5.7/rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch @@ -0,0 +1,166 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: David Howells +Date: Wed, 29 Jul 2020 00:03:56 +0100 +Subject: rxrpc: Fix race between recvmsg and sendmsg on immediate call failure + +From: David Howells + +[ Upstream commit 65550098c1c4db528400c73acf3e46bfa78d9264 ] + +There's a race between rxrpc_sendmsg setting up a call, but then failing to +send anything on it due to an error, and recvmsg() seeing the call +completion occur and trying to return the state to the user. 
+ +An assertion fails in rxrpc_recvmsg() because the call has already been +released from the socket and is about to be released again as recvmsg deals +with it. (The recvmsg_q queue on the socket holds a ref, so there's no +problem with use-after-free.) + +We also have to be careful not to end up reporting an error twice, in such +a way that both returns indicate to userspace that the user ID supplied +with the call is no longer in use - which could cause the client to +malfunction if it recycles the user ID fast enough. + +Fix this by the following means: + + (1) When sendmsg() creates a call after the point that the call has been + successfully added to the socket, don't return any errors through + sendmsg(), but rather complete the call and let recvmsg() retrieve + them. Make sendmsg() return 0 at this point. Further calls to + sendmsg() for that call will fail with ESHUTDOWN. + + Note that at this point, we haven't sent any packets yet, so the + server doesn't yet know about the call. + + (2) If sendmsg() returns an error when it was expected to create a new + call, it means that the user ID wasn't used. + + (3) Mark the call disconnected before marking it completed to prevent an + oops in rxrpc_release_call(). + + (4) recvmsg() will then retrieve the error and set MSG_EOR to indicate + that the user ID is no longer known by the kernel. + +An oops like the following is produced: + + kernel BUG at net/rxrpc/recvmsg.c:605! + ... + RIP: 0010:rxrpc_recvmsg+0x256/0x5ae + ... + Call Trace: + ? __init_waitqueue_head+0x2f/0x2f + ____sys_recvmsg+0x8a/0x148 + ? import_iovec+0x69/0x9c + ? copy_msghdr_from_user+0x5c/0x86 + ___sys_recvmsg+0x72/0xaa + ? __fget_files+0x22/0x57 + ? __fget_light+0x46/0x51 + ? fdget+0x9/0x1b + do_recvmmsg+0x15e/0x232 + ? _raw_spin_unlock+0xa/0xb + ?
vtime_delta+0xf/0x25 + __x64_sys_recvmmsg+0x2c/0x2f + do_syscall_64+0x4c/0x78 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Fixes: 357f5ef64628 ("rxrpc: Call rxrpc_release_call() on error in rxrpc_new_client_call()") +Reported-by: syzbot+b54969381df354936d96@syzkaller.appspotmail.com +Signed-off-by: David Howells +Reviewed-by: Marc Dionne +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/call_object.c | 27 +++++++++++++++++++-------- + net/rxrpc/conn_object.c | 8 +++++--- + net/rxrpc/recvmsg.c | 2 +- + net/rxrpc/sendmsg.c | 3 +++ + 4 files changed, 28 insertions(+), 12 deletions(-) + +--- a/net/rxrpc/call_object.c ++++ b/net/rxrpc/call_object.c +@@ -288,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call + */ + ret = rxrpc_connect_call(rx, call, cp, srx, gfp); + if (ret < 0) +- goto error; ++ goto error_attached_to_socket; + + trace_rxrpc_call(call->debug_id, rxrpc_call_connected, + atomic_read(&call->usage), here, NULL); +@@ -308,18 +308,29 @@ struct rxrpc_call *rxrpc_new_client_call + error_dup_user_ID: + write_unlock(&rx->call_lock); + release_sock(&rx->sk); +- ret = -EEXIST; +- +-error: + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, +- RX_CALL_DEAD, ret); ++ RX_CALL_DEAD, -EEXIST); + trace_rxrpc_call(call->debug_id, rxrpc_call_error, +- atomic_read(&call->usage), here, ERR_PTR(ret)); ++ atomic_read(&call->usage), here, ERR_PTR(-EEXIST)); + rxrpc_release_call(rx, call); + mutex_unlock(&call->user_mutex); + rxrpc_put_call(call, rxrpc_call_put); +- _leave(" = %d", ret); +- return ERR_PTR(ret); ++ _leave(" = -EEXIST"); ++ return ERR_PTR(-EEXIST); ++ ++ /* We got an error, but the call is attached to the socket and is in ++ * need of release. However, we might now race with recvmsg() when ++ * completing the call queues it. Return 0 from sys_sendmsg() and ++ * leave the error to recvmsg() to deal with. 
++ */ ++error_attached_to_socket: ++ trace_rxrpc_call(call->debug_id, rxrpc_call_error, ++ atomic_read(&call->usage), here, ERR_PTR(ret)); ++ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); ++ __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, ++ RX_CALL_DEAD, ret); ++ _leave(" = c=%08x [err]", call->debug_id); ++ return call; + } + + /* +--- a/net/rxrpc/conn_object.c ++++ b/net/rxrpc/conn_object.c +@@ -212,9 +212,11 @@ void rxrpc_disconnect_call(struct rxrpc_ + + call->peer->cong_cwnd = call->cong_cwnd; + +- spin_lock_bh(&conn->params.peer->lock); +- hlist_del_rcu(&call->error_link); +- spin_unlock_bh(&conn->params.peer->lock); ++ if (!hlist_unhashed(&call->error_link)) { ++ spin_lock_bh(&call->peer->lock); ++ hlist_del_rcu(&call->error_link); ++ spin_unlock_bh(&call->peer->lock); ++ } + + if (rxrpc_is_client_call(call)) + return rxrpc_disconnect_client_call(call); +--- a/net/rxrpc/recvmsg.c ++++ b/net/rxrpc/recvmsg.c +@@ -541,7 +541,7 @@ try_again: + goto error_unlock_call; + } + +- if (msg->msg_name) { ++ if (msg->msg_name && call->peer) { + struct sockaddr_rxrpc *srx = msg->msg_name; + size_t len = sizeof(call->peer->srx); + +--- a/net/rxrpc/sendmsg.c ++++ b/net/rxrpc/sendmsg.c +@@ -683,6 +683,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock * + if (IS_ERR(call)) + return PTR_ERR(call); + /* ... and we have the call lock. 
*/ ++ ret = 0; ++ if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) ++ goto out_put_unlock; + } else { + switch (READ_ONCE(call->state)) { + case RXRPC_CALL_UNINITIALISED: diff --git a/queue-5.7/selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch b/queue-5.7/selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch new file mode 100644 index 00000000000..869bddabcd1 --- /dev/null +++ b/queue-5.7/selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch @@ -0,0 +1,46 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Willem de Bruijn +Date: Wed, 5 Aug 2020 04:40:45 -0400 +Subject: selftests/net: relax cpu affinity requirement in msg_zerocopy test + +From: Willem de Bruijn + +[ Upstream commit 16f6458f2478b55e2b628797bc81a4455045c74e ] + +The msg_zerocopy test pins the sender and receiver threads to separate +cores to reduce variance between runs. + +But it hardcodes the cores and skips core 0, so it fails on machines +with the selected cores offline, or simply fewer cores. + +The test mainly gives code coverage in automated runs. The throughput +of zerocopy ('-z') and non-zerocopy runs is logged for manual +inspection. + +Continue even when sched_setaffinity fails. Just log to warn anyone +interpreting the data. + +Fixes: 07b65c5b31ce ("test: add msg_zerocopy test") +Reported-by: Colin Ian King +Signed-off-by: Willem de Bruijn +Acked-by: Colin Ian King +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + tools/testing/selftests/net/msg_zerocopy.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/tools/testing/selftests/net/msg_zerocopy.c ++++ b/tools/testing/selftests/net/msg_zerocopy.c +@@ -125,9 +125,8 @@ static int do_setcpu(int cpu) + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) +- error(1, 0, "setaffinity %d", cpu); +- +- if (cfg_verbose) ++ fprintf(stderr, "cpu: unable to pin, may increase variance.\n"); ++ else if (cfg_verbose) + fprintf(stderr, "cpu: %u\n", cpu); + + return 0; diff --git a/queue-5.7/series b/queue-5.7/series index c7fcf9ca0a0..2e74a1d2d9b 100644 --- a/queue-5.7/series +++ b/queue-5.7/series @@ -49,3 +49,29 @@ cfg80211-check-vendor-command-doit-pointer-before-us.patch igb-reinit_locked-should-be-called-with-rtnl_lock.patch atm-fix-atm_dev-refcnt-leaks-in-atmtcp_remove_persis.patch tools-lib-traceevent-fix-memory-leak-in-process_dyna.patch +xattr-break-delegations-in-set-remove-xattr.patch +revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch +pci-tegra-revert-tegra124-raw_violation_fixup.patch +ipv4-silence-suspicious-rcu-usage-warning.patch +ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch +ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch +rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch +vxlan-ensure-fdb-dump-is-performed-under-rcu.patch +net-lan78xx-replace-bogus-endpoint-lookup.patch +rhashtable-restore-rcu-marking-on-rhash_lock_head.patch +devlink-ignore-eopnotsupp-errors-on-dumpit.patch +appletalk-fix-atalk_proc_init-return-path.patch +dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch +hv_netvsc-do-not-use-vf-device-if-link-is-down.patch +net-bridge-clear-bridge-s-private-skb-space-on-xmit.patch +net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch +net-macb-properly-handle-phylink-on-at91sam9x.patch +net-mvpp2-fix-memory-leak-in-mvpp2_rx.patch 
+net-sched-act_ct-fix-miss-set-mru-for-ovs-after-defrag-in-act_ct.patch +net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch +openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch +revert-vxlan-fix-tos-value-before-xmit.patch +selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch +tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch +mptcp-be-careful-on-subflow-creation.patch +mptcp-fix-bogus-sendmsg-return-code-under-pressure.patch diff --git a/queue-5.7/tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch b/queue-5.7/tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch new file mode 100644 index 00000000000..3443b5bc2ab --- /dev/null +++ b/queue-5.7/tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch @@ -0,0 +1,54 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Jianfeng Wang +Date: Thu, 30 Jul 2020 23:49:16 +0000 +Subject: tcp: apply a floor of 1 for RTT samples from TCP timestamps + +From: Jianfeng Wang + +[ Upstream commit 730e700e2c19d87e578ff0e7d8cb1d4a02b036d2 ] + +For retransmitted packets, TCP needs to resort to using TCP timestamps +for computing RTT samples. In the common case where the data and ACK +fall in the same 1-millisecond interval, TCP senders with millisecond- +granularity TCP timestamps compute a ca_rtt_us of 0. This ca_rtt_us +of 0 propagates to rs->rtt_us. + +This value of 0 can cause performance problems for congestion control +modules. For example, in BBR, the zero min_rtt sample can bring the +min_rtt and BDP estimate down to 0, reduce snd_cwnd and result in a +low throughput. It would be hard to mitigate this with filtering in +the congestion control module, because the proper floor to apply would +depend on the method of RTT sampling (using timestamp options or +internally-saved transmission timestamps). 
+ +This fix applies a floor of 1 for the RTT sample delta from TCP +timestamps, so that seq_rtt_us, ca_rtt_us, and rs->rtt_us will be at +least 1 * (USEC_PER_SEC / TCP_TS_HZ). + +Note that the receiver RTT computation in tcp_rcv_rtt_measure() and +min_rtt computation in tcp_update_rtt_min() both already apply a floor +of 1 timestamp tick, so this commit makes the code more consistent in +avoiding this edge case of a value of 0. + +Signed-off-by: Jianfeng Wang +Signed-off-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Acked-by: Kevin Yang +Acked-by: Yuchung Cheng +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2945,6 +2945,8 @@ static bool tcp_ack_update_rtt(struct so + u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; + + if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { ++ if (!delta) ++ delta = 1; + seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + ca_rtt_us = seq_rtt_us; + } diff --git a/queue-5.7/vxlan-ensure-fdb-dump-is-performed-under-rcu.patch b/queue-5.7/vxlan-ensure-fdb-dump-is-performed-under-rcu.patch new file mode 100644 index 00000000000..2887c552521 --- /dev/null +++ b/queue-5.7/vxlan-ensure-fdb-dump-is-performed-under-rcu.patch @@ -0,0 +1,96 @@ +From foo@baz Mon 10 Aug 2020 02:00:10 PM CEST +From: Ido Schimmel +Date: Wed, 29 Jul 2020 11:34:36 +0300 +Subject: vxlan: Ensure FDB dump is performed under RCU + +From: Ido Schimmel + +[ Upstream commit b5141915b5aec3b29a63db869229e3741ebce258 ] + +The commit cited below removed the RCU read-side critical section from +rtnl_fdb_dump() which means that the ndo_fdb_dump() callback is invoked +without RCU protection. + +This results in the following warning [1] in the VXLAN driver, which +relied on the callback being invoked from an RCU read-side critical +section. 
+ +Fix this by calling rcu_read_lock() in the VXLAN driver, as already done +in the bridge driver. + +[1] +WARNING: suspicious RCU usage +5.8.0-rc4-custom-01521-g481007553ce6 #29 Not tainted +----------------------------- +drivers/net/vxlan.c:1379 RCU-list traversed in non-reader section!! + +other info that might help us debug this: + +rcu_scheduler_active = 2, debug_locks = 1 +1 lock held by bridge/166: + #0: ffffffff85a27850 (rtnl_mutex){+.+.}-{3:3}, at: netlink_dump+0xea/0x1090 + +stack backtrace: +CPU: 1 PID: 166 Comm: bridge Not tainted 5.8.0-rc4-custom-01521-g481007553ce6 #29 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014 +Call Trace: + dump_stack+0x100/0x184 + lockdep_rcu_suspicious+0x153/0x15d + vxlan_fdb_dump+0x51e/0x6d0 + rtnl_fdb_dump+0x4dc/0xad0 + netlink_dump+0x540/0x1090 + __netlink_dump_start+0x695/0x950 + rtnetlink_rcv_msg+0x802/0xbd0 + netlink_rcv_skb+0x17a/0x480 + rtnetlink_rcv+0x22/0x30 + netlink_unicast+0x5ae/0x890 + netlink_sendmsg+0x98a/0xf40 + __sys_sendto+0x279/0x3b0 + __x64_sys_sendto+0xe6/0x1a0 + do_syscall_64+0x54/0xa0 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +RIP: 0033:0x7fe14fa2ade0 +Code: Bad RIP value. +RSP: 002b:00007fff75bb5b88 EFLAGS: 00000246 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 00005614b1ba0020 RCX: 00007fe14fa2ade0 +RDX: 000000000000011c RSI: 00007fff75bb5b90 RDI: 0000000000000003 +RBP: 00007fff75bb5b90 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00005614b1b89160 +R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 + +Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl") +Signed-off-by: Ido Schimmel +Reviewed-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vxlan.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/net/vxlan.c ++++ b/drivers/net/vxlan.c +@@ -1225,6 +1225,7 @@ static int vxlan_fdb_dump(struct sk_buff + for (h = 0; h < FDB_HASH_SIZE; ++h) { + struct vxlan_fdb *f; + ++ rcu_read_lock(); + hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { + struct vxlan_rdst *rd; + +@@ -1237,12 +1238,15 @@ static int vxlan_fdb_dump(struct sk_buff + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, + NLM_F_MULTI, rd); +- if (err < 0) ++ if (err < 0) { ++ rcu_read_unlock(); + goto out; ++ } + skip: + *idx += 1; + } + } ++ rcu_read_unlock(); + } + out: + return err; diff --git a/queue-5.7/xattr-break-delegations-in-set-remove-xattr.patch b/queue-5.7/xattr-break-delegations-in-set-remove-xattr.patch new file mode 100644 index 00000000000..a0990537cb5 --- /dev/null +++ b/queue-5.7/xattr-break-delegations-in-set-remove-xattr.patch @@ -0,0 +1,181 @@ +From 08b5d5014a27e717826999ad20e394a8811aae92 Mon Sep 17 00:00:00 2001 +From: Frank van der Linden +Date: Tue, 23 Jun 2020 22:39:18 +0000 +Subject: xattr: break delegations in {set,remove}xattr + +From: Frank van der Linden + +commit 08b5d5014a27e717826999ad20e394a8811aae92 upstream. + +set/removexattr on an exported filesystem should break NFS delegations. +This is true in general, but also for the upcoming support for +RFC 8276 (NFSv4 extended attribute support). Make sure that they do. + +Additionally, they need to grow a _locked variant, since callers might +call this with i_rwsem held (like the NFS server code).
+ +Cc: stable@vger.kernel.org # v4.9+ +Cc: linux-fsdevel@vger.kernel.org +Cc: Al Viro +Signed-off-by: Frank van der Linden +Signed-off-by: Chuck Lever +Signed-off-by: Greg Kroah-Hartman + +--- + fs/xattr.c | 84 +++++++++++++++++++++++++++++++++++++++++++++----- + include/linux/xattr.h | 2 + + 2 files changed, 79 insertions(+), 7 deletions(-) + +--- a/fs/xattr.c ++++ b/fs/xattr.c +@@ -204,10 +204,22 @@ int __vfs_setxattr_noperm(struct dentry + return error; + } + +- ++/** ++ * __vfs_setxattr_locked: set an extended attribute while holding the inode ++ * lock ++ * ++ * @dentry - object to perform setxattr on ++ * @name - xattr name to set ++ * @value - value to set @name to ++ * @size - size of @value ++ * @flags - flags to pass into filesystem operations ++ * @delegated_inode - on return, will contain an inode pointer that ++ * a delegation was broken on, NULL if none. ++ */ + int +-vfs_setxattr(struct dentry *dentry, const char *name, const void *value, +- size_t size, int flags) ++__vfs_setxattr_locked(struct dentry *dentry, const char *name, ++ const void *value, size_t size, int flags, ++ struct inode **delegated_inode) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -216,15 +228,40 @@ vfs_setxattr(struct dentry *dentry, cons + if (error) + return error; + +- inode_lock(inode); + error = security_inode_setxattr(dentry, name, value, size, flags); + if (error) + goto out; + ++ error = try_break_deleg(inode, delegated_inode); ++ if (error) ++ goto out; ++ + error = __vfs_setxattr_noperm(dentry, name, value, size, flags); + + out: ++ return error; ++} ++EXPORT_SYMBOL_GPL(__vfs_setxattr_locked); ++ ++int ++vfs_setxattr(struct dentry *dentry, const char *name, const void *value, ++ size_t size, int flags) ++{ ++ struct inode *inode = dentry->d_inode; ++ struct inode *delegated_inode = NULL; ++ int error; ++ ++retry_deleg: ++ inode_lock(inode); ++ error = __vfs_setxattr_locked(dentry, name, value, size, flags, ++ &delegated_inode); + inode_unlock(inode); 
++ ++ if (delegated_inode) { ++ error = break_deleg_wait(&delegated_inode); ++ if (!error) ++ goto retry_deleg; ++ } + return error; + } + EXPORT_SYMBOL_GPL(vfs_setxattr); +@@ -378,8 +415,18 @@ __vfs_removexattr(struct dentry *dentry, + } + EXPORT_SYMBOL(__vfs_removexattr); + ++/** ++ * __vfs_removexattr_locked: remove an extended attribute while holding the inode ++ * lock ++ * ++ * @dentry - object to perform removexattr on ++ * @name - name of xattr to remove ++ * @delegated_inode - on return, will contain an inode pointer that ++ * a delegation was broken on, NULL if none. ++ */ + int +-vfs_removexattr(struct dentry *dentry, const char *name) ++__vfs_removexattr_locked(struct dentry *dentry, const char *name, ++ struct inode **delegated_inode) + { + struct inode *inode = dentry->d_inode; + int error; +@@ -388,11 +435,14 @@ vfs_removexattr(struct dentry *dentry, c + if (error) + return error; + +- inode_lock(inode); + error = security_inode_removexattr(dentry, name); + if (error) + goto out; + ++ error = try_break_deleg(inode, delegated_inode); ++ if (error) ++ goto out; ++ + error = __vfs_removexattr(dentry, name); + + if (!error) { +@@ -401,12 +451,32 @@ vfs_removexattr(struct dentry *dentry, c + } + + out: ++ return error; ++} ++EXPORT_SYMBOL_GPL(__vfs_removexattr_locked); ++ ++int ++vfs_removexattr(struct dentry *dentry, const char *name) ++{ ++ struct inode *inode = dentry->d_inode; ++ struct inode *delegated_inode = NULL; ++ int error; ++ ++retry_deleg: ++ inode_lock(inode); ++ error = __vfs_removexattr_locked(dentry, name, &delegated_inode); + inode_unlock(inode); ++ ++ if (delegated_inode) { ++ error = break_deleg_wait(&delegated_inode); ++ if (!error) ++ goto retry_deleg; ++ } ++ + return error; + } + EXPORT_SYMBOL_GPL(vfs_removexattr); + +- + /* + * Extended attribute SET operations + */ +--- a/include/linux/xattr.h ++++ b/include/linux/xattr.h +@@ -52,8 +52,10 @@ ssize_t vfs_getxattr(struct dentry *, co + ssize_t vfs_listxattr(struct dentry *d, char *list,
size_t size); + int __vfs_setxattr(struct dentry *, struct inode *, const char *, const void *, size_t, int); + int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int); ++int __vfs_setxattr_locked(struct dentry *, const char *, const void *, size_t, int, struct inode **); + int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); + int __vfs_removexattr(struct dentry *, const char *); ++int __vfs_removexattr_locked(struct dentry *, const char *, struct inode **); + int vfs_removexattr(struct dentry *, const char *); + + ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);