]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 10 Aug 2020 12:08:21 +0000 (14:08 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 10 Aug 2020 12:08:21 +0000 (14:08 +0200)
added patches:
appletalk-fix-atalk_proc_init-return-path.patch
dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch
hv_netvsc-do-not-use-vf-device-if-link-is-down.patch
ipv4-silence-suspicious-rcu-usage-warning.patch
ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch
ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch
net-ethernet-mtk_eth_soc-fix-mtu-warnings.patch
net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch
net-lan78xx-replace-bogus-endpoint-lookup.patch
net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch
openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch
pci-tegra-revert-tegra124-raw_violation_fixup.patch
revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch
revert-vxlan-fix-tos-value-before-xmit.patch
rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch
selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch
tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch
vxlan-ensure-fdb-dump-is-performed-under-rcu.patch
xattr-break-delegations-in-set-remove-xattr.patch

20 files changed:
queue-5.4/appletalk-fix-atalk_proc_init-return-path.patch [new file with mode: 0644]
queue-5.4/dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch [new file with mode: 0644]
queue-5.4/hv_netvsc-do-not-use-vf-device-if-link-is-down.patch [new file with mode: 0644]
queue-5.4/ipv4-silence-suspicious-rcu-usage-warning.patch [new file with mode: 0644]
queue-5.4/ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch [new file with mode: 0644]
queue-5.4/ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch [new file with mode: 0644]
queue-5.4/net-ethernet-mtk_eth_soc-fix-mtu-warnings.patch [new file with mode: 0644]
queue-5.4/net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch [new file with mode: 0644]
queue-5.4/net-lan78xx-replace-bogus-endpoint-lookup.patch [new file with mode: 0644]
queue-5.4/net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch [new file with mode: 0644]
queue-5.4/openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch [new file with mode: 0644]
queue-5.4/pci-tegra-revert-tegra124-raw_violation_fixup.patch [new file with mode: 0644]
queue-5.4/revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch [new file with mode: 0644]
queue-5.4/revert-vxlan-fix-tos-value-before-xmit.patch [new file with mode: 0644]
queue-5.4/rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch [new file with mode: 0644]
queue-5.4/selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch [new file with mode: 0644]
queue-5.4/vxlan-ensure-fdb-dump-is-performed-under-rcu.patch [new file with mode: 0644]
queue-5.4/xattr-break-delegations-in-set-remove-xattr.patch [new file with mode: 0644]

diff --git a/queue-5.4/appletalk-fix-atalk_proc_init-return-path.patch b/queue-5.4/appletalk-fix-atalk_proc_init-return-path.patch
new file mode 100644 (file)
index 0000000..1479f1c
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Vincent Duvert <vincent.ldev@duvert.net>
+Date: Sun, 2 Aug 2020 07:06:51 +0200
+Subject: appletalk: Fix atalk_proc_init() return path
+
+From: Vincent Duvert <vincent.ldev@duvert.net>
+
+[ Upstream commit d0f6ba2ef2c1c95069509e71402e7d6d43452512 ]
+
+Add a missing return statement to atalk_proc_init so it doesn't return
+-ENOMEM when successful.  This allows the appletalk module to load
+properly.
+
+Fixes: e2bcd8b0ce6e ("appletalk: use remove_proc_subtree to simplify procfs code")
+Link: https://www.downtowndougbrown.com/2020/08/hacking-up-a-fix-for-the-broken-appletalk-kernel-module-in-linux-5-1-and-newer/
+Reported-by: Christopher KOBAYASHI <chris@disavowed.jp>
+Reported-by: Doug Brown <doug@downtowndougbrown.com>
+Signed-off-by: Vincent Duvert <vincent.ldev@duvert.net>
+[lukas: add missing tags]
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Cc: stable@vger.kernel.org # v5.1+
+Cc: Yue Haibing <yuehaibing@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/appletalk/atalk_proc.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/appletalk/atalk_proc.c
++++ b/net/appletalk/atalk_proc.c
+@@ -229,6 +229,8 @@ int __init atalk_proc_init(void)
+                                    sizeof(struct aarp_iter_state), NULL))
+               goto out;
++      return 0;
++
+ out:
+       remove_proc_subtree("atalk", init_net.proc_net);
+       return -ENOMEM;
diff --git a/queue-5.4/dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch b/queue-5.4/dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch
new file mode 100644 (file)
index 0000000..72c6765
--- /dev/null
@@ -0,0 +1,47 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: YueHaibing <yuehaibing@huawei.com>
+Date: Tue, 4 Aug 2020 21:26:43 +0800
+Subject: dpaa2-eth: Fix passing zero to 'PTR_ERR' warning
+
+From: YueHaibing <yuehaibing@huawei.com>
+
+[ Upstream commit 02afa9c66bb954c6959877c70d9e128dcf0adce7 ]
+
+Fix smatch warning:
+
+drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c:2419
+ alloc_channel() warn: passing zero to 'ERR_PTR'
+
+setup_dpcon() should return ERR_PTR(err) instead of zero in error
+handling case.
+
+Fixes: d7f5a9d89a55 ("dpaa2-eth: defer probe on object allocate")
+Signed-off-by: YueHaibing <yuehaibing@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
++++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+@@ -2090,7 +2090,7 @@ close:
+ free:
+       fsl_mc_object_free(dpcon);
+-      return NULL;
++      return ERR_PTR(err);
+ }
+ static void free_dpcon(struct dpaa2_eth_priv *priv,
+@@ -2114,8 +2114,8 @@ alloc_channel(struct dpaa2_eth_priv *pri
+               return NULL;
+       channel->dpcon = setup_dpcon(priv);
+-      if (IS_ERR_OR_NULL(channel->dpcon)) {
+-              err = PTR_ERR_OR_ZERO(channel->dpcon);
++      if (IS_ERR(channel->dpcon)) {
++              err = PTR_ERR(channel->dpcon);
+               goto err_setup;
+       }
diff --git a/queue-5.4/hv_netvsc-do-not-use-vf-device-if-link-is-down.patch b/queue-5.4/hv_netvsc-do-not-use-vf-device-if-link-is-down.patch
new file mode 100644 (file)
index 0000000..d160971
--- /dev/null
@@ -0,0 +1,45 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Stephen Hemminger <stephen@networkplumber.org>
+Date: Tue, 4 Aug 2020 09:54:15 -0700
+Subject: hv_netvsc: do not use VF device if link is down
+
+From: Stephen Hemminger <stephen@networkplumber.org>
+
+[ Upstream commit 7c9864bbccc23e1812ac82966555d68c13ea4006 ]
+
+If the accelerated networking SRIOV VF device has lost carrier
+use the synthetic network device which is available as backup
+path. This is a rare case since if VF link goes down, normally
+the VMBus device will lose external connectivity as well.
+But if the communication is between two VM's on the same host
+the VMBus device will still work.
+
+Reported-by: "Shah, Ashish N" <ashish.n.shah@intel.com>
+Fixes: 0c195567a8f6 ("netvsc: transparent VF management")
+Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
+Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/hyperv/netvsc_drv.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/hyperv/netvsc_drv.c
++++ b/drivers/net/hyperv/netvsc_drv.c
+@@ -531,12 +531,13 @@ static int netvsc_start_xmit(struct sk_b
+       u32 hash;
+       struct hv_page_buffer pb[MAX_PAGE_BUFFER_COUNT];
+-      /* if VF is present and up then redirect packets
+-       * already called with rcu_read_lock_bh
++      /* If VF is present and up then redirect packets to it.
++       * Skip the VF if it is marked down or has no carrier.
++       * If netpoll is in uses, then VF can not be used either.
+        */
+       vf_netdev = rcu_dereference_bh(net_device_ctx->vf_netdev);
+       if (vf_netdev && netif_running(vf_netdev) &&
+-          !netpoll_tx_running(net))
++          netif_carrier_ok(vf_netdev) && !netpoll_tx_running(net))
+               return netvsc_vf_xmit(net, vf_netdev, skb);
+       /* We will atmost need two pages to describe the rndis
diff --git a/queue-5.4/ipv4-silence-suspicious-rcu-usage-warning.patch b/queue-5.4/ipv4-silence-suspicious-rcu-usage-warning.patch
new file mode 100644 (file)
index 0000000..bba596f
--- /dev/null
@@ -0,0 +1,80 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 29 Jul 2020 11:37:13 +0300
+Subject: ipv4: Silence suspicious RCU usage warning
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit 83f3522860f702748143e022f1a546547314c715 ]
+
+fib_trie_unmerge() is called with RTNL held, but not from an RCU
+read-side critical section. This leads to the following warning [1] when
+the FIB alias list in a leaf is traversed with
+hlist_for_each_entry_rcu().
+
+Since the function is always called with RTNL held and since
+modification of the list is protected by RTNL, simply use
+hlist_for_each_entry() and silence the warning.
+
+[1]
+WARNING: suspicious RCU usage
+5.8.0-rc4-custom-01520-gc1f937f3f83b #30 Not tainted
+-----------------------------
+net/ipv4/fib_trie.c:1867 RCU-list traversed in non-reader section!!
+
+other info that might help us debug this:
+
+rcu_scheduler_active = 2, debug_locks = 1
+1 lock held by ip/164:
+ #0: ffffffff85a27850 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x49a/0xbd0
+
+stack backtrace:
+CPU: 0 PID: 164 Comm: ip Not tainted 5.8.0-rc4-custom-01520-gc1f937f3f83b #30
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014
+Call Trace:
+ dump_stack+0x100/0x184
+ lockdep_rcu_suspicious+0x153/0x15d
+ fib_trie_unmerge+0x608/0xdb0
+ fib_unmerge+0x44/0x360
+ fib4_rule_configure+0xc8/0xad0
+ fib_nl_newrule+0x37a/0x1dd0
+ rtnetlink_rcv_msg+0x4f7/0xbd0
+ netlink_rcv_skb+0x17a/0x480
+ rtnetlink_rcv+0x22/0x30
+ netlink_unicast+0x5ae/0x890
+ netlink_sendmsg+0x98a/0xf40
+ ____sys_sendmsg+0x879/0xa00
+ ___sys_sendmsg+0x122/0x190
+ __sys_sendmsg+0x103/0x1d0
+ __x64_sys_sendmsg+0x7d/0xb0
+ do_syscall_64+0x54/0xa0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+RIP: 0033:0x7fc80a234e97
+Code: Bad RIP value.
+RSP: 002b:00007ffef8b66798 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc80a234e97
+RDX: 0000000000000000 RSI: 00007ffef8b66800 RDI: 0000000000000003
+RBP: 000000005f141b1c R08: 0000000000000001 R09: 0000000000000000
+R10: 00007fc80a2a8ac0 R11: 0000000000000246 R12: 0000000000000001
+R13: 0000000000000000 R14: 00007ffef8b67008 R15: 0000556fccb10020
+
+Fixes: 0ddcf43d5d4a ("ipv4: FIB Local/MAIN table collapse")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_trie.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_trie.c
++++ b/net/ipv4/fib_trie.c
+@@ -1751,7 +1751,7 @@ struct fib_table *fib_trie_unmerge(struc
+       while ((l = leaf_walk_rcu(&tp, key)) != NULL) {
+               struct key_vector *local_l = NULL, *local_tp;
+-              hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
++              hlist_for_each_entry(fa, &l->leaf, fa_list) {
+                       struct fib_alias *new_fa;
+                       if (local_tb->tb_id != fa->tb_id)
diff --git a/queue-5.4/ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch b/queue-5.4/ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch
new file mode 100644 (file)
index 0000000..866e72e
--- /dev/null
@@ -0,0 +1,115 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Sat, 25 Jul 2020 15:40:53 -0700
+Subject: ipv6: fix memory leaks on IPV6_ADDRFORM path
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 8c0de6e96c9794cb523a516c465991a70245da1c ]
+
+IPV6_ADDRFORM causes resource leaks when converting an IPv6 socket
+to IPv4, particularly struct ipv6_ac_socklist. Similar to
+struct ipv6_mc_socklist, we should just close it on this path.
+
+This bug can be easily reproduced with the following C program:
+
+  #include <stdio.h>
+  #include <string.h>
+  #include <sys/types.h>
+  #include <sys/socket.h>
+  #include <arpa/inet.h>
+
+  int main()
+  {
+    int s, value;
+    struct sockaddr_in6 addr;
+    struct ipv6_mreq m6;
+
+    s = socket(AF_INET6, SOCK_DGRAM, 0);
+    addr.sin6_family = AF_INET6;
+    addr.sin6_port = htons(5000);
+    inet_pton(AF_INET6, "::ffff:192.168.122.194", &addr.sin6_addr);
+    connect(s, (struct sockaddr *)&addr, sizeof(addr));
+
+    inet_pton(AF_INET6, "fe80::AAAA", &m6.ipv6mr_multiaddr);
+    m6.ipv6mr_interface = 5;
+    setsockopt(s, SOL_IPV6, IPV6_JOIN_ANYCAST, &m6, sizeof(m6));
+
+    value = AF_INET;
+    setsockopt(s, SOL_IPV6, IPV6_ADDRFORM, &value, sizeof(value));
+
+    close(s);
+    return 0;
+  }
+
+Reported-by: ch3332xr@gmail.com
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/addrconf.h   |    1 +
+ net/ipv6/anycast.c       |   17 ++++++++++++-----
+ net/ipv6/ipv6_sockglue.c |    1 +
+ 3 files changed, 14 insertions(+), 5 deletions(-)
+
+--- a/include/net/addrconf.h
++++ b/include/net/addrconf.h
+@@ -273,6 +273,7 @@ int ipv6_sock_ac_join(struct sock *sk, i
+                     const struct in6_addr *addr);
+ int ipv6_sock_ac_drop(struct sock *sk, int ifindex,
+                     const struct in6_addr *addr);
++void __ipv6_sock_ac_close(struct sock *sk);
+ void ipv6_sock_ac_close(struct sock *sk);
+ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr);
+--- a/net/ipv6/anycast.c
++++ b/net/ipv6/anycast.c
+@@ -183,7 +183,7 @@ int ipv6_sock_ac_drop(struct sock *sk, i
+       return 0;
+ }
+-void ipv6_sock_ac_close(struct sock *sk)
++void __ipv6_sock_ac_close(struct sock *sk)
+ {
+       struct ipv6_pinfo *np = inet6_sk(sk);
+       struct net_device *dev = NULL;
+@@ -191,10 +191,7 @@ void ipv6_sock_ac_close(struct sock *sk)
+       struct net *net = sock_net(sk);
+       int     prev_index;
+-      if (!np->ipv6_ac_list)
+-              return;
+-
+-      rtnl_lock();
++      ASSERT_RTNL();
+       pac = np->ipv6_ac_list;
+       np->ipv6_ac_list = NULL;
+@@ -211,6 +208,16 @@ void ipv6_sock_ac_close(struct sock *sk)
+               sock_kfree_s(sk, pac, sizeof(*pac));
+               pac = next;
+       }
++}
++
++void ipv6_sock_ac_close(struct sock *sk)
++{
++      struct ipv6_pinfo *np = inet6_sk(sk);
++
++      if (!np->ipv6_ac_list)
++              return;
++      rtnl_lock();
++      __ipv6_sock_ac_close(sk);
+       rtnl_unlock();
+ }
+--- a/net/ipv6/ipv6_sockglue.c
++++ b/net/ipv6/ipv6_sockglue.c
+@@ -205,6 +205,7 @@ static int do_ipv6_setsockopt(struct soc
+                       fl6_free_socklist(sk);
+                       __ipv6_sock_mc_close(sk);
++                      __ipv6_sock_ac_close(sk);
+                       /*
+                        * Sock is moving from IPv6 to IPv4 (sk_prot), so
diff --git a/queue-5.4/ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch b/queue-5.4/ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch
new file mode 100644 (file)
index 0000000..28244b4
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Xiyu Yang <xiyuyang19@fudan.edu.cn>
+Date: Sat, 25 Jul 2020 16:02:18 +0800
+Subject: ipv6: Fix nexthop refcnt leak when creating ipv6 route info
+
+From: Xiyu Yang <xiyuyang19@fudan.edu.cn>
+
+[ Upstream commit 706ec919164622ff5ce822065472d0f30a9e9dd2 ]
+
+ip6_route_info_create() invokes nexthop_get(), which increases the
+refcount of the "nh".
+
+When ip6_route_info_create() returns, local variable "nh" becomes
+invalid, so the refcount should be decreased to keep refcount balanced.
+
+The reference counting issue happens in one exception handling path of
+ip6_route_info_create(). When nexthops can not be used with source
+routing, the function forgets to decrease the refcnt increased by
+nexthop_get(), causing a refcnt leak.
+
+Fix this issue by pulling up the error source routing handling when
+nexthops can not be used with source routing.
+
+Fixes: f88d8ea67fbd ("ipv6: Plumb support for nexthop object in a fib6_info")
+Signed-off-by: Xiyu Yang <xiyuyang19@fudan.edu.cn>
+Signed-off-by: Xin Tan <tanxin.ctf@gmail.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -3686,14 +3686,14 @@ static struct fib6_info *ip6_route_info_
+       rt->fib6_src.plen = cfg->fc_src_len;
+ #endif
+       if (nh) {
+-              if (!nexthop_get(nh)) {
+-                      NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+-                      goto out;
+-              }
+               if (rt->fib6_src.plen) {
+                       NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing");
+                       goto out;
+               }
++              if (!nexthop_get(nh)) {
++                      NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
++                      goto out;
++              }
+               rt->nh = nh;
+               fib6_nh = nexthop_fib6_nh(rt->nh);
+       } else {
diff --git a/queue-5.4/net-ethernet-mtk_eth_soc-fix-mtu-warnings.patch b/queue-5.4/net-ethernet-mtk_eth_soc-fix-mtu-warnings.patch
new file mode 100644 (file)
index 0000000..3d61f82
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Landen Chao <landen.chao@mediatek.com>
+Date: Wed, 29 Jul 2020 10:15:17 +0200
+Subject: net: ethernet: mtk_eth_soc: fix MTU warnings
+
+From: Landen Chao <landen.chao@mediatek.com>
+
+[ Upstream commit 555a893303872e044fb86f0a5834ce78d41ad2e2 ]
+
+In recent kernel versions there are warnings about incorrect MTU size
+like these:
+
+eth0: mtu greater than device maximum
+mtk_soc_eth 1b100000.ethernet eth0: error -22 setting MTU to include DSA overhead
+
+Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports")
+Fixes: 72579e14a1d3 ("net: dsa: don't fail to probe if we couldn't set the MTU")
+Fixes: 7a4c53bee332 ("net: report invalid mtu value via netlink extack")
+Signed-off-by: Landen Chao <landen.chao@mediatek.com>
+Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -2878,6 +2878,8 @@ static int mtk_add_mac(struct mtk_eth *e
+       eth->netdev[id]->irq = eth->irq[0];
+       eth->netdev[id]->dev.of_node = np;
++      eth->netdev[id]->max_mtu = MTK_MAX_RX_LENGTH - MTK_RX_ETH_HLEN;
++
+       return 0;
+ free_netdev:
diff --git a/queue-5.4/net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch b/queue-5.4/net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch
new file mode 100644 (file)
index 0000000..19bdcfe
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Fri, 31 Jul 2020 20:12:05 +0200
+Subject: net: gre: recompute gre csum for sctp over gre tunnels
+
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+
+[ Upstream commit 622e32b7d4a6492cf5c1f759ef833f817418f7b3 ]
+
+The GRE tunnel can be used to transport traffic that does not rely on an
+Internet checksum (e.g. SCTP). The issue can be triggered by creating a GRE
+or GRETAP tunnel and transmitting SCTP traffic on top of it where CRC
+offload has been disabled. In order to fix the issue we need to
+recompute the GRE csum in gre_gso_segment() not relying on the inner
+checksum.
+The issue is still present when we have the CRC offload enabled.
+In this case we need to disable the CRC offload if we require GRE
+checksum since otherwise skb_checksum() will report a wrong value.
+
+Fixes: 90017accff61 ("sctp: Add GSO support")
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/gre_offload.c |   13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/gre_offload.c
++++ b/net/ipv4/gre_offload.c
+@@ -15,12 +15,12 @@ static struct sk_buff *gre_gso_segment(s
+                                      netdev_features_t features)
+ {
+       int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb);
++      bool need_csum, need_recompute_csum, gso_partial;
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
+       u16 mac_offset = skb->mac_header;
+       __be16 protocol = skb->protocol;
+       u16 mac_len = skb->mac_len;
+       int gre_offset, outer_hlen;
+-      bool need_csum, gso_partial;
+       if (!skb->encapsulation)
+               goto out;
+@@ -41,6 +41,7 @@ static struct sk_buff *gre_gso_segment(s
+       skb->protocol = skb->inner_protocol;
+       need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM);
++      need_recompute_csum = skb->csum_not_inet;
+       skb->encap_hdr_csum = need_csum;
+       features &= skb->dev->hw_enc_features;
+@@ -98,7 +99,15 @@ static struct sk_buff *gre_gso_segment(s
+               }
+               *(pcsum + 1) = 0;
+-              *pcsum = gso_make_checksum(skb, 0);
++              if (need_recompute_csum && !skb_is_gso(skb)) {
++                      __wsum csum;
++
++                      csum = skb_checksum(skb, gre_offset,
++                                          skb->len - gre_offset, 0);
++                      *pcsum = csum_fold(csum);
++              } else {
++                      *pcsum = gso_make_checksum(skb, 0);
++              }
+       } while ((skb = skb->next));
+ out:
+       return segs;
diff --git a/queue-5.4/net-lan78xx-replace-bogus-endpoint-lookup.patch b/queue-5.4/net-lan78xx-replace-bogus-endpoint-lookup.patch
new file mode 100644 (file)
index 0000000..179d92b
--- /dev/null
@@ -0,0 +1,189 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Johan Hovold <johan@kernel.org>
+Date: Tue, 28 Jul 2020 14:10:31 +0200
+Subject: net: lan78xx: replace bogus endpoint lookup
+
+From: Johan Hovold <johan@kernel.org>
+
+[ Upstream commit ea060b352654a8de1e070140d25fe1b7e4d50310 ]
+
+Drop the bogus endpoint-lookup helper which could end up accepting
+interfaces based on endpoints belonging to unrelated altsettings.
+
+Note that the returned bulk pipes and interrupt endpoint descriptor
+were never actually used. Instead the bulk-endpoint numbers are
+hardcoded to 1 and 2 (matching the specification), while the interrupt-
+endpoint descriptor was assumed to be the third descriptor created by
+USB core.
+
+Try to bring some order to this by dropping the bogus lookup helper and
+adding the missing endpoint sanity checks while keeping the interrupt-
+descriptor assumption for now.
+
+Signed-off-by: Johan Hovold <johan@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/lan78xx.c |  117 +++++++++++-----------------------------------
+ 1 file changed, 30 insertions(+), 87 deletions(-)
+
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -377,10 +377,6 @@ struct lan78xx_net {
+       struct tasklet_struct   bh;
+       struct delayed_work     wq;
+-      struct usb_host_endpoint *ep_blkin;
+-      struct usb_host_endpoint *ep_blkout;
+-      struct usb_host_endpoint *ep_intr;
+-
+       int                     msg_enable;
+       struct urb              *urb_intr;
+@@ -2868,78 +2864,12 @@ lan78xx_start_xmit(struct sk_buff *skb,
+       return NETDEV_TX_OK;
+ }
+-static int
+-lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf)
+-{
+-      int tmp;
+-      struct usb_host_interface *alt = NULL;
+-      struct usb_host_endpoint *in = NULL, *out = NULL;
+-      struct usb_host_endpoint *status = NULL;
+-
+-      for (tmp = 0; tmp < intf->num_altsetting; tmp++) {
+-              unsigned ep;
+-
+-              in = NULL;
+-              out = NULL;
+-              status = NULL;
+-              alt = intf->altsetting + tmp;
+-
+-              for (ep = 0; ep < alt->desc.bNumEndpoints; ep++) {
+-                      struct usb_host_endpoint *e;
+-                      int intr = 0;
+-
+-                      e = alt->endpoint + ep;
+-                      switch (e->desc.bmAttributes) {
+-                      case USB_ENDPOINT_XFER_INT:
+-                              if (!usb_endpoint_dir_in(&e->desc))
+-                                      continue;
+-                              intr = 1;
+-                              /* FALLTHROUGH */
+-                      case USB_ENDPOINT_XFER_BULK:
+-                              break;
+-                      default:
+-                              continue;
+-                      }
+-                      if (usb_endpoint_dir_in(&e->desc)) {
+-                              if (!intr && !in)
+-                                      in = e;
+-                              else if (intr && !status)
+-                                      status = e;
+-                      } else {
+-                              if (!out)
+-                                      out = e;
+-                      }
+-              }
+-              if (in && out)
+-                      break;
+-      }
+-      if (!alt || !in || !out)
+-              return -EINVAL;
+-
+-      dev->pipe_in = usb_rcvbulkpipe(dev->udev,
+-                                     in->desc.bEndpointAddress &
+-                                     USB_ENDPOINT_NUMBER_MASK);
+-      dev->pipe_out = usb_sndbulkpipe(dev->udev,
+-                                      out->desc.bEndpointAddress &
+-                                      USB_ENDPOINT_NUMBER_MASK);
+-      dev->ep_intr = status;
+-
+-      return 0;
+-}
+-
+ static int lan78xx_bind(struct lan78xx_net *dev, struct usb_interface *intf)
+ {
+       struct lan78xx_priv *pdata = NULL;
+       int ret;
+       int i;
+-      ret = lan78xx_get_endpoints(dev, intf);
+-      if (ret) {
+-              netdev_warn(dev->net, "lan78xx_get_endpoints failed: %d\n",
+-                          ret);
+-              return ret;
+-      }
+-
+       dev->data[0] = (unsigned long)kzalloc(sizeof(*pdata), GFP_KERNEL);
+       pdata = (struct lan78xx_priv *)(dev->data[0]);
+@@ -3708,6 +3638,7 @@ static void lan78xx_stat_monitor(struct
+ static int lan78xx_probe(struct usb_interface *intf,
+                        const struct usb_device_id *id)
+ {
++      struct usb_host_endpoint *ep_blkin, *ep_blkout, *ep_intr;
+       struct lan78xx_net *dev;
+       struct net_device *netdev;
+       struct usb_device *udev;
+@@ -3756,6 +3687,34 @@ static int lan78xx_probe(struct usb_inte
+       mutex_init(&dev->stats.access_lock);
++      if (intf->cur_altsetting->desc.bNumEndpoints < 3) {
++              ret = -ENODEV;
++              goto out2;
++      }
++
++      dev->pipe_in = usb_rcvbulkpipe(udev, BULK_IN_PIPE);
++      ep_blkin = usb_pipe_endpoint(udev, dev->pipe_in);
++      if (!ep_blkin || !usb_endpoint_is_bulk_in(&ep_blkin->desc)) {
++              ret = -ENODEV;
++              goto out2;
++      }
++
++      dev->pipe_out = usb_sndbulkpipe(udev, BULK_OUT_PIPE);
++      ep_blkout = usb_pipe_endpoint(udev, dev->pipe_out);
++      if (!ep_blkout || !usb_endpoint_is_bulk_out(&ep_blkout->desc)) {
++              ret = -ENODEV;
++              goto out2;
++      }
++
++      ep_intr = &intf->cur_altsetting->endpoint[2];
++      if (!usb_endpoint_is_int_in(&ep_intr->desc)) {
++              ret = -ENODEV;
++              goto out2;
++      }
++
++      dev->pipe_intr = usb_rcvintpipe(dev->udev,
++                                      usb_endpoint_num(&ep_intr->desc));
++
+       ret = lan78xx_bind(dev, intf);
+       if (ret < 0)
+               goto out2;
+@@ -3767,23 +3726,7 @@ static int lan78xx_probe(struct usb_inte
+       netdev->max_mtu = MAX_SINGLE_PACKET_SIZE;
+       netif_set_gso_max_size(netdev, MAX_SINGLE_PACKET_SIZE - MAX_HEADER);
+-      if (intf->cur_altsetting->desc.bNumEndpoints < 3) {
+-              ret = -ENODEV;
+-              goto out3;
+-      }
+-
+-      dev->ep_blkin = (intf->cur_altsetting)->endpoint + 0;
+-      dev->ep_blkout = (intf->cur_altsetting)->endpoint + 1;
+-      dev->ep_intr = (intf->cur_altsetting)->endpoint + 2;
+-
+-      dev->pipe_in = usb_rcvbulkpipe(udev, BULK_IN_PIPE);
+-      dev->pipe_out = usb_sndbulkpipe(udev, BULK_OUT_PIPE);
+-
+-      dev->pipe_intr = usb_rcvintpipe(dev->udev,
+-                                      dev->ep_intr->desc.bEndpointAddress &
+-                                      USB_ENDPOINT_NUMBER_MASK);
+-      period = dev->ep_intr->desc.bInterval;
+-
++      period = ep_intr->desc.bInterval;
+       maxp = usb_maxpacket(dev->udev, dev->pipe_intr, 0);
+       buf = kmalloc(maxp, GFP_KERNEL);
+       if (buf) {
diff --git a/queue-5.4/net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch b/queue-5.4/net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch
new file mode 100644 (file)
index 0000000..05fd021
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Xin Long <lucien.xin@gmail.com>
+Date: Tue, 4 Aug 2020 15:02:30 +0800
+Subject: net: thunderx: use spin_lock_bh in nicvf_set_rx_mode_task()
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit bab9693a9a8c6dd19f670408ec1e78e12a320682 ]
+
+A deadlock was triggered in the thunderx driver:
+
+        CPU0                    CPU1
+        ----                    ----
+   [01] lock(&(&nic->rx_mode_wq_lock)->rlock);
+                           [11] lock(&(&mc->mca_lock)->rlock);
+                           [12] lock(&(&nic->rx_mode_wq_lock)->rlock);
+   [02] <Interrupt> lock(&(&mc->mca_lock)->rlock);
+
+The path for each is:
+
+  [01] worker_thread() -> process_one_work() -> nicvf_set_rx_mode_task()
+  [02] mld_ifc_timer_expire()
+  [11] ipv6_add_dev() -> ipv6_dev_mc_inc() -> igmp6_group_added() ->
+  [12] dev_mc_add() -> __dev_set_rx_mode() -> nicvf_set_rx_mode()
+
+To fix it, bh needs to be disabled on [01], so that the timer on [02]
+wouldn't be triggered until rx_mode_wq_lock is released. So change
+to use spin_lock_bh() instead of spin_lock().
+
+Thanks to Paolo for helping with this.
+
+v1->v2:
+  - post to netdev.
+
+Reported-by: Rafael P. <rparrazo@redhat.com>
+Tested-by: Dean Nelson <dnelson@redhat.com>
+Fixes: 469998c861fa ("net: thunderx: prevent concurrent data re-writing by nicvf_set_rx_mode")
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/cavium/thunder/nicvf_main.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
++++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+@@ -2047,11 +2047,11 @@ static void nicvf_set_rx_mode_task(struc
+       /* Save message data locally to prevent them from
+        * being overwritten by next ndo_set_rx_mode call().
+        */
+-      spin_lock(&nic->rx_mode_wq_lock);
++      spin_lock_bh(&nic->rx_mode_wq_lock);
+       mode = vf_work->mode;
+       mc = vf_work->mc;
+       vf_work->mc = NULL;
+-      spin_unlock(&nic->rx_mode_wq_lock);
++      spin_unlock_bh(&nic->rx_mode_wq_lock);
+       __nicvf_set_rx_mode_task(mode, mc, nic);
+ }
diff --git a/queue-5.4/openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch b/queue-5.4/openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch
new file mode 100644 (file)
index 0000000..df821c3
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Peilin Ye <yepeilin.cs@gmail.com>
+Date: Fri, 31 Jul 2020 00:48:38 -0400
+Subject: openvswitch: Prevent kernel-infoleak in ovs_ct_put_key()
+
+From: Peilin Ye <yepeilin.cs@gmail.com>
+
+[ Upstream commit 9aba6c5b49254d5bee927d81593ed4429e91d4ae ]
+
+ovs_ct_put_key() is potentially copying uninitialized kernel stack memory
+into socket buffers, since the compiler may leave a 3-byte hole at the end
+of `struct ovs_key_ct_tuple_ipv4` and `struct ovs_key_ct_tuple_ipv6`. Fix
+it by initializing `orig` with memset().
+
+Fixes: 9dd7f8907c37 ("openvswitch: Add original direction conntrack tuple to sw_flow_key.")
+Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/conntrack.c |   38 ++++++++++++++++++++------------------
+ 1 file changed, 20 insertions(+), 18 deletions(-)
+
+--- a/net/openvswitch/conntrack.c
++++ b/net/openvswitch/conntrack.c
+@@ -276,10 +276,6 @@ void ovs_ct_fill_key(const struct sk_buf
+       ovs_ct_update_key(skb, NULL, key, false, false);
+ }
+-#define IN6_ADDR_INITIALIZER(ADDR) \
+-      { (ADDR).s6_addr32[0], (ADDR).s6_addr32[1], \
+-        (ADDR).s6_addr32[2], (ADDR).s6_addr32[3] }
+-
+ int ovs_ct_put_key(const struct sw_flow_key *swkey,
+                  const struct sw_flow_key *output, struct sk_buff *skb)
+ {
+@@ -301,24 +297,30 @@ int ovs_ct_put_key(const struct sw_flow_
+       if (swkey->ct_orig_proto) {
+               if (swkey->eth.type == htons(ETH_P_IP)) {
+-                      struct ovs_key_ct_tuple_ipv4 orig = {
+-                              output->ipv4.ct_orig.src,
+-                              output->ipv4.ct_orig.dst,
+-                              output->ct.orig_tp.src,
+-                              output->ct.orig_tp.dst,
+-                              output->ct_orig_proto,
+-                      };
++                      struct ovs_key_ct_tuple_ipv4 orig;
++
++                      memset(&orig, 0, sizeof(orig));
++                      orig.ipv4_src = output->ipv4.ct_orig.src;
++                      orig.ipv4_dst = output->ipv4.ct_orig.dst;
++                      orig.src_port = output->ct.orig_tp.src;
++                      orig.dst_port = output->ct.orig_tp.dst;
++                      orig.ipv4_proto = output->ct_orig_proto;
++
+                       if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,
+                                   sizeof(orig), &orig))
+                               return -EMSGSIZE;
+               } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+-                      struct ovs_key_ct_tuple_ipv6 orig = {
+-                              IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.src),
+-                              IN6_ADDR_INITIALIZER(output->ipv6.ct_orig.dst),
+-                              output->ct.orig_tp.src,
+-                              output->ct.orig_tp.dst,
+-                              output->ct_orig_proto,
+-                      };
++                      struct ovs_key_ct_tuple_ipv6 orig;
++
++                      memset(&orig, 0, sizeof(orig));
++                      memcpy(orig.ipv6_src, output->ipv6.ct_orig.src.s6_addr32,
++                             sizeof(orig.ipv6_src));
++                      memcpy(orig.ipv6_dst, output->ipv6.ct_orig.dst.s6_addr32,
++                             sizeof(orig.ipv6_dst));
++                      orig.src_port = output->ct.orig_tp.src;
++                      orig.dst_port = output->ct.orig_tp.dst;
++                      orig.ipv6_proto = output->ct_orig_proto;
++
+                       if (nla_put(skb, OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,
+                                   sizeof(orig), &orig))
+                               return -EMSGSIZE;
diff --git a/queue-5.4/pci-tegra-revert-tegra124-raw_violation_fixup.patch b/queue-5.4/pci-tegra-revert-tegra124-raw_violation_fixup.patch
new file mode 100644 (file)
index 0000000..e061980
--- /dev/null
@@ -0,0 +1,159 @@
+From e7b856dfcec6d3bf028adee8c65342d7035914a1 Mon Sep 17 00:00:00 2001
+From: Nicolas Chauvet <kwizart@gmail.com>
+Date: Sat, 18 Jul 2020 12:07:10 +0200
+Subject: PCI: tegra: Revert tegra124 raw_violation_fixup
+
+From: Nicolas Chauvet <kwizart@gmail.com>
+
+commit e7b856dfcec6d3bf028adee8c65342d7035914a1 upstream.
+
+As reported in https://bugzilla.kernel.org/206217 , raw_violation_fixup
+is causing more harm than good in some common use-cases.
+
+This patch is a partial revert of commit:
+
+191cd6fb5d2c ("PCI: tegra: Add SW fixup for RAW violations")
+
+and fixes the following regression since then.
+
+* Description:
+
+When both the NIC and MMC are used one can see the following message:
+
+  NETDEV WATCHDOG: enp1s0 (r8169): transmit queue 0 timed out
+
+and
+
+  pcieport 0000:00:02.0: AER: Uncorrected (Non-Fatal) error received: 0000:01:00.0
+  r8169 0000:01:00.0: AER: PCIe Bus Error: severity=Uncorrected (Non-Fatal), type=Transaction Layer, (Requester ID)
+  r8169 0000:01:00.0: AER:   device [10ec:8168] error status/mask=00004000/00400000
+  r8169 0000:01:00.0: AER:    [14] CmpltTO                (First)
+  r8169 0000:01:00.0: AER: can't recover (no error_detected callback)
+  pcieport 0000:00:02.0: AER: device recovery failed
+
+After that, the ethernet NIC is not functional anymore even after
+reloading the r8169 module. After a reboot, this is reproducible by
+copying a large file over the NIC to the MMC.
+
+For some reason this is not reproducible when files are copied to a tmpfs.
+
+* Little background on the fixup, by Manikanta Maddireddy:
+  "In the internal testing with dGPU on Tegra124, CmplTO is reported by
+dGPU. This happened because FIFO queue in AFI(AXI to PCIe) module
+get full by upstream posted writes. Back to back upstream writes
+interleaved with infrequent reads, triggers RAW violation and CmpltTO.
+This is fixed by reducing the posted write credits and by changing
+updateFC timer frequency. These settings are fixed after stress test.
+
+In the current case, RTL NIC is also reporting CmplTO. These settings
+seems to be aggravating the issue instead of fixing it."
+
+Link: https://lore.kernel.org/r/20200718100710.15398-1-kwizart@gmail.com
+Fixes: 191cd6fb5d2c ("PCI: tegra: Add SW fixup for RAW violations")
+Signed-off-by: Nicolas Chauvet <kwizart@gmail.com>
+Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+Reviewed-by: Manikanta Maddireddy <mmaddireddy@nvidia.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ drivers/pci/controller/pci-tegra.c |   32 --------------------------------
+ 1 file changed, 32 deletions(-)
+
+--- a/drivers/pci/controller/pci-tegra.c
++++ b/drivers/pci/controller/pci-tegra.c
+@@ -181,13 +181,6 @@
+ #define AFI_PEXBIAS_CTRL_0            0x168
+-#define RP_PRIV_XP_DL         0x00000494
+-#define  RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD     (0x1ff << 1)
+-
+-#define RP_RX_HDR_LIMIT               0x00000e00
+-#define  RP_RX_HDR_LIMIT_PW_MASK      (0xff << 8)
+-#define  RP_RX_HDR_LIMIT_PW           (0x0e << 8)
+-
+ #define RP_ECTL_2_R1  0x00000e84
+ #define  RP_ECTL_2_R1_RX_CTLE_1C_MASK         0xffff
+@@ -323,7 +316,6 @@ struct tegra_pcie_soc {
+       bool program_uphy;
+       bool update_clamp_threshold;
+       bool program_deskew_time;
+-      bool raw_violation_fixup;
+       bool update_fc_timer;
+       bool has_cache_bars;
+       struct {
+@@ -669,23 +661,6 @@ static void tegra_pcie_apply_sw_fixup(st
+               writel(value, port->base + RP_VEND_CTL0);
+       }
+-      /* Fixup for read after write violation. */
+-      if (soc->raw_violation_fixup) {
+-              value = readl(port->base + RP_RX_HDR_LIMIT);
+-              value &= ~RP_RX_HDR_LIMIT_PW_MASK;
+-              value |= RP_RX_HDR_LIMIT_PW;
+-              writel(value, port->base + RP_RX_HDR_LIMIT);
+-
+-              value = readl(port->base + RP_PRIV_XP_DL);
+-              value |= RP_PRIV_XP_DL_GEN2_UPD_FC_TSHOLD;
+-              writel(value, port->base + RP_PRIV_XP_DL);
+-
+-              value = readl(port->base + RP_VEND_XP);
+-              value &= ~RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK;
+-              value |= soc->update_fc_threshold;
+-              writel(value, port->base + RP_VEND_XP);
+-      }
+-
+       if (soc->update_fc_timer) {
+               value = readl(port->base + RP_VEND_XP);
+               value &= ~RP_VEND_XP_UPDATE_FC_THRESHOLD_MASK;
+@@ -2511,7 +2486,6 @@ static const struct tegra_pcie_soc tegra
+       .program_uphy = true,
+       .update_clamp_threshold = false,
+       .program_deskew_time = false,
+-      .raw_violation_fixup = false,
+       .update_fc_timer = false,
+       .has_cache_bars = true,
+       .ectl.enable = false,
+@@ -2541,7 +2515,6 @@ static const struct tegra_pcie_soc tegra
+       .program_uphy = true,
+       .update_clamp_threshold = false,
+       .program_deskew_time = false,
+-      .raw_violation_fixup = false,
+       .update_fc_timer = false,
+       .has_cache_bars = false,
+       .ectl.enable = false,
+@@ -2554,8 +2527,6 @@ static const struct tegra_pcie_soc tegra
+       .pads_pll_ctl = PADS_PLL_CTL_TEGRA30,
+       .tx_ref_sel = PADS_PLL_CTL_TXCLKREF_BUF_EN,
+       .pads_refclk_cfg0 = 0x44ac44ac,
+-      /* FC threshold is bit[25:18] */
+-      .update_fc_threshold = 0x03fc0000,
+       .has_pex_clkreq_en = true,
+       .has_pex_bias_ctrl = true,
+       .has_intr_prsnt_sense = true,
+@@ -2565,7 +2536,6 @@ static const struct tegra_pcie_soc tegra
+       .program_uphy = true,
+       .update_clamp_threshold = true,
+       .program_deskew_time = false,
+-      .raw_violation_fixup = true,
+       .update_fc_timer = false,
+       .has_cache_bars = false,
+       .ectl.enable = false,
+@@ -2589,7 +2559,6 @@ static const struct tegra_pcie_soc tegra
+       .program_uphy = true,
+       .update_clamp_threshold = true,
+       .program_deskew_time = true,
+-      .raw_violation_fixup = false,
+       .update_fc_timer = true,
+       .has_cache_bars = false,
+       .ectl = {
+@@ -2631,7 +2600,6 @@ static const struct tegra_pcie_soc tegra
+       .program_uphy = false,
+       .update_clamp_threshold = false,
+       .program_deskew_time = false,
+-      .raw_violation_fixup = false,
+       .update_fc_timer = false,
+       .has_cache_bars = false,
+       .ectl.enable = false,
diff --git a/queue-5.4/revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch b/queue-5.4/revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch
new file mode 100644 (file)
index 0000000..3f66d16
--- /dev/null
@@ -0,0 +1,79 @@
+From b506923ee44ae87fc9f4de16b53feb313623e146 Mon Sep 17 00:00:00 2001
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+Date: Thu, 2 Jul 2020 11:52:02 +0000
+Subject: Revert "powerpc/kasan: Fix shadow pages allocation failure"
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+commit b506923ee44ae87fc9f4de16b53feb313623e146 upstream.
+
+This reverts commit d2a91cef9bbdeb87b7449fdab1a6be6000930210.
+
+This commit moved too much work into kasan_init(). The allocation
+of shadow pages has to be moved for the reason explained in that
+patch, but the allocation of page tables still needs to be done
+before switching to the final hash table.
+
+First revert the incorrect commit; the following patch redoes it
+properly.
+
+Fixes: d2a91cef9bbd ("powerpc/kasan: Fix shadow pages allocation failure")
+Cc: stable@vger.kernel.org
+Reported-by: Erhard F. <erhard_f@mailbox.org>
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=208181
+Link: https://lore.kernel.org/r/3667deb0911affbf999b99f87c31c77d5e870cd2.1593690707.git.christophe.leroy@csgroup.eu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ arch/powerpc/include/asm/kasan.h      |    2 ++
+ arch/powerpc/mm/init_32.c             |    2 ++
+ arch/powerpc/mm/kasan/kasan_init_32.c |    4 +---
+ 3 files changed, 5 insertions(+), 3 deletions(-)
+
+--- a/arch/powerpc/include/asm/kasan.h
++++ b/arch/powerpc/include/asm/kasan.h
+@@ -27,9 +27,11 @@
+ #ifdef CONFIG_KASAN
+ void kasan_early_init(void);
++void kasan_mmu_init(void);
+ void kasan_init(void);
+ #else
+ static inline void kasan_init(void) { }
++static inline void kasan_mmu_init(void) { }
+ #endif
+ #endif /* __ASSEMBLY */
+--- a/arch/powerpc/mm/init_32.c
++++ b/arch/powerpc/mm/init_32.c
+@@ -175,6 +175,8 @@ void __init MMU_init(void)
+       btext_unmap();
+ #endif
++      kasan_mmu_init();
++
+       setup_kup();
+       /* Shortly after that, the entire linear mapping will be available */
+--- a/arch/powerpc/mm/kasan/kasan_init_32.c
++++ b/arch/powerpc/mm/kasan/kasan_init_32.c
+@@ -129,7 +129,7 @@ static void __init kasan_remap_early_sha
+       flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+ }
+-static void __init kasan_mmu_init(void)
++void __init kasan_mmu_init(void)
+ {
+       int ret;
+       struct memblock_region *reg;
+@@ -156,8 +156,6 @@ static void __init kasan_mmu_init(void)
+ void __init kasan_init(void)
+ {
+-      kasan_mmu_init();
+-
+       kasan_remap_early_shadow_ro();
+       clear_page(kasan_early_shadow_page);
diff --git a/queue-5.4/revert-vxlan-fix-tos-value-before-xmit.patch b/queue-5.4/revert-vxlan-fix-tos-value-before-xmit.patch
new file mode 100644 (file)
index 0000000..3881188
--- /dev/null
@@ -0,0 +1,65 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Hangbin Liu <liuhangbin@gmail.com>
+Date: Wed, 5 Aug 2020 10:41:31 +0800
+Subject: Revert "vxlan: fix tos value before xmit"
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit a0dced17ad9dc08b1b25e0065b54c97a318e6e8b ]
+
+This reverts commit 71130f29979c7c7956b040673e6b9d5643003176.
+
+In commit 71130f29979c ("vxlan: fix tos value before xmit") we wanted to
+make sure the tos value is filtered by RT_TOS() based on RFC1349.
+
+       0     1     2     3     4     5     6     7
+    +-----+-----+-----+-----+-----+-----+-----+-----+
+    |   PRECEDENCE    |          TOS          | MBZ |
+    +-----+-----+-----+-----+-----+-----+-----+-----+
+
+But RFC1349 has been obsoleted by RFC2474. The new DSCP field is defined like
+
+       0     1     2     3     4     5     6     7
+    +-----+-----+-----+-----+-----+-----+-----+-----+
+    |          DS FIELD, DSCP           | ECN FIELD |
+    +-----+-----+-----+-----+-----+-----+-----+-----+
+
+So with
+
+IPTOS_TOS_MASK          0x1E
+RT_TOS(tos)            ((tos)&IPTOS_TOS_MASK)
+
+the first 3 bits of DSCP info will get lost.
+
+To take all the DSCP info in xmit, we should revert the patch and just push
+all tos bits to ip_tunnel_ecn_encap(), which will handle the ECN field later.
+
+Fixes: 71130f29979c ("vxlan: fix tos value before xmit")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Acked-by: Guillaume Nault <gnault@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -2550,7 +2550,7 @@ static void vxlan_xmit_one(struct sk_buf
+               ndst = &rt->dst;
+               skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
+-              tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
++              tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
+               ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
+               err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
+                                     vni, md, flags, udp_sum);
+@@ -2590,7 +2590,7 @@ static void vxlan_xmit_one(struct sk_buf
+               skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
+-              tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
++              tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
+               ttl = ttl ? : ip6_dst_hoplimit(ndst);
+               skb_scrub_packet(skb, xnet);
+               err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
diff --git a/queue-5.4/rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch b/queue-5.4/rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch
new file mode 100644 (file)
index 0000000..00dc068
--- /dev/null
@@ -0,0 +1,166 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: David Howells <dhowells@redhat.com>
+Date: Wed, 29 Jul 2020 00:03:56 +0100
+Subject: rxrpc: Fix race between recvmsg and sendmsg on immediate call failure
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 65550098c1c4db528400c73acf3e46bfa78d9264 ]
+
+There's a race between rxrpc_sendmsg setting up a call, but then failing to
+send anything on it due to an error, and recvmsg() seeing the call
+completion occur and trying to return the state to the user.
+
+An assertion fails in rxrpc_recvmsg() because the call has already been
+released from the socket and is about to be released again as recvmsg deals
+with it.  (The recvmsg_q queue on the socket holds a ref, so there's no
+problem with use-after-free.)
+
+We also have to be careful not to end up reporting an error twice, in such
+a way that both returns indicate to userspace that the user ID supplied
+with the call is no longer in use - which could cause the client to
+malfunction if it recycles the user ID fast enough.
+
+Fix this by the following means:
+
+ (1) When sendmsg() creates a call after the point that the call has been
+     successfully added to the socket, don't return any errors through
+     sendmsg(), but rather complete the call and let recvmsg() retrieve
+     them.  Make sendmsg() return 0 at this point.  Further calls to
+     sendmsg() for that call will fail with ESHUTDOWN.
+
+     Note that at this point, we haven't sent any packets yet, so the
+     server doesn't yet know about the call.
+
+ (2) If sendmsg() returns an error when it was expected to create a new
+     call, it means that the user ID wasn't used.
+
+ (3) Mark the call disconnected before marking it completed to prevent an
+     oops in rxrpc_release_call().
+
+ (4) recvmsg() will then retrieve the error and set MSG_EOR to indicate
+     that the user ID is no longer known by the kernel.
+
+An oops like the following is produced:
+
+       kernel BUG at net/rxrpc/recvmsg.c:605!
+       ...
+       RIP: 0010:rxrpc_recvmsg+0x256/0x5ae
+       ...
+       Call Trace:
+        ? __init_waitqueue_head+0x2f/0x2f
+        ____sys_recvmsg+0x8a/0x148
+        ? import_iovec+0x69/0x9c
+        ? copy_msghdr_from_user+0x5c/0x86
+        ___sys_recvmsg+0x72/0xaa
+        ? __fget_files+0x22/0x57
+        ? __fget_light+0x46/0x51
+        ? fdget+0x9/0x1b
+        do_recvmmsg+0x15e/0x232
+        ? _raw_spin_unlock+0xa/0xb
+        ? vtime_delta+0xf/0x25
+        __x64_sys_recvmmsg+0x2c/0x2f
+        do_syscall_64+0x4c/0x78
+        entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Fixes: 357f5ef64628 ("rxrpc: Call rxrpc_release_call() on error in rxrpc_new_client_call()")
+Reported-by: syzbot+b54969381df354936d96@syzkaller.appspotmail.com
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rxrpc/call_object.c |   27 +++++++++++++++++++--------
+ net/rxrpc/conn_object.c |    8 +++++---
+ net/rxrpc/recvmsg.c     |    2 +-
+ net/rxrpc/sendmsg.c     |    3 +++
+ 4 files changed, 28 insertions(+), 12 deletions(-)
+
+--- a/net/rxrpc/call_object.c
++++ b/net/rxrpc/call_object.c
+@@ -288,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call
+        */
+       ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
+       if (ret < 0)
+-              goto error;
++              goto error_attached_to_socket;
+       trace_rxrpc_call(call->debug_id, rxrpc_call_connected,
+                        atomic_read(&call->usage), here, NULL);
+@@ -308,18 +308,29 @@ struct rxrpc_call *rxrpc_new_client_call
+ error_dup_user_ID:
+       write_unlock(&rx->call_lock);
+       release_sock(&rx->sk);
+-      ret = -EEXIST;
+-
+-error:
+       __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+-                                  RX_CALL_DEAD, ret);
++                                  RX_CALL_DEAD, -EEXIST);
+       trace_rxrpc_call(call->debug_id, rxrpc_call_error,
+-                       atomic_read(&call->usage), here, ERR_PTR(ret));
++                       atomic_read(&call->usage), here, ERR_PTR(-EEXIST));
+       rxrpc_release_call(rx, call);
+       mutex_unlock(&call->user_mutex);
+       rxrpc_put_call(call, rxrpc_call_put);
+-      _leave(" = %d", ret);
+-      return ERR_PTR(ret);
++      _leave(" = -EEXIST");
++      return ERR_PTR(-EEXIST);
++
++      /* We got an error, but the call is attached to the socket and is in
++       * need of release.  However, we might now race with recvmsg() when
++       * completing the call queues it.  Return 0 from sys_sendmsg() and
++       * leave the error to recvmsg() to deal with.
++       */
++error_attached_to_socket:
++      trace_rxrpc_call(call->debug_id, rxrpc_call_error,
++                       atomic_read(&call->usage), here, ERR_PTR(ret));
++      set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
++      __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
++                                  RX_CALL_DEAD, ret);
++      _leave(" = c=%08x [err]", call->debug_id);
++      return call;
+ }
+ /*
+--- a/net/rxrpc/conn_object.c
++++ b/net/rxrpc/conn_object.c
+@@ -212,9 +212,11 @@ void rxrpc_disconnect_call(struct rxrpc_
+       call->peer->cong_cwnd = call->cong_cwnd;
+-      spin_lock_bh(&conn->params.peer->lock);
+-      hlist_del_rcu(&call->error_link);
+-      spin_unlock_bh(&conn->params.peer->lock);
++      if (!hlist_unhashed(&call->error_link)) {
++              spin_lock_bh(&call->peer->lock);
++              hlist_del_rcu(&call->error_link);
++              spin_unlock_bh(&call->peer->lock);
++      }
+       if (rxrpc_is_client_call(call))
+               return rxrpc_disconnect_client_call(call);
+--- a/net/rxrpc/recvmsg.c
++++ b/net/rxrpc/recvmsg.c
+@@ -541,7 +541,7 @@ try_again:
+                       goto error_unlock_call;
+       }
+-      if (msg->msg_name) {
++      if (msg->msg_name && call->peer) {
+               struct sockaddr_rxrpc *srx = msg->msg_name;
+               size_t len = sizeof(call->peer->srx);
+--- a/net/rxrpc/sendmsg.c
++++ b/net/rxrpc/sendmsg.c
+@@ -683,6 +683,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *
+               if (IS_ERR(call))
+                       return PTR_ERR(call);
+               /* ... and we have the call lock. */
++              ret = 0;
++              if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE)
++                      goto out_put_unlock;
+       } else {
+               switch (READ_ONCE(call->state)) {
+               case RXRPC_CALL_UNINITIALISED:
diff --git a/queue-5.4/selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch b/queue-5.4/selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch
new file mode 100644 (file)
index 0000000..0795205
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Willem de Bruijn <willemb@google.com>
+Date: Wed, 5 Aug 2020 04:40:45 -0400
+Subject: selftests/net: relax cpu affinity requirement in msg_zerocopy test
+
+From: Willem de Bruijn <willemb@google.com>
+
+[ Upstream commit 16f6458f2478b55e2b628797bc81a4455045c74e ]
+
+The msg_zerocopy test pins the sender and receiver threads to separate
+cores to reduce variance between runs.
+
+But it hardcodes the cores and skips core 0, so it fails on machines
+with the selected cores offline, or simply fewer cores.
+
+The test mainly gives code coverage in automated runs. The throughput
+of zerocopy ('-z') and non-zerocopy runs is logged for manual
+inspection.
+
+Continue even when sched_setaffinity fails. Just log to warn anyone
+interpreting the data.
+
+Fixes: 07b65c5b31ce ("test: add msg_zerocopy test")
+Reported-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Acked-by: Colin Ian King <colin.king@canonical.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/net/msg_zerocopy.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- a/tools/testing/selftests/net/msg_zerocopy.c
++++ b/tools/testing/selftests/net/msg_zerocopy.c
+@@ -125,9 +125,8 @@ static int do_setcpu(int cpu)
+       CPU_ZERO(&mask);
+       CPU_SET(cpu, &mask);
+       if (sched_setaffinity(0, sizeof(mask), &mask))
+-              error(1, 0, "setaffinity %d", cpu);
+-
+-      if (cfg_verbose)
++              fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
++      else if (cfg_verbose)
+               fprintf(stderr, "cpu: %u\n", cpu);
+       return 0;
index cca878780e39a031ad3134919df40db073344250..1e5174384a51f04d04747742d285ad3bd600d24a 100644 (file)
@@ -44,3 +44,22 @@ igb-reinit_locked-should-be-called-with-rtnl_lock.patch
 atm-fix-atm_dev-refcnt-leaks-in-atmtcp_remove_persis.patch
 tools-lib-traceevent-fix-memory-leak-in-process_dyna.patch
 drivers-hv-vmbus-ignore-channelmsg_tl_connect_result.patch
+xattr-break-delegations-in-set-remove-xattr.patch
+revert-powerpc-kasan-fix-shadow-pages-allocation-failure.patch
+pci-tegra-revert-tegra124-raw_violation_fixup.patch
+ipv4-silence-suspicious-rcu-usage-warning.patch
+ipv6-fix-memory-leaks-on-ipv6_addrform-path.patch
+ipv6-fix-nexthop-refcnt-leak-when-creating-ipv6-route-info.patch
+net-ethernet-mtk_eth_soc-fix-mtu-warnings.patch
+rxrpc-fix-race-between-recvmsg-and-sendmsg-on-immediate-call-failure.patch
+vxlan-ensure-fdb-dump-is-performed-under-rcu.patch
+net-lan78xx-replace-bogus-endpoint-lookup.patch
+appletalk-fix-atalk_proc_init-return-path.patch
+dpaa2-eth-fix-passing-zero-to-ptr_err-warning.patch
+hv_netvsc-do-not-use-vf-device-if-link-is-down.patch
+net-gre-recompute-gre-csum-for-sctp-over-gre-tunnels.patch
+net-thunderx-use-spin_lock_bh-in-nicvf_set_rx_mode_task.patch
+openvswitch-prevent-kernel-infoleak-in-ovs_ct_put_key.patch
+revert-vxlan-fix-tos-value-before-xmit.patch
+selftests-net-relax-cpu-affinity-requirement-in-msg_zerocopy-test.patch
+tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch
diff --git a/queue-5.4/tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch b/queue-5.4/tcp-apply-a-floor-of-1-for-rtt-samples-from-tcp-timestamps.patch
new file mode 100644 (file)
index 0000000..8b373df
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Jianfeng Wang <jfwang@google.com>
+Date: Thu, 30 Jul 2020 23:49:16 +0000
+Subject: tcp: apply a floor of 1 for RTT samples from TCP timestamps
+
+From: Jianfeng Wang <jfwang@google.com>
+
+[ Upstream commit 730e700e2c19d87e578ff0e7d8cb1d4a02b036d2 ]
+
+For retransmitted packets, TCP needs to resort to using TCP timestamps
+for computing RTT samples. In the common case where the data and ACK
+fall in the same 1-millisecond interval, TCP senders with millisecond-
+granularity TCP timestamps compute a ca_rtt_us of 0. This ca_rtt_us
+of 0 propagates to rs->rtt_us.
+
+This value of 0 can cause performance problems for congestion control
+modules. For example, in BBR, the zero min_rtt sample can bring the
+min_rtt and BDP estimate down to 0, reduce snd_cwnd and result in a
+low throughput. It would be hard to mitigate this with filtering in
+the congestion control module, because the proper floor to apply would
+depend on the method of RTT sampling (using timestamp options or
+internally-saved transmission timestamps).
+
+This fix applies a floor of 1 for the RTT sample delta from TCP
+timestamps, so that seq_rtt_us, ca_rtt_us, and rs->rtt_us will be at
+least 1 * (USEC_PER_SEC / TCP_TS_HZ).
+
+Note that the receiver RTT computation in tcp_rcv_rtt_measure() and
+min_rtt computation in tcp_update_rtt_min() both already apply a floor
+of 1 timestamp tick, so this commit makes the code more consistent in
+avoiding this edge case of a value of 0.
+
+Signed-off-by: Jianfeng Wang <jfwang@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Kevin Yang <yyd@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2944,6 +2944,8 @@ static bool tcp_ack_update_rtt(struct so
+               u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
+               if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
++                      if (!delta)
++                              delta = 1;
+                       seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+                       ca_rtt_us = seq_rtt_us;
+               }
diff --git a/queue-5.4/vxlan-ensure-fdb-dump-is-performed-under-rcu.patch b/queue-5.4/vxlan-ensure-fdb-dump-is-performed-under-rcu.patch
new file mode 100644 (file)
index 0000000..d98a5ce
--- /dev/null
@@ -0,0 +1,96 @@
+From foo@baz Mon 10 Aug 2020 02:07:44 PM CEST
+From: Ido Schimmel <idosch@mellanox.com>
+Date: Wed, 29 Jul 2020 11:34:36 +0300
+Subject: vxlan: Ensure FDB dump is performed under RCU
+
+From: Ido Schimmel <idosch@mellanox.com>
+
+[ Upstream commit b5141915b5aec3b29a63db869229e3741ebce258 ]
+
+The commit cited below removed the RCU read-side critical section from
+rtnl_fdb_dump() which means that the ndo_fdb_dump() callback is invoked
+without RCU protection.
+
+This results in the following warning [1] in the VXLAN driver, which
+relied on the callback being invoked from an RCU read-side critical
+section.
+
+Fix this by calling rcu_read_lock() in the VXLAN driver, as already done
+in the bridge driver.
+
+[1]
+WARNING: suspicious RCU usage
+5.8.0-rc4-custom-01521-g481007553ce6 #29 Not tainted
+-----------------------------
+drivers/net/vxlan.c:1379 RCU-list traversed in non-reader section!!
+
+other info that might help us debug this:
+
+rcu_scheduler_active = 2, debug_locks = 1
+1 lock held by bridge/166:
+ #0: ffffffff85a27850 (rtnl_mutex){+.+.}-{3:3}, at: netlink_dump+0xea/0x1090
+
+stack backtrace:
+CPU: 1 PID: 166 Comm: bridge Not tainted 5.8.0-rc4-custom-01521-g481007553ce6 #29
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-2.fc32 04/01/2014
+Call Trace:
+ dump_stack+0x100/0x184
+ lockdep_rcu_suspicious+0x153/0x15d
+ vxlan_fdb_dump+0x51e/0x6d0
+ rtnl_fdb_dump+0x4dc/0xad0
+ netlink_dump+0x540/0x1090
+ __netlink_dump_start+0x695/0x950
+ rtnetlink_rcv_msg+0x802/0xbd0
+ netlink_rcv_skb+0x17a/0x480
+ rtnetlink_rcv+0x22/0x30
+ netlink_unicast+0x5ae/0x890
+ netlink_sendmsg+0x98a/0xf40
+ __sys_sendto+0x279/0x3b0
+ __x64_sys_sendto+0xe6/0x1a0
+ do_syscall_64+0x54/0xa0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+RIP: 0033:0x7fe14fa2ade0
+Code: Bad RIP value.
+RSP: 002b:00007fff75bb5b88 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
+RAX: ffffffffffffffda RBX: 00005614b1ba0020 RCX: 00007fe14fa2ade0
+RDX: 000000000000011c RSI: 00007fff75bb5b90 RDI: 0000000000000003
+RBP: 00007fff75bb5b90 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00005614b1b89160
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+
+Fixes: 5e6d24358799 ("bridge: netlink dump interface at par with brctl")
+Signed-off-by: Ido Schimmel <idosch@mellanox.com>
+Reviewed-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/vxlan.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/vxlan.c
++++ b/drivers/net/vxlan.c
+@@ -1225,6 +1225,7 @@ static int vxlan_fdb_dump(struct sk_buff
+       for (h = 0; h < FDB_HASH_SIZE; ++h) {
+               struct vxlan_fdb *f;
++              rcu_read_lock();
+               hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
+                       struct vxlan_rdst *rd;
+@@ -1237,12 +1238,15 @@ static int vxlan_fdb_dump(struct sk_buff
+                                                    cb->nlh->nlmsg_seq,
+                                                    RTM_NEWNEIGH,
+                                                    NLM_F_MULTI, rd);
+-                              if (err < 0)
++                              if (err < 0) {
++                                      rcu_read_unlock();
+                                       goto out;
++                              }
+ skip:
+                               *idx += 1;
+                       }
+               }
++              rcu_read_unlock();
+       }
+ out:
+       return err;
diff --git a/queue-5.4/xattr-break-delegations-in-set-remove-xattr.patch b/queue-5.4/xattr-break-delegations-in-set-remove-xattr.patch
new file mode 100644 (file)
index 0000000..5bb8ea1
--- /dev/null
@@ -0,0 +1,181 @@
+From 08b5d5014a27e717826999ad20e394a8811aae92 Mon Sep 17 00:00:00 2001
+From: Frank van der Linden <fllinden@amazon.com>
+Date: Tue, 23 Jun 2020 22:39:18 +0000
+Subject: xattr: break delegations in {set,remove}xattr
+
+From: Frank van der Linden <fllinden@amazon.com>
+
+commit 08b5d5014a27e717826999ad20e394a8811aae92 upstream.
+
+set/removexattr on an exported filesystem should break NFS delegations.
+This is true in general, but also for the upcoming support for
+RFC 8276 (NFSv4 extended attribute support). Make sure that they do.
+
+Additionally, they need to grow a _locked variant, since callers might
+call this with i_rwsem held (like the NFS server code).
+
+Cc: stable@vger.kernel.org # v4.9+
+Cc: linux-fsdevel@vger.kernel.org
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Frank van der Linden <fllinden@amazon.com>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+
+---
+ fs/xattr.c            |   84 +++++++++++++++++++++++++++++++++++++++++++++-----
+ include/linux/xattr.h |    2 +
+ 2 files changed, 79 insertions(+), 7 deletions(-)
+
+--- a/fs/xattr.c
++++ b/fs/xattr.c
+@@ -204,10 +204,22 @@ int __vfs_setxattr_noperm(struct dentry
+       return error;
+ }
+-
++/**
++ * __vfs_setxattr_locked: set an extended attribute while holding the inode
++ * lock
++ *
++ *  @dentry - object to perform setxattr on
++ *  @name - xattr name to set
++ *  @value - value to set @name to
++ *  @size - size of @value
++ *  @flags - flags to pass into filesystem operations
++ *  @delegated_inode - on return, will contain an inode pointer that
++ *  a delegation was broken on, NULL if none.
++ */
+ int
+-vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+-              size_t size, int flags)
++__vfs_setxattr_locked(struct dentry *dentry, const char *name,
++              const void *value, size_t size, int flags,
++              struct inode **delegated_inode)
+ {
+       struct inode *inode = dentry->d_inode;
+       int error;
+@@ -216,15 +228,40 @@ vfs_setxattr(struct dentry *dentry, cons
+       if (error)
+               return error;
+-      inode_lock(inode);
+       error = security_inode_setxattr(dentry, name, value, size, flags);
+       if (error)
+               goto out;
++      error = try_break_deleg(inode, delegated_inode);
++      if (error)
++              goto out;
++
+       error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+ out:
++      return error;
++}
++EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
++
++int
++vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
++              size_t size, int flags)
++{
++      struct inode *inode = dentry->d_inode;
++      struct inode *delegated_inode = NULL;
++      int error;
++
++retry_deleg:
++      inode_lock(inode);
++      error = __vfs_setxattr_locked(dentry, name, value, size, flags,
++          &delegated_inode);
+       inode_unlock(inode);
++
++      if (delegated_inode) {
++              error = break_deleg_wait(&delegated_inode);
++              if (!error)
++                      goto retry_deleg;
++      }
+       return error;
+ }
+ EXPORT_SYMBOL_GPL(vfs_setxattr);
+@@ -378,8 +415,18 @@ __vfs_removexattr(struct dentry *dentry,
+ }
+ EXPORT_SYMBOL(__vfs_removexattr);
++/**
++ * __vfs_removexattr_locked: remove an extended attribute while holding the
++ * inode lock
++ *
++ *  @dentry - object to perform removexattr on
++ *  @name - name of xattr to remove
++ *  @delegated_inode - on return, will contain an inode pointer that
++ *  a delegation was broken on, NULL if none.
++ */
+ int
+-vfs_removexattr(struct dentry *dentry, const char *name)
++__vfs_removexattr_locked(struct dentry *dentry, const char *name,
++              struct inode **delegated_inode)
+ {
+       struct inode *inode = dentry->d_inode;
+       int error;
+@@ -388,11 +435,14 @@ vfs_removexattr(struct dentry *dentry, c
+       if (error)
+               return error;
+-      inode_lock(inode);
+       error = security_inode_removexattr(dentry, name);
+       if (error)
+               goto out;
++      error = try_break_deleg(inode, delegated_inode);
++      if (error)
++              goto out;
++
+       error = __vfs_removexattr(dentry, name);
+       if (!error) {
+@@ -401,12 +451,32 @@ vfs_removexattr(struct dentry *dentry, c
+       }
+ out:
++      return error;
++}
++EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
++
++int
++vfs_removexattr(struct dentry *dentry, const char *name)
++{
++      struct inode *inode = dentry->d_inode;
++      struct inode *delegated_inode = NULL;
++      int error;
++
++retry_deleg:
++      inode_lock(inode);
++      error = __vfs_removexattr_locked(dentry, name, &delegated_inode);
+       inode_unlock(inode);
++
++      if (delegated_inode) {
++              error = break_deleg_wait(&delegated_inode);
++              if (!error)
++                      goto retry_deleg;
++      }
++
+       return error;
+ }
+ EXPORT_SYMBOL_GPL(vfs_removexattr);
+-
+ /*
+  * Extended attribute SET operations
+  */
+--- a/include/linux/xattr.h
++++ b/include/linux/xattr.h
+@@ -51,8 +51,10 @@ ssize_t vfs_getxattr(struct dentry *, co
+ ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
+ int __vfs_setxattr(struct dentry *, struct inode *, const char *, const void *, size_t, int);
+ int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int);
++int __vfs_setxattr_locked(struct dentry *, const char *, const void *, size_t, int, struct inode **);
+ int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
+ int __vfs_removexattr(struct dentry *, const char *);
++int __vfs_removexattr_locked(struct dentry *, const char *, struct inode **);
+ int vfs_removexattr(struct dentry *, const char *);
+ ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);