From: Greg Kroah-Hartman Date: Sun, 28 Jun 2020 13:27:36 +0000 (+0200) Subject: 4.19-stable patches X-Git-Tag: v5.7.7~59 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b8fa4a3c66c1ffdac3b2d7091cce275b5e77b6d0;p=thirdparty%2Fkernel%2Fstable-queue.git 4.19-stable patches added patches: ibmveth-fix-max-mtu-limit.patch ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch net-bridge-enfore-alignment-for-ethernet-address.patch net-core-reduce-recursion-limit-value.patch net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch net-fix-memleak-in-register_netdevice.patch net-fix-the-arp-error-in-some-cases.patch net-increment-xmit_recursion-level-in-dev_direct_xmit.patch net-phy-check-harder-for-errors-in-get_phy_id.patch net-place-xmit-recursion-in-softnet-data.patch net-usb-ax88179_178a-fix-packet-alignment-padding.patch net-use-correct-this_cpu-primitive-in-dev_recursion_level.patch rocker-fix-incorrect-error-handling-in-dma_rings_init.patch rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch sch_cake-fix-a-few-style-nits.patch sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch --- diff --git a/queue-4.19/ibmveth-fix-max-mtu-limit.patch b/queue-4.19/ibmveth-fix-max-mtu-limit.patch new file mode 100644 index 00000000000..705382f74fc --- /dev/null +++ b/queue-4.19/ibmveth-fix-max-mtu-limit.patch @@ -0,0 +1,35 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Thomas Falcon +Date: Thu, 18 Jun 2020 10:43:46 -0500 +Subject: 
ibmveth: Fix max MTU limit + +From: Thomas Falcon + +[ Upstream commit 5948378b26d89f8aa5eac37629dbd0616ce8d7a7 ] + +The max MTU limit defined for ibmveth is not accounting for +virtual ethernet buffer overhead, which is twenty-two additional +bytes set aside for the ethernet header and eight additional bytes +of an opaque handle reserved for use by the hypervisor. Update the +max MTU to reflect this overhead. + +Fixes: d894be57ca92 ("ethernet: use net core MTU range checking in more drivers") +Fixes: 110447f8269a ("ethernet: fix min/max MTU typos") +Signed-off-by: Thomas Falcon +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ibm/ibmveth.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/ibm/ibmveth.c ++++ b/drivers/net/ethernet/ibm/ibmveth.c +@@ -1695,7 +1695,7 @@ static int ibmveth_probe(struct vio_dev + } + + netdev->min_mtu = IBMVETH_MIN_MTU; +- netdev->max_mtu = ETH_MAX_MTU; ++ netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH; + + memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN); + diff --git a/queue-4.19/ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch b/queue-4.19/ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch new file mode 100644 index 00000000000..beb40e904ae --- /dev/null +++ b/queue-4.19/ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch @@ -0,0 +1,114 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Taehee Yoo +Date: Tue, 16 Jun 2020 16:04:00 +0000 +Subject: ip6_gre: fix use-after-free in ip6gre_tunnel_lookup() + +From: Taehee Yoo + +[ Upstream commit dafabb6590cb15f300b77c095d50312e2c7c8e0f ] + +In the datapath, the ip6gre_tunnel_lookup() is used and it internally uses +fallback tunnel device pointer, which is fb_tunnel_dev. +This pointer variable should be set to NULL when a fb interface is deleted. +But there is no routine to set fb_tunnel_dev pointer to NULL. 
+So, this pointer will be still used after interface is deleted and +it eventually results in the use-after-free problem. + +Test commands: + ip netns add A + ip netns add B + ip link add eth0 type veth peer name eth1 + ip link set eth0 netns A + ip link set eth1 netns B + + ip netns exec A ip link set lo up + ip netns exec A ip link set eth0 up + ip netns exec A ip link add ip6gre1 type ip6gre local fc:0::1 \ + remote fc:0::2 + ip netns exec A ip -6 a a fc:100::1/64 dev ip6gre1 + ip netns exec A ip link set ip6gre1 up + ip netns exec A ip -6 a a fc:0::1/64 dev eth0 + ip netns exec A ip link set ip6gre0 up + + ip netns exec B ip link set lo up + ip netns exec B ip link set eth1 up + ip netns exec B ip link add ip6gre1 type ip6gre local fc:0::2 \ + remote fc:0::1 + ip netns exec B ip -6 a a fc:100::2/64 dev ip6gre1 + ip netns exec B ip link set ip6gre1 up + ip netns exec B ip -6 a a fc:0::2/64 dev eth1 + ip netns exec B ip link set ip6gre0 up + ip netns exec A ping fc:100::2 -s 60000 & + ip netns del B + +Splat looks like: +[ 73.087285][ C1] BUG: KASAN: use-after-free in ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.088361][ C1] Read of size 4 at addr ffff888040559218 by task ping/1429 +[ 73.089317][ C1] +[ 73.089638][ C1] CPU: 1 PID: 1429 Comm: ping Not tainted 5.7.0+ #602 +[ 73.090531][ C1] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 73.091725][ C1] Call Trace: +[ 73.092160][ C1] +[ 73.092556][ C1] dump_stack+0x96/0xdb +[ 73.093122][ C1] print_address_description.constprop.6+0x2cc/0x450 +[ 73.094016][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.094894][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.095767][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.096619][ C1] kasan_report+0x154/0x190 +[ 73.097209][ C1] ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.097989][ C1] ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre] +[ 73.098750][ C1] ? 
gre_del_protocol+0x60/0x60 [gre] +[ 73.099500][ C1] gre_rcv+0x1c5/0x1450 [ip6_gre] +[ 73.100199][ C1] ? ip6gre_header+0xf00/0xf00 [ip6_gre] +[ 73.100985][ C1] ? rcu_read_lock_sched_held+0xc0/0xc0 +[ 73.101830][ C1] ? ip6_input_finish+0x5/0xf0 +[ 73.102483][ C1] ip6_protocol_deliver_rcu+0xcbb/0x1510 +[ 73.103296][ C1] ip6_input_finish+0x5b/0xf0 +[ 73.103920][ C1] ip6_input+0xcd/0x2c0 +[ 73.104473][ C1] ? ip6_input_finish+0xf0/0xf0 +[ 73.105115][ C1] ? rcu_read_lock_held+0x90/0xa0 +[ 73.105783][ C1] ? rcu_read_lock_sched_held+0xc0/0xc0 +[ 73.106548][ C1] ipv6_rcv+0x1f1/0x300 +[ ... ] + +Suggested-by: Eric Dumazet +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Signed-off-by: Taehee Yoo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -132,6 +132,7 @@ static struct ip6_tnl *ip6gre_tunnel_loo + gre_proto == htons(ETH_P_ERSPAN2)) ? 
+ ARPHRD_ETHER : ARPHRD_IP6GRE; + int score, cand_score = 4; ++ struct net_device *ndev; + + for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) { + if (!ipv6_addr_equal(local, &t->parms.laddr) || +@@ -243,9 +244,9 @@ static struct ip6_tnl *ip6gre_tunnel_loo + if (t && t->dev->flags & IFF_UP) + return t; + +- dev = ign->fb_tunnel_dev; +- if (dev && dev->flags & IFF_UP) +- return netdev_priv(dev); ++ ndev = READ_ONCE(ign->fb_tunnel_dev); ++ if (ndev && ndev->flags & IFF_UP) ++ return netdev_priv(ndev); + + return NULL; + } +@@ -418,6 +419,8 @@ static void ip6gre_tunnel_uninit(struct + + ip6gre_tunnel_unlink_md(ign, t); + ip6gre_tunnel_unlink(ign, t); ++ if (ign->fb_tunnel_dev == dev) ++ WRITE_ONCE(ign->fb_tunnel_dev, NULL); + dst_cache_reset(&t->dst_cache); + dev_put(dev); + } diff --git a/queue-4.19/ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch b/queue-4.19/ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch new file mode 100644 index 00000000000..b053f18db90 --- /dev/null +++ b/queue-4.19/ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch @@ -0,0 +1,117 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Taehee Yoo +Date: Tue, 16 Jun 2020 16:51:51 +0000 +Subject: ip_tunnel: fix use-after-free in ip_tunnel_lookup() + +From: Taehee Yoo + +[ Upstream commit ba61539c6ae57f4146284a5cb4f7b7ed8d42bf45 ] + +In the datapath, the ip_tunnel_lookup() is used and it internally uses +fallback tunnel device pointer, which is fb_tunnel_dev. +This pointer variable should be set to NULL when a fb interface is deleted. +But there is no routine to set fb_tunnel_dev pointer to NULL. +So, this pointer will be still used after interface is deleted and +it eventually results in the use-after-free problem. 
+ +Test commands: + ip netns add A + ip netns add B + ip link add eth0 type veth peer name eth1 + ip link set eth0 netns A + ip link set eth1 netns B + + ip netns exec A ip link set lo up + ip netns exec A ip link set eth0 up + ip netns exec A ip link add gre1 type gre local 10.0.0.1 \ + remote 10.0.0.2 + ip netns exec A ip link set gre1 up + ip netns exec A ip a a 10.0.100.1/24 dev gre1 + ip netns exec A ip a a 10.0.0.1/24 dev eth0 + + ip netns exec B ip link set lo up + ip netns exec B ip link set eth1 up + ip netns exec B ip link add gre1 type gre local 10.0.0.2 \ + remote 10.0.0.1 + ip netns exec B ip link set gre1 up + ip netns exec B ip a a 10.0.100.2/24 dev gre1 + ip netns exec B ip a a 10.0.0.2/24 dev eth1 + ip netns exec A hping3 10.0.100.2 -2 --flood -d 60000 & + ip netns del B + +Splat looks like: +[ 77.793450][ C3] ================================================================== +[ 77.794702][ C3] BUG: KASAN: use-after-free in ip_tunnel_lookup+0xcc4/0xf30 +[ 77.795573][ C3] Read of size 4 at addr ffff888060bd9c84 by task hping3/2905 +[ 77.796398][ C3] +[ 77.796664][ C3] CPU: 3 PID: 2905 Comm: hping3 Not tainted 5.8.0-rc1+ #616 +[ 77.797474][ C3] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 77.798453][ C3] Call Trace: +[ 77.798815][ C3] +[ 77.799142][ C3] dump_stack+0x9d/0xdb +[ 77.799605][ C3] print_address_description.constprop.7+0x2cc/0x450 +[ 77.800365][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 +[ 77.800908][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 +[ 77.801517][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 +[ 77.802145][ C3] kasan_report+0x154/0x190 +[ 77.802821][ C3] ? ip_tunnel_lookup+0xcc4/0xf30 +[ 77.803503][ C3] ip_tunnel_lookup+0xcc4/0xf30 +[ 77.804165][ C3] __ipgre_rcv+0x1ab/0xaa0 [ip_gre] +[ 77.804862][ C3] ? rcu_read_lock_sched_held+0xc0/0xc0 +[ 77.805621][ C3] gre_rcv+0x304/0x1910 [ip_gre] +[ 77.806293][ C3] ? lock_acquire+0x1a9/0x870 +[ 77.806925][ C3] ? gre_rcv+0xfe/0x354 [gre] +[ 77.807559][ C3] ? 
erspan_xmit+0x2e60/0x2e60 [ip_gre] +[ 77.808305][ C3] ? rcu_read_lock_sched_held+0xc0/0xc0 +[ 77.809032][ C3] ? rcu_read_lock_held+0x90/0xa0 +[ 77.809713][ C3] gre_rcv+0x1b8/0x354 [gre] +[ ... ] + +Suggested-by: Eric Dumazet +Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.") +Signed-off-by: Taehee Yoo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -98,9 +98,10 @@ struct ip_tunnel *ip_tunnel_lookup(struc + __be32 remote, __be32 local, + __be32 key) + { +- unsigned int hash; + struct ip_tunnel *t, *cand = NULL; + struct hlist_head *head; ++ struct net_device *ndev; ++ unsigned int hash; + + hash = ip_tunnel_hash(key, remote); + head = &itn->tunnels[hash]; +@@ -175,8 +176,9 @@ struct ip_tunnel *ip_tunnel_lookup(struc + if (t && t->dev->flags & IFF_UP) + return t; + +- if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) +- return netdev_priv(itn->fb_tunnel_dev); ++ ndev = READ_ONCE(itn->fb_tunnel_dev); ++ if (ndev && ndev->flags & IFF_UP) ++ return netdev_priv(ndev); + + return NULL; + } +@@ -1212,9 +1214,9 @@ void ip_tunnel_uninit(struct net_device + struct ip_tunnel_net *itn; + + itn = net_generic(net, tunnel->ip_tnl_net_id); +- /* fb_tunnel_dev will be unregisted in net-exit call. 
*/ +- if (itn->fb_tunnel_dev != dev) +- ip_tunnel_del(itn, netdev_priv(dev)); ++ ip_tunnel_del(itn, netdev_priv(dev)); ++ if (itn->fb_tunnel_dev == dev) ++ WRITE_ONCE(itn->fb_tunnel_dev, NULL); + + dst_cache_reset(&tunnel->dst_cache); + } diff --git a/queue-4.19/mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch b/queue-4.19/mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch new file mode 100644 index 00000000000..2f96540b2f9 --- /dev/null +++ b/queue-4.19/mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch @@ -0,0 +1,57 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Wang Hai +Date: Thu, 11 Jun 2020 15:57:50 +0800 +Subject: mld: fix memory leak in ipv6_mc_destroy_dev() + +From: Wang Hai + +[ Upstream commit ea2fce88d2fd678ed9d45354ff49b73f1d5615dd ] + +Commit a84d01647989 ("mld: fix memory leak in mld_del_delrec()") fixed +the memory leak of MLD, but missing the ipv6_mc_destroy_dev() path, in +which mca_sources are leaked after ma_put(). + +Using ip6_mc_clear_src() to take care of the missing free. + +BUG: memory leak +unreferenced object 0xffff8881113d3180 (size 64): + comm "syz-executor071", pid 389, jiffies 4294887985 (age 17.943s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 ff 02 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [<000000002cbc483c>] kmalloc include/linux/slab.h:555 [inline] + [<000000002cbc483c>] kzalloc include/linux/slab.h:669 [inline] + [<000000002cbc483c>] ip6_mc_add1_src net/ipv6/mcast.c:2237 [inline] + [<000000002cbc483c>] ip6_mc_add_src+0x7f5/0xbb0 net/ipv6/mcast.c:2357 + [<0000000058b8b1ff>] ip6_mc_source+0xe0c/0x1530 net/ipv6/mcast.c:449 + [<000000000bfc4fb5>] do_ipv6_setsockopt.isra.12+0x1b2c/0x3b30 net/ipv6/ipv6_sockglue.c:754 + [<00000000e4e7a722>] ipv6_setsockopt+0xda/0x150 net/ipv6/ipv6_sockglue.c:950 + [<0000000029260d9a>] rawv6_setsockopt+0x45/0x100 net/ipv6/raw.c:1081 + [<000000005c1b46f9>] __sys_setsockopt+0x131/0x210 net/socket.c:2132 + [<000000008491f7db>] __do_sys_setsockopt net/socket.c:2148 [inline] + [<000000008491f7db>] __se_sys_setsockopt net/socket.c:2145 [inline] + [<000000008491f7db>] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2145 + [<00000000c7bc11c5>] do_syscall_64+0xa1/0x530 arch/x86/entry/common.c:295 + [<000000005fb7a3f3>] entry_SYSCALL_64_after_hwframe+0x49/0xb3 + +Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when set link down") +Reported-by: Hulk Robot +Signed-off-by: Wang Hai +Acked-by: Hangbin Liu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/mcast.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -2620,6 +2620,7 @@ void ipv6_mc_destroy_dev(struct inet6_de + idev->mc_list = i->next; + + write_unlock_bh(&idev->lock); ++ ip6_mc_clear_src(i); + ma_put(i); + write_lock_bh(&idev->lock); + } diff --git a/queue-4.19/net-bridge-enfore-alignment-for-ethernet-address.patch b/queue-4.19/net-bridge-enfore-alignment-for-ethernet-address.patch new file mode 100644 index 00000000000..c5104204845 --- /dev/null +++ b/queue-4.19/net-bridge-enfore-alignment-for-ethernet-address.patch @@ -0,0 +1,43 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Thomas Martitz +Date: Thu, 25 Jun 2020 14:26:03 +0200 +Subject: net: bridge: enfore alignment for ethernet address + +From: Thomas Martitz + +[ Upstream commit db7202dec92e6caa2706c21d6fc359af318bde2e ] + +The eth_addr member is passed to ether_addr functions that require +2-byte alignment, therefore the member must be properly aligned +to avoid unaligned accesses. + +The problem is in place since the initial merge of multicast to unicast: +commit 6db6f0eae6052b70885562e1733896647ec1d807 bridge: multicast to unicast + +Fixes: 6db6f0eae605 ("bridge: multicast to unicast") +Cc: Roopa Prabhu +Cc: Nikolay Aleksandrov +Cc: David S. Miller +Cc: Jakub Kicinski +Cc: Felix Fietkau +Cc: stable@vger.kernel.org +Signed-off-by: Thomas Martitz +Acked-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_private.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_private.h ++++ b/net/bridge/br_private.h +@@ -202,8 +202,8 @@ struct net_bridge_port_group { + struct rcu_head rcu; + struct timer_list timer; + struct br_ip addr; ++ unsigned char eth_addr[ETH_ALEN] __aligned(2); + unsigned char flags; +- unsigned char eth_addr[ETH_ALEN]; + }; + + struct net_bridge_mdb_entry diff --git a/queue-4.19/net-core-reduce-recursion-limit-value.patch b/queue-4.19/net-core-reduce-recursion-limit-value.patch new file mode 100644 index 00000000000..8b56d38b26e --- /dev/null +++ b/queue-4.19/net-core-reduce-recursion-limit-value.patch @@ -0,0 +1,81 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Taehee Yoo +Date: Tue, 16 Jun 2020 15:52:05 +0000 +Subject: net: core: reduce recursion limit value + +From: Taehee Yoo + +[ Upstream commit fb7861d14c8d7edac65b2fcb6e8031cb138457b2 ] + +In the current code, ->ndo_start_xmit() can be executed recursively only +10 times because of stack memory. +But, in the case of the vxlan, 10 recursion limit value results in +a stack overflow. +In the current code, the nested interface is limited by 8 depth. +There is no critical reason that the recursion limitation value should +be 10. +So, it would be good to be the same value with the limitation value of +nesting interface depth. 
+ +Test commands: + ip link add vxlan10 type vxlan vni 10 dstport 4789 srcport 4789 4789 + ip link set vxlan10 up + ip a a 192.168.10.1/24 dev vxlan10 + ip n a 192.168.10.2 dev vxlan10 lladdr fc:22:33:44:55:66 nud permanent + + for i in {9..0} + do + let A=$i+1 + ip link add vxlan$i type vxlan vni $i dstport 4789 srcport 4789 4789 + ip link set vxlan$i up + ip a a 192.168.$i.1/24 dev vxlan$i + ip n a 192.168.$i.2 dev vxlan$i lladdr fc:22:33:44:55:66 nud permanent + bridge fdb add fc:22:33:44:55:66 dev vxlan$A dst 192.168.$i.2 self + done + hping3 192.168.10.2 -2 -d 60000 + +Splat looks like: +[ 103.814237][ T1127] ============================================================================= +[ 103.871955][ T1127] BUG kmalloc-2k (Tainted: G B ): Padding overwritten. 0x00000000897a2e4f-0x000 +[ 103.873187][ T1127] ----------------------------------------------------------------------------- +[ 103.873187][ T1127] +[ 103.874252][ T1127] INFO: Slab 0x000000005cccc724 objects=5 used=5 fp=0x0000000000000000 flags=0x10000000001020 +[ 103.881323][ T1127] CPU: 3 PID: 1127 Comm: hping3 Tainted: G B 5.7.0+ #575 +[ 103.882131][ T1127] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 +[ 103.883006][ T1127] Call Trace: +[ 103.883324][ T1127] dump_stack+0x96/0xdb +[ 103.883716][ T1127] slab_err+0xad/0xd0 +[ 103.884106][ T1127] ? _raw_spin_unlock+0x1f/0x30 +[ 103.884620][ T1127] ? get_partial_node.isra.78+0x140/0x360 +[ 103.885214][ T1127] slab_pad_check.part.53+0xf7/0x160 +[ 103.885769][ T1127] ? pskb_expand_head+0x110/0xe10 +[ 103.886316][ T1127] check_slab+0x97/0xb0 +[ 103.886763][ T1127] alloc_debug_processing+0x84/0x1a0 +[ 103.887308][ T1127] ___slab_alloc+0x5a5/0x630 +[ 103.887765][ T1127] ? pskb_expand_head+0x110/0xe10 +[ 103.888265][ T1127] ? lock_downgrade+0x730/0x730 +[ 103.888762][ T1127] ? pskb_expand_head+0x110/0xe10 +[ 103.889244][ T1127] ? 
__slab_alloc+0x3e/0x80 +[ 103.889675][ T1127] __slab_alloc+0x3e/0x80 +[ 103.890108][ T1127] __kmalloc_node_track_caller+0xc7/0x420 +[ ... ] + +Fixes: 11a766ce915f ("net: Increase xmit RECURSION_LIMIT to 10.") +Signed-off-by: Taehee Yoo +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3004,7 +3004,7 @@ static inline int dev_recursion_level(vo + return this_cpu_read(softnet_data.xmit.recursion); + } + +-#define XMIT_RECURSION_LIMIT 10 ++#define XMIT_RECURSION_LIMIT 8 + static inline bool dev_xmit_recursion(void) + { + return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > diff --git a/queue-4.19/net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch b/queue-4.19/net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch new file mode 100644 index 00000000000..5234b82cf2d --- /dev/null +++ b/queue-4.19/net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch @@ -0,0 +1,57 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Tariq Toukan +Date: Mon, 22 Jun 2020 23:26:04 +0300 +Subject: net: Do not clear the sock TX queue in sk_set_socket() + +From: Tariq Toukan + +[ Upstream commit 41b14fb8724d5a4b382a63cb4a1a61880347ccb8 ] + +Clearing the sock TX queue in sk_set_socket() might cause unexpected +out-of-order transmit when called from sock_orphan(), as outstanding +packets can pick a different TX queue and bypass the ones already queued. + +This is undesired in general. More specifically, it breaks the in-order +scheduling property guarantee for device-offloaded TLS sockets. + +Remove the call to sk_tx_queue_clear() in sk_set_socket(), and add it +explicitly only where needed. + +Fixes: e022f0b4a03f ("net: Introduce sk_tx_queue_mapping") +Signed-off-by: Tariq Toukan +Reviewed-by: Boris Pismenny +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sock.h | 1 - + net/core/sock.c | 2 ++ + 2 files changed, 2 insertions(+), 1 deletion(-) + +--- a/include/net/sock.h ++++ b/include/net/sock.h +@@ -1775,7 +1775,6 @@ static inline int sk_rx_queue_get(const + + static inline void sk_set_socket(struct sock *sk, struct socket *sock) + { +- sk_tx_queue_clear(sk); + sk->sk_socket = sock; + } + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1540,6 +1540,7 @@ struct sock *sk_alloc(struct net *net, i + cgroup_sk_alloc(&sk->sk_cgrp_data); + sock_update_classid(&sk->sk_cgrp_data); + sock_update_netprioidx(&sk->sk_cgrp_data); ++ sk_tx_queue_clear(sk); + } + + return sk; +@@ -1747,6 +1748,7 @@ struct sock *sk_clone_lock(const struct + */ + sk_refcnt_debug_inc(newsk); + sk_set_socket(newsk, NULL); ++ sk_tx_queue_clear(newsk); + newsk->sk_wq = NULL; + + if (newsk->sk_prot->sockets_allocated) diff --git a/queue-4.19/net-fix-memleak-in-register_netdevice.patch b/queue-4.19/net-fix-memleak-in-register_netdevice.patch new file mode 100644 index 00000000000..66ba8c7c703 --- /dev/null +++ b/queue-4.19/net-fix-memleak-in-register_netdevice.patch @@ -0,0 +1,87 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Yang Yingliang +Date: Tue, 16 Jun 2020 09:39:21 +0000 +Subject: net: fix memleak in register_netdevice() + +From: Yang Yingliang + +[ Upstream commit 814152a89ed52c722ab92e9fbabcac3cb8a39245 ] + +I got a memleak report when doing some fuzz test: + +unreferenced object 0xffff888112584000 (size 13599): + comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) + hex dump (first 32 bytes): + 74 61 70 30 00 00 00 00 00 00 00 00 00 00 00 00 tap0............ + 00 ee d9 19 81 88 ff ff 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [<000000002f60ba65>] __kmalloc_node+0x309/0x3a0 + [<0000000075b211ec>] kvmalloc_node+0x7f/0xc0 + [<00000000d3a97396>] alloc_netdev_mqs+0x76/0xfc0 + [<00000000609c3655>] __tun_chr_ioctl+0x1456/0x3d70 + [<000000001127ca24>] ksys_ioctl+0xe5/0x130 + [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 + [<00000000e1023498>] do_syscall_64+0x56/0xa0 + [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +unreferenced object 0xffff888111845cc0 (size 8): + comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) + hex dump (first 8 bytes): + 74 61 70 30 00 88 ff ff tap0.... + backtrace: + [<000000004c159777>] kstrdup+0x35/0x70 + [<00000000d8b496ad>] kstrdup_const+0x3d/0x50 + [<00000000494e884a>] kvasprintf_const+0xf1/0x180 + [<0000000097880a2b>] kobject_set_name_vargs+0x56/0x140 + [<000000008fbdfc7b>] dev_set_name+0xab/0xe0 + [<000000005b99e3b4>] netdev_register_kobject+0xc0/0x390 + [<00000000602704fe>] register_netdevice+0xb61/0x1250 + [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70 + [<000000001127ca24>] ksys_ioctl+0xe5/0x130 + [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 + [<00000000e1023498>] do_syscall_64+0x56/0xa0 + [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +unreferenced object 0xffff88811886d800 (size 512): + comm "ip", pid 3048, jiffies 4294911734 (age 343.491s) + hex dump (first 32 bytes): + 00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00 .....N.......... + ff ff ff ff ff ff ff ff c0 66 3d a3 ff ff ff ff .........f=..... 
+ backtrace: + [<0000000050315800>] device_add+0x61e/0x1950 + [<0000000021008dfb>] netdev_register_kobject+0x17e/0x390 + [<00000000602704fe>] register_netdevice+0xb61/0x1250 + [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70 + [<000000001127ca24>] ksys_ioctl+0xe5/0x130 + [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0 + [<00000000e1023498>] do_syscall_64+0x56/0xa0 + [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +If call_netdevice_notifiers() failed, then rollback_registered() +calls netdev_unregister_kobject() which holds the kobject. The +reference cannot be put because the netdev won't be add to todo +list, so it will leads a memleak, we need put the reference to +avoid memleak. + +Reported-by: Hulk Robot +Signed-off-by: Yang Yingliang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -8753,6 +8753,13 @@ int register_netdevice(struct net_device + rcu_barrier(); + + dev->reg_state = NETREG_UNREGISTERED; ++ /* We should put the kobject that hold in ++ * netdev_unregister_kobject(), otherwise ++ * the net device cannot be freed when ++ * driver calls free_netdev(), because the ++ * kobject is being hold. 
++ */ ++ kobject_put(&dev->dev.kobj); + } + /* + * Prevent userspace races by waiting until the network diff --git a/queue-4.19/net-fix-the-arp-error-in-some-cases.patch b/queue-4.19/net-fix-the-arp-error-in-some-cases.patch new file mode 100644 index 00000000000..e7c99b1f29f --- /dev/null +++ b/queue-4.19/net-fix-the-arp-error-in-some-cases.patch @@ -0,0 +1,50 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: guodeqing +Date: Wed, 17 Jun 2020 10:07:16 +0800 +Subject: net: Fix the arp error in some cases + +From: guodeqing + +[ Upstream commit 5eea3a63ff4aba6a26002e657a6d21934b7e2b96 ] + +ie., +$ ifconfig eth0 6.6.6.6 netmask 255.255.255.0 + +$ ip rule add from 6.6.6.6 table 6666 + +$ ip route add 9.9.9.9 via 6.6.6.6 + +$ ping -I 6.6.6.6 9.9.9.9 +PING 9.9.9.9 (9.9.9.9) from 6.6.6.6 : 56(84) bytes of data. + +3 packets transmitted, 0 received, 100% packet loss, time 2079ms + +$ arp +Address HWtype HWaddress Flags Mask Iface +6.6.6.6 (incomplete) eth0 + +The arp request address is error, this is because fib_table_lookup in +fib_check_nh lookup the destination 9.9.9.9 nexthop, the scope of +the fib result is RT_SCOPE_LINK, the correct scope is RT_SCOPE_HOST. +Here I add a check of whether this is RT_TABLE_MAIN to solve this problem. + +Fixes: 3bfd847203c6 ("net: Use passed in table for nexthop lookups") +Signed-off-by: guodeqing +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_semantics.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -831,7 +831,7 @@ static int fib_check_nh(struct fib_confi + if (fl4.flowi4_scope < RT_SCOPE_LINK) + fl4.flowi4_scope = RT_SCOPE_LINK; + +- if (cfg->fc_table) ++ if (cfg->fc_table && cfg->fc_table != RT_TABLE_MAIN) + tbl = fib_get_table(net, cfg->fc_table); + + if (tbl) diff --git a/queue-4.19/net-increment-xmit_recursion-level-in-dev_direct_xmit.patch b/queue-4.19/net-increment-xmit_recursion-level-in-dev_direct_xmit.patch new file mode 100644 index 00000000000..40e2b063a64 --- /dev/null +++ b/queue-4.19/net-increment-xmit_recursion-level-in-dev_direct_xmit.patch @@ -0,0 +1,105 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Eric Dumazet +Date: Wed, 17 Jun 2020 22:23:25 -0700 +Subject: net: increment xmit_recursion level in dev_direct_xmit() + +From: Eric Dumazet + +[ Upstream commit 0ad6f6e767ec2f613418cbc7ebe5ec4c35af540c ] + +Back in commit f60e5990d9c1 ("ipv6: protect skb->sk accesses +from recursive dereference inside the stack") Hannes added code +so that IPv6 stack would not trust skb->sk for typical cases +where packet goes through 'standard' xmit path (__dev_queue_xmit()) + +Alas af_packet had a dev_direct_xmit() path that was not +dealing yet with xmit_recursion level. + +Also change sk_mc_loop() to dump a stack once only. 
+ +Without this patch, syzbot was able to trigger : + +[1] +[ 153.567378] WARNING: CPU: 7 PID: 11273 at net/core/sock.c:721 sk_mc_loop+0x51/0x70 +[ 153.567378] Modules linked in: nfnetlink ip6table_raw ip6table_filter iptable_raw iptable_nat nf_nat nf_conntrack nf_defrag_ipv4 nf_defrag_ipv6 iptable_filter macsec macvtap tap macvlan 8021q hsr wireguard libblake2s blake2s_x86_64 libblake2s_generic udp_tunnel ip6_udp_tunnel libchacha20poly1305 poly1305_x86_64 chacha_x86_64 libchacha curve25519_x86_64 libcurve25519_generic netdevsim batman_adv dummy team bridge stp llc w1_therm wire i2c_mux_pca954x i2c_mux cdc_acm ehci_pci ehci_hcd mlx4_en mlx4_ib ib_uverbs ib_core mlx4_core +[ 153.567386] CPU: 7 PID: 11273 Comm: b159172088 Not tainted 5.8.0-smp-DEV #273 +[ 153.567387] RIP: 0010:sk_mc_loop+0x51/0x70 +[ 153.567388] Code: 66 83 f8 0a 75 24 0f b6 4f 12 b8 01 00 00 00 31 d2 d3 e0 a9 bf ef ff ff 74 07 48 8b 97 f0 02 00 00 0f b6 42 3a 83 e0 01 5d c3 <0f> 0b b8 01 00 00 00 5d c3 0f b6 87 18 03 00 00 5d c0 e8 04 83 e0 +[ 153.567388] RSP: 0018:ffff95c69bb93990 EFLAGS: 00010212 +[ 153.567388] RAX: 0000000000000011 RBX: ffff95c6e0ee3e00 RCX: 0000000000000007 +[ 153.567389] RDX: ffff95c69ae50000 RSI: ffff95c6c30c3000 RDI: ffff95c6c30c3000 +[ 153.567389] RBP: ffff95c69bb93990 R08: ffff95c69a77f000 R09: 0000000000000008 +[ 153.567389] R10: 0000000000000040 R11: 00003e0e00026128 R12: ffff95c6c30c3000 +[ 153.567390] R13: ffff95c6cc4fd500 R14: ffff95c6f84500c0 R15: ffff95c69aa13c00 +[ 153.567390] FS: 00007fdc3a283700(0000) GS:ffff95c6ff9c0000(0000) knlGS:0000000000000000 +[ 153.567390] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 153.567391] CR2: 00007ffee758e890 CR3: 0000001f9ba20003 CR4: 00000000001606e0 +[ 153.567391] Call Trace: +[ 153.567391] ip6_finish_output2+0x34e/0x550 +[ 153.567391] __ip6_finish_output+0xe7/0x110 +[ 153.567391] ip6_finish_output+0x2d/0xb0 +[ 153.567392] ip6_output+0x77/0x120 +[ 153.567392] ? 
__ip6_finish_output+0x110/0x110 +[ 153.567392] ip6_local_out+0x3d/0x50 +[ 153.567392] ipvlan_queue_xmit+0x56c/0x5e0 +[ 153.567393] ? ksize+0x19/0x30 +[ 153.567393] ipvlan_start_xmit+0x18/0x50 +[ 153.567393] dev_direct_xmit+0xf3/0x1c0 +[ 153.567393] packet_direct_xmit+0x69/0xa0 +[ 153.567394] packet_sendmsg+0xbf0/0x19b0 +[ 153.567394] ? plist_del+0x62/0xb0 +[ 153.567394] sock_sendmsg+0x65/0x70 +[ 153.567394] sock_write_iter+0x93/0xf0 +[ 153.567394] new_sync_write+0x18e/0x1a0 +[ 153.567395] __vfs_write+0x29/0x40 +[ 153.567395] vfs_write+0xb9/0x1b0 +[ 153.567395] ksys_write+0xb1/0xe0 +[ 153.567395] __x64_sys_write+0x1a/0x20 +[ 153.567395] do_syscall_64+0x43/0x70 +[ 153.567396] entry_SYSCALL_64_after_hwframe+0x44/0xa9 +[ 153.567396] RIP: 0033:0x453549 +[ 153.567396] Code: Bad RIP value. +[ 153.567396] RSP: 002b:00007fdc3a282cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 +[ 153.567397] RAX: ffffffffffffffda RBX: 00000000004d32d0 RCX: 0000000000453549 +[ 153.567397] RDX: 0000000000000020 RSI: 0000000020000300 RDI: 0000000000000003 +[ 153.567398] RBP: 00000000004d32d8 R08: 0000000000000000 R09: 0000000000000000 +[ 153.567398] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004d32dc +[ 153.567398] R13: 00007ffee742260f R14: 00007fdc3a282dc0 R15: 00007fdc3a283700 +[ 153.567399] ---[ end trace c1d5ae2b1059ec62 ]--- + +f60e5990d9c1 ("ipv6: protect skb->sk accesses from recursive dereference inside the stack") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 2 ++ + net/core/sock.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3899,10 +3899,12 @@ int dev_direct_xmit(struct sk_buff *skb, + + local_bh_disable(); + ++ dev_xmit_recursion_inc(); + HARD_TX_LOCK(dev, txq, smp_processor_id()); + if (!netif_xmit_frozen_or_drv_stopped(txq)) + ret = netdev_start_xmit(skb, dev, txq, false); + HARD_TX_UNLOCK(dev, txq); ++ dev_xmit_recursion_dec(); + + local_bh_enable(); + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -640,7 +640,7 @@ bool sk_mc_loop(struct sock *sk) + return inet6_sk(sk)->mc_loop; + #endif + } +- WARN_ON(1); ++ WARN_ON_ONCE(1); + return true; + } + EXPORT_SYMBOL(sk_mc_loop); diff --git a/queue-4.19/net-phy-check-harder-for-errors-in-get_phy_id.patch b/queue-4.19/net-phy-check-harder-for-errors-in-get_phy_id.patch new file mode 100644 index 00000000000..56ed94dbd23 --- /dev/null +++ b/queue-4.19/net-phy-check-harder-for-errors-in-get_phy_id.patch @@ -0,0 +1,50 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Florian Fainelli +Date: Fri, 19 Jun 2020 11:47:47 -0700 +Subject: net: phy: Check harder for errors in get_phy_id() + +From: Florian Fainelli + +[ Upstream commit b2ffc75e2e990b09903f9d15ccd53bc5f3a4217c ] + +Commit 02a6efcab675 ("net: phy: allow scanning busses with missing +phys") added a special condition to return -ENODEV in case -ENODEV or +-EIO was returned from the first read of the MII_PHYSID1 register. + +In case the MDIO bus data line pull-up is not strong enough, the MDIO +bus controller will not flag this as a read error. This can happen when +a pluggable daughter card is not connected and weak internal pull-ups +are used (since that is the only option, otherwise the pins are +floating). 
+ +The second read of MII_PHYSID2 will be correctly flagged an error +though, but now we will return -EIO which will be treated as a hard +error, thus preventing MDIO bus scanning loops to continue successfully. + +Apply the same logic to both register reads, thus allowing the scanning +logic to proceed. + +Fixes: 02a6efcab675 ("net: phy: allow scanning busses with missing phys") +Reviewed-by: Andrew Lunn +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/phy_device.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -606,8 +606,10 @@ static int get_phy_id(struct mii_bus *bu + + /* Grab the bits from PHYIR2, and put them in the lower half */ + phy_reg = mdiobus_read(bus, addr, MII_PHYSID2); +- if (phy_reg < 0) +- return -EIO; ++ if (phy_reg < 0) { ++ /* returning -ENODEV doesn't stop bus scanning */ ++ return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO; ++ } + + *phy_id |= (phy_reg & 0xffff); + diff --git a/queue-4.19/net-place-xmit-recursion-in-softnet-data.patch b/queue-4.19/net-place-xmit-recursion-in-softnet-data.patch new file mode 100644 index 00000000000..4471a605df1 --- /dev/null +++ b/queue-4.19/net-place-xmit-recursion-in-softnet-data.patch @@ -0,0 +1,151 @@ +From 97cdcf37b57e3f204be3000b9eab9686f38b4356 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Mon, 1 Apr 2019 16:42:13 +0200 +Subject: net: place xmit recursion in softnet data + +From: Florian Westphal + +commit 97cdcf37b57e3f204be3000b9eab9686f38b4356 upstream. + +This fills a hole in softnet data, so no change in structure size. + +Also prepares for xmit_more placement in the same spot; +skb->xmit_more will be removed in followup patch. + +Signed-off-by: Florian Westphal +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/netdevice.h | 40 ++++++++++++++++++++++++++++++++-------- + net/core/dev.c | 10 +++------- + net/core/filter.c | 6 +++--- + 3 files changed, 38 insertions(+), 18 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -2620,14 +2620,6 @@ void netdev_freemem(struct net_device *d + void synchronize_net(void); + int init_dummy_netdev(struct net_device *dev); + +-DECLARE_PER_CPU(int, xmit_recursion); +-#define XMIT_RECURSION_LIMIT 10 +- +-static inline int dev_recursion_level(void) +-{ +- return this_cpu_read(xmit_recursion); +-} +- + struct net_device *dev_get_by_index(struct net *net, int ifindex); + struct net_device *__dev_get_by_index(struct net *net, int ifindex); + struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex); +@@ -2967,6 +2959,11 @@ struct softnet_data { + #ifdef CONFIG_XFRM_OFFLOAD + struct sk_buff_head xfrm_backlog; + #endif ++ /* written and read only by owning cpu: */ ++ struct { ++ u16 recursion; ++ u8 more; ++ } xmit; + #ifdef CONFIG_RPS + /* input_queue_head should be written by cpu owning this struct, + * and only read by other cpus. Worth using a cache line. 
+@@ -3002,6 +2999,28 @@ static inline void input_queue_tail_incr + + DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); + ++static inline int dev_recursion_level(void) ++{ ++ return __this_cpu_read(softnet_data.xmit.recursion); ++} ++ ++#define XMIT_RECURSION_LIMIT 10 ++static inline bool dev_xmit_recursion(void) ++{ ++ return unlikely(__this_cpu_read(softnet_data.xmit.recursion) > ++ XMIT_RECURSION_LIMIT); ++} ++ ++static inline void dev_xmit_recursion_inc(void) ++{ ++ __this_cpu_inc(softnet_data.xmit.recursion); ++} ++ ++static inline void dev_xmit_recursion_dec(void) ++{ ++ __this_cpu_dec(softnet_data.xmit.recursion); ++} ++ + void __netif_schedule(struct Qdisc *q); + void netif_schedule_queue(struct netdev_queue *txq); + +@@ -4314,6 +4333,11 @@ static inline netdev_tx_t __netdev_start + return ops->ndo_start_xmit(skb, dev); + } + ++static inline bool netdev_xmit_more(void) ++{ ++ return __this_cpu_read(softnet_data.xmit.more); ++} ++ + static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, bool more) + { +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3534,9 +3534,6 @@ static void skb_update_prio(struct sk_bu + #define skb_update_prio(skb) + #endif + +-DEFINE_PER_CPU(int, xmit_recursion); +-EXPORT_SYMBOL(xmit_recursion); +- + /** + * dev_loopback_xmit - loop back @skb + * @net: network namespace this loopback is happening in +@@ -3827,8 +3824,7 @@ static int __dev_queue_xmit(struct sk_bu + int cpu = smp_processor_id(); /* ok because BHs are off */ + + if (txq->xmit_lock_owner != cpu) { +- if (unlikely(__this_cpu_read(xmit_recursion) > +- XMIT_RECURSION_LIMIT)) ++ if (dev_xmit_recursion()) + goto recursion_alert; + + skb = validate_xmit_skb(skb, dev, &again); +@@ -3838,9 +3834,9 @@ static int __dev_queue_xmit(struct sk_bu + HARD_TX_LOCK(dev, txq, cpu); + + if (!netif_xmit_stopped(txq)) { +- __this_cpu_inc(xmit_recursion); ++ dev_xmit_recursion_inc(); + skb = dev_hard_start_xmit(skb, dev, 
txq, &rc); +- __this_cpu_dec(xmit_recursion); ++ dev_xmit_recursion_dec(); + if (dev_xmit_complete(rc)) { + HARD_TX_UNLOCK(dev, txq); + goto out; +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -2002,7 +2002,7 @@ static inline int __bpf_tx_skb(struct ne + { + int ret; + +- if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) { ++ if (dev_xmit_recursion()) { + net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n"); + kfree_skb(skb); + return -ENETDOWN; +@@ -2011,9 +2011,9 @@ static inline int __bpf_tx_skb(struct ne + skb->dev = dev; + skb->tstamp = 0; + +- __this_cpu_inc(xmit_recursion); ++ dev_xmit_recursion_inc(); + ret = dev_queue_xmit(skb); +- __this_cpu_dec(xmit_recursion); ++ dev_xmit_recursion_dec(); + + return ret; + } diff --git a/queue-4.19/net-usb-ax88179_178a-fix-packet-alignment-padding.patch b/queue-4.19/net-usb-ax88179_178a-fix-packet-alignment-padding.patch new file mode 100644 index 00000000000..f5627732507 --- /dev/null +++ b/queue-4.19/net-usb-ax88179_178a-fix-packet-alignment-padding.patch @@ -0,0 +1,69 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Jeremy Kerr +Date: Mon, 15 Jun 2020 10:54:56 +0800 +Subject: net: usb: ax88179_178a: fix packet alignment padding + +From: Jeremy Kerr + +[ Upstream commit e869e7a17798d85829fa7d4f9bbe1eebd4b2d3f6 ] + +Using a AX88179 device (0b95:1790), I see two bytes of appended data on +every RX packet. For example, this 48-byte ping, using 0xff as a +payload byte: + + 04:20:22.528472 IP 192.168.1.1 > 192.168.1.2: ICMP echo request, id 2447, seq 1, length 64 + 0x0000: 000a cd35 ea50 000a cd35 ea4f 0800 4500 + 0x0010: 0054 c116 4000 4001 f63e c0a8 0101 c0a8 + 0x0020: 0102 0800 b633 098f 0001 87ea cd5e 0000 + 0x0030: 0000 dcf2 0600 0000 0000 ffff ffff ffff + 0x0040: ffff ffff ffff ffff ffff ffff ffff ffff + 0x0050: ffff ffff ffff ffff ffff ffff ffff ffff + 0x0060: ffff 961f + +Those last two bytes - 96 1f - aren't part of the original packet. 
+ +In the ax88179 RX path, the usbnet rx_fixup function trims a 2-byte +'alignment pseudo header' from the start of the packet, and sets the +length from a per-packet field populated by hardware. It looks like that +length field *includes* the 2-byte header; the current driver assumes +that it's excluded. + +This change trims the 2-byte alignment header after we've set the packet +length, so the resulting packet length is correct. While we're moving +the comment around, this also fixes the spelling of 'pseudo'. + +Signed-off-by: Jeremy Kerr +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/ax88179_178a.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/net/usb/ax88179_178a.c ++++ b/drivers/net/usb/ax88179_178a.c +@@ -1400,10 +1400,10 @@ static int ax88179_rx_fixup(struct usbne + } + + if (pkt_cnt == 0) { +- /* Skip IP alignment psudo header */ +- skb_pull(skb, 2); + skb->len = pkt_len; +- skb_set_tail_pointer(skb, pkt_len); ++ /* Skip IP alignment pseudo header */ ++ skb_pull(skb, 2); ++ skb_set_tail_pointer(skb, skb->len); + skb->truesize = pkt_len + sizeof(struct sk_buff); + ax88179_rx_checksum(skb, pkt_hdr); + return 1; +@@ -1412,8 +1412,9 @@ static int ax88179_rx_fixup(struct usbne + ax_skb = skb_clone(skb, GFP_ATOMIC); + if (ax_skb) { + ax_skb->len = pkt_len; +- ax_skb->data = skb->data + 2; +- skb_set_tail_pointer(ax_skb, pkt_len); ++ /* Skip IP alignment pseudo header */ ++ skb_pull(ax_skb, 2); ++ skb_set_tail_pointer(ax_skb, ax_skb->len); + ax_skb->truesize = pkt_len + sizeof(struct sk_buff); + ax88179_rx_checksum(ax_skb, pkt_hdr); + usbnet_skb_return(dev, ax_skb); diff --git a/queue-4.19/net-use-correct-this_cpu-primitive-in-dev_recursion_level.patch b/queue-4.19/net-use-correct-this_cpu-primitive-in-dev_recursion_level.patch new file mode 100644 index 00000000000..63e33d7347e --- /dev/null +++ b/queue-4.19/net-use-correct-this_cpu-primitive-in-dev_recursion_level.patch @@ -0,0 
+1,41 @@ +From 28b05b92886871bdd8e6a9df73e3a15845fe8ef4 Mon Sep 17 00:00:00 2001 +From: Florian Westphal +Date: Wed, 3 Apr 2019 08:28:35 +0200 +Subject: net: use correct this_cpu primitive in dev_recursion_level + +From: Florian Westphal + +commit 28b05b92886871bdd8e6a9df73e3a15845fe8ef4 upstream. + +syzbot reports: +BUG: using __this_cpu_read() in preemptible code: +caller is dev_recursion_level include/linux/netdevice.h:3052 [inline] + __this_cpu_preempt_check+0x246/0x270 lib/smp_processor_id.c:47 + dev_recursion_level include/linux/netdevice.h:3052 [inline] + ip6_skb_dst_mtu include/net/ip6_route.h:245 [inline] + +I erroneously downgraded a this_cpu_read to __this_cpu_read when +moving dev_recursion_level() around. + +Reported-by: syzbot+51471b4aae195285a4a3@syzkaller.appspotmail.com +Fixes: 97cdcf37b57e ("net: place xmit recursion in softnet data") +Signed-off-by: Florian Westphal +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman + +--- + include/linux/netdevice.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3001,7 +3001,7 @@ DECLARE_PER_CPU_ALIGNED(struct softnet_d + + static inline int dev_recursion_level(void) + { +- return __this_cpu_read(softnet_data.xmit.recursion); ++ return this_cpu_read(softnet_data.xmit.recursion); + } + + #define XMIT_RECURSION_LIMIT 10 diff --git a/queue-4.19/rocker-fix-incorrect-error-handling-in-dma_rings_init.patch b/queue-4.19/rocker-fix-incorrect-error-handling-in-dma_rings_init.patch new file mode 100644 index 00000000000..08a4d81d514 --- /dev/null +++ b/queue-4.19/rocker-fix-incorrect-error-handling-in-dma_rings_init.patch @@ -0,0 +1,36 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Aditya Pakki +Date: Fri, 12 Jun 2020 15:27:55 -0500 +Subject: rocker: fix incorrect error handling in dma_rings_init + +From: Aditya Pakki + +[ Upstream commit 58d0c864e1a759a15c9df78f50ea5a5c32b3989e ] + +In
rocker_dma_rings_init, the goto blocks in case of errors +caused by the functions rocker_dma_cmd_ring_waits_alloc() and +rocker_dma_ring_create() are incorrect. The patch fixes the +order consistent with cleanup in rocker_dma_rings_fini(). + +Signed-off-by: Aditya Pakki +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/rocker/rocker_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/rocker/rocker_main.c ++++ b/drivers/net/ethernet/rocker/rocker_main.c +@@ -651,10 +651,10 @@ static int rocker_dma_rings_init(struct + err_dma_event_ring_bufs_alloc: + rocker_dma_ring_destroy(rocker, &rocker->event_ring); + err_dma_event_ring_create: ++ rocker_dma_cmd_ring_waits_free(rocker); ++err_dma_cmd_ring_waits_alloc: + rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring, + PCI_DMA_BIDIRECTIONAL); +-err_dma_cmd_ring_waits_alloc: +- rocker_dma_cmd_ring_waits_free(rocker); + err_dma_cmd_ring_bufs_alloc: + rocker_dma_ring_destroy(rocker, &rocker->cmd_ring); + return err; diff --git a/queue-4.19/rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch b/queue-4.19/rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch new file mode 100644 index 00000000000..3362ff2c9fa --- /dev/null +++ b/queue-4.19/rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch @@ -0,0 +1,146 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: David Howells +Date: Fri, 19 Jun 2020 23:38:16 +0100 +Subject: rxrpc: Fix notification call on completion of discarded calls + +From: David Howells + +[ Upstream commit 0041cd5a50442db6e456b145892a0eaf2dff061f ] + +When preallocated service calls are being discarded, they're passed to +->discard_new_call() to have the caller clean up any attached higher-layer +preallocated pieces before being marked completed. 
However, the act of +marking them completed now invokes the call's notification function - which +causes a problem because that function might assume that the previously +freed pieces of memory are still there. + +Fix this by setting a dummy notification function on the socket after +calling ->discard_new_call(). + +This results in the following kasan message when the kafs module is +removed. + +================================================================== +BUG: KASAN: use-after-free in afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707 +Write of size 1 at addr ffff8880946c39e4 by task kworker/u4:1/21 + +CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.8.0-rc1-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: netns cleanup_net +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x18f/0x20d lib/dump_stack.c:118 + print_address_description.constprop.0.cold+0xd3/0x413 mm/kasan/report.c:383 + __kasan_report mm/kasan/report.c:513 [inline] + kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530 + afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707 + rxrpc_notify_socket+0x1db/0x5d0 net/rxrpc/recvmsg.c:40 + __rxrpc_set_call_completion.part.0+0x172/0x410 net/rxrpc/recvmsg.c:76 + __rxrpc_call_completed net/rxrpc/recvmsg.c:112 [inline] + rxrpc_call_completed+0xca/0xf0 net/rxrpc/recvmsg.c:111 + rxrpc_discard_prealloc+0x781/0xab0 net/rxrpc/call_accept.c:233 + rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245 + afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110 + afs_net_exit+0x1bc/0x310 fs/afs/main.c:155 + ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186 + cleanup_net+0x511/0xa50 net/core/net_namespace.c:603 + process_one_work+0x965/0x1690 kernel/workqueue.c:2269 + worker_thread+0x96/0xe10 kernel/workqueue.c:2415 + kthread+0x3b5/0x4a0 kernel/kthread.c:291 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 + +Allocated by task 6820: + save_stack+0x1b/0x40 mm/kasan/common.c:48 + 
set_track mm/kasan/common.c:56 [inline] + __kasan_kmalloc mm/kasan/common.c:494 [inline] + __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:467 + kmem_cache_alloc_trace+0x153/0x7d0 mm/slab.c:3551 + kmalloc include/linux/slab.h:555 [inline] + kzalloc include/linux/slab.h:669 [inline] + afs_alloc_call+0x55/0x630 fs/afs/rxrpc.c:141 + afs_charge_preallocation+0xe9/0x2d0 fs/afs/rxrpc.c:757 + afs_open_socket+0x292/0x360 fs/afs/rxrpc.c:92 + afs_net_init+0xa6c/0xe30 fs/afs/main.c:125 + ops_init+0xaf/0x420 net/core/net_namespace.c:151 + setup_net+0x2de/0x860 net/core/net_namespace.c:341 + copy_net_ns+0x293/0x590 net/core/net_namespace.c:482 + create_new_namespaces+0x3fb/0xb30 kernel/nsproxy.c:110 + unshare_nsproxy_namespaces+0xbd/0x1f0 kernel/nsproxy.c:231 + ksys_unshare+0x43d/0x8e0 kernel/fork.c:2983 + __do_sys_unshare kernel/fork.c:3051 [inline] + __se_sys_unshare kernel/fork.c:3049 [inline] + __x64_sys_unshare+0x2d/0x40 kernel/fork.c:3049 + do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:359 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Freed by task 21: + save_stack+0x1b/0x40 mm/kasan/common.c:48 + set_track mm/kasan/common.c:56 [inline] + kasan_set_free_info mm/kasan/common.c:316 [inline] + __kasan_slab_free+0xf7/0x140 mm/kasan/common.c:455 + __cache_free mm/slab.c:3426 [inline] + kfree+0x109/0x2b0 mm/slab.c:3757 + afs_put_call+0x585/0xa40 fs/afs/rxrpc.c:190 + rxrpc_discard_prealloc+0x764/0xab0 net/rxrpc/call_accept.c:230 + rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245 + afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110 + afs_net_exit+0x1bc/0x310 fs/afs/main.c:155 + ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186 + cleanup_net+0x511/0xa50 net/core/net_namespace.c:603 + process_one_work+0x965/0x1690 kernel/workqueue.c:2269 + worker_thread+0x96/0xe10 kernel/workqueue.c:2415 + kthread+0x3b5/0x4a0 kernel/kthread.c:291 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293 + +The buggy address belongs to the object at ffff8880946c3800 + which belongs 
to the cache kmalloc-1k of size 1024 +The buggy address is located 484 bytes inside of + 1024-byte region [ffff8880946c3800, ffff8880946c3c00) +The buggy address belongs to the page: +page:ffffea000251b0c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 +flags: 0xfffe0000000200(slab) +raw: 00fffe0000000200 ffffea0002546508 ffffea00024fa248 ffff8880aa000c40 +raw: 0000000000000000 ffff8880946c3000 0000000100000002 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8880946c3880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8880946c3900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +>ffff8880946c3980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff8880946c3a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8880946c3a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +================================================================== + +Reported-by: syzbot+d3eccef36ddbd02713e9@syzkaller.appspotmail.com +Fixes: 5ac0d62226a0 ("rxrpc: Fix missing notification") +Signed-off-by: David Howells +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/call_accept.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +--- a/net/rxrpc/call_accept.c ++++ b/net/rxrpc/call_accept.c +@@ -26,6 +26,11 @@ + #include + #include "ar-internal.h" + ++static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call, ++ unsigned long user_call_ID) ++{ ++} ++ + /* + * Preallocate a single service call, connection and peer and, if possible, + * give them a user ID and attach the user's side of the ID to them. 
+@@ -232,6 +237,8 @@ void rxrpc_discard_prealloc(struct rxrpc + if (rx->discard_new_call) { + _debug("discard %lx", call->user_call_ID); + rx->discard_new_call(call, call->user_call_ID); ++ if (call->notify_rx) ++ call->notify_rx = rxrpc_dummy_notify; + rxrpc_put_call(call, rxrpc_call_put_kernel); + } + rxrpc_call_completed(call); diff --git a/queue-4.19/sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch b/queue-4.19/sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch new file mode 100644 index 00000000000..38a3678d55b --- /dev/null +++ b/queue-4.19/sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch @@ -0,0 +1,61 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: "Toke Høiland-Jørgensen" +Date: Thu, 25 Jun 2020 22:12:08 +0200 +Subject: sch_cake: don't call diffserv parsing code when it is not needed + +From: "Toke Høiland-Jørgensen" + +[ Upstream commit 8c95eca0bb8c4bd2231a0d581f1ad0d50c90488c ] + +As a further optimisation of the diffserv parsing codepath, we can skip it +entirely if CAKE is configured to neither use diffserv-based +classification, nor to zero out the diffserv bits. + +Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits") +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -1508,7 +1508,7 @@ static unsigned int cake_drop(struct Qdi + return idx + (tin << 16); + } + +-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) ++static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash) + { + const int offset = skb_network_offset(skb); + u16 *buf, buf_; +@@ -1569,13 +1569,16 @@ static struct cake_tin_data *cake_select + { + struct cake_sched_data *q = qdisc_priv(sch); + u32 tin; ++ bool wash; + u8 dscp; + + /* Tin selection: Default to diffserv-based selection, allow overriding +- * using firewall marks or skb->priority. ++ * using firewall marks or skb->priority. Call DSCP parsing early if ++ * wash is enabled, otherwise defer to below to skip unneeded parsing. + */ +- dscp = cake_handle_diffserv(skb, +- q->rate_flags & CAKE_FLAG_WASH); ++ wash = !!(q->rate_flags & CAKE_FLAG_WASH); ++ if (wash) ++ dscp = cake_handle_diffserv(skb, wash); + + if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) + tin = 0; +@@ -1586,6 +1589,8 @@ static struct cake_tin_data *cake_select + tin = q->tin_order[TC_H_MIN(skb->priority) - 1]; + + else { ++ if (!wash) ++ dscp = cake_handle_diffserv(skb, wash); + tin = q->tin_index[dscp]; + + if (unlikely(tin >= q->tin_cnt)) diff --git a/queue-4.19/sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch b/queue-4.19/sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch new file mode 100644 index 00000000000..e174d7619d5 --- /dev/null +++ b/queue-4.19/sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch @@ -0,0 +1,96 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Ilya Ponetayev +Date: Thu, 25 Jun 2020 22:12:07 +0200 +Subject: sch_cake: don't try to reallocate or unshare skb unconditionally + +From: Ilya Ponetayev + +[ Upstream commit 
9208d2863ac689a563b92f2161d8d1e7127d0add ] + +cake_handle_diffserv() tries to linearize mac and network header parts of +skb and to make it writable unconditionally. In some cases it leads to full +skb reallocation, which reduces throughput and increases CPU load. Some +measurements of IPv4 forward + NAPT on MIPS router with 580 MHz single-core +CPU was conducted. It appears that on kernel 4.9 skb_try_make_writable() +reallocates skb, if skb was allocated in ethernet driver via so-called +'build skb' method from page cache (it was discovered by strange increase +of kmalloc-2048 slab at first). + +Obtain DSCP value via read-only skb_header_pointer() call, and leave +linearization only for DSCP bleaching or ECN CE setting. And, as an +additional optimisation, skip diffserv parsing entirely if it is not needed +by the current configuration. + +Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits") +Signed-off-by: Ilya Ponetayev +[ fix a few style issues, reflow commit message ] +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 41 ++++++++++++++++++++++++++++++----------- + 1 file changed, 30 insertions(+), 11 deletions(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -1510,30 +1510,49 @@ static unsigned int cake_drop(struct Qdi + + static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash) + { +- int wlen = skb_network_offset(skb); ++ const int offset = skb_network_offset(skb); ++ u16 *buf, buf_; + u8 dscp; + + switch (tc_skb_protocol(skb)) { + case htons(ETH_P_IP): +- wlen += sizeof(struct iphdr); +- if (!pskb_may_pull(skb, wlen) || +- skb_try_make_writable(skb, wlen)) ++ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); ++ if (unlikely(!buf)) + return 0; + +- dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; +- if (wash && dscp) ++ /* ToS is in the second byte of iphdr */ ++ dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2; ++ ++ if (wash && dscp) { ++ const int wlen = offset + sizeof(struct iphdr); ++ ++ if (!pskb_may_pull(skb, wlen) || ++ skb_try_make_writable(skb, wlen)) ++ return 0; ++ + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0); ++ } ++ + return dscp; + + case htons(ETH_P_IPV6): +- wlen += sizeof(struct ipv6hdr); +- if (!pskb_may_pull(skb, wlen) || +- skb_try_make_writable(skb, wlen)) ++ buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_); ++ if (unlikely(!buf)) + return 0; + +- dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; +- if (wash && dscp) ++ /* Traffic class is in the first and second bytes of ipv6hdr */ ++ dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2; ++ ++ if (wash && dscp) { ++ const int wlen = offset + sizeof(struct ipv6hdr); ++ ++ if (!pskb_may_pull(skb, wlen) || ++ skb_try_make_writable(skb, wlen)) ++ return 0; ++ + ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0); ++ } ++ + return dscp; + + case htons(ETH_P_ARP): diff --git a/queue-4.19/sch_cake-fix-a-few-style-nits.patch b/queue-4.19/sch_cake-fix-a-few-style-nits.patch new file mode 100644 index 
00000000000..afc0f87e321 --- /dev/null +++ b/queue-4.19/sch_cake-fix-a-few-style-nits.patch @@ -0,0 +1,41 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: "Toke Høiland-Jørgensen" +Date: Thu, 25 Jun 2020 22:12:09 +0200 +Subject: sch_cake: fix a few style nits + +From: "Toke Høiland-Jørgensen" + +[ Upstream commit 3f608f0c41360b11b04c763f348b712f651c8bac ] + +I spotted a few nits when comparing the in-tree version of sch_cake with +the out-of-tree one: A redundant error variable declaration shadowing an +outer declaration, and an indentation alignment issue. Fix both of these. + +Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc") +Signed-off-by: Toke Høiland-Jørgensen +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_cake.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_cake.c ++++ b/net/sched/sch_cake.c +@@ -2649,7 +2649,7 @@ static int cake_init(struct Qdisc *sch, + qdisc_watchdog_init(&q->watchdog, sch); + + if (opt) { +- int err = cake_change(sch, opt, extack); ++ err = cake_change(sch, opt, extack); + + if (err) + return err; +@@ -2963,7 +2963,7 @@ static int cake_dump_class_stats(struct + PUT_STAT_S32(BLUE_TIMER_US, + ktime_to_us( + ktime_sub(now, +- flow->cvars.blue_timer))); ++ flow->cvars.blue_timer))); + } + if (flow->cvars.dropping) { + PUT_STAT_S32(DROP_NEXT_US, diff --git a/queue-4.19/sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch b/queue-4.19/sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch new file mode 100644 index 00000000000..3524940ddff --- /dev/null +++ b/queue-4.19/sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch @@ -0,0 +1,88 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Marcelo Ricardo Leitner +Date: Wed, 24 Jun 2020 17:34:18 -0300 +Subject: sctp: Don't advertise IPv4 addresses if ipv6only is set on the socket + +From: Marcelo Ricardo Leitner + 
+[ Upstream commit 471e39df96b9a4c4ba88a2da9e25a126624d7a9c ] + +If a socket is set ipv6only, it will still send IPv4 addresses in the +INIT and INIT_ACK packets. This potentially misleads the peer into using +them, which then would cause association termination. + +The fix is to not add IPv4 addresses to ipv6only sockets. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Corey Minyard +Signed-off-by: Marcelo Ricardo Leitner +Tested-by: Corey Minyard +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/constants.h | 8 +++++--- + net/sctp/associola.c | 5 ++++- + net/sctp/bind_addr.c | 1 + + net/sctp/protocol.c | 3 ++- + 4 files changed, 12 insertions(+), 5 deletions(-) + +--- a/include/net/sctp/constants.h ++++ b/include/net/sctp/constants.h +@@ -361,11 +361,13 @@ enum { + ipv4_is_anycast_6to4(a)) + + /* Flags used for the bind address copy functions. */ +-#define SCTP_ADDR6_ALLOWED 0x00000001 /* IPv6 address is allowed by ++#define SCTP_ADDR4_ALLOWED 0x00000001 /* IPv4 address is allowed by + local sock family */ +-#define SCTP_ADDR4_PEERSUPP 0x00000002 /* IPv4 address is supported by ++#define SCTP_ADDR6_ALLOWED 0x00000002 /* IPv6 address is allowed by ++ local sock family */ ++#define SCTP_ADDR4_PEERSUPP 0x00000004 /* IPv4 address is supported by + peer */ +-#define SCTP_ADDR6_PEERSUPP 0x00000004 /* IPv6 address is supported by ++#define SCTP_ADDR6_PEERSUPP 0x00000008 /* IPv6 address is supported by + peer */ + + /* Reasons to retransmit. */ +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -1593,12 +1593,15 @@ void sctp_assoc_rwnd_decrease(struct sct + int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, + enum sctp_scope scope, gfp_t gfp) + { ++ struct sock *sk = asoc->base.sk; + int flags; + + /* Use scoping rules to determine the subset of addresses from + * the endpoint. + */ +- flags = (PF_INET6 == asoc->base.sk->sk_family) ? 
SCTP_ADDR6_ALLOWED : 0; ++ flags = (PF_INET6 == sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0; ++ if (!inet_v6_ipv6only(sk)) ++ flags |= SCTP_ADDR4_ALLOWED; + if (asoc->peer.ipv4_address) + flags |= SCTP_ADDR4_PEERSUPP; + if (asoc->peer.ipv6_address) +--- a/net/sctp/bind_addr.c ++++ b/net/sctp/bind_addr.c +@@ -453,6 +453,7 @@ static int sctp_copy_one_addr(struct net + * well as the remote peer. + */ + if ((((AF_INET == addr->sa.sa_family) && ++ (flags & SCTP_ADDR4_ALLOWED) && + (flags & SCTP_ADDR4_PEERSUPP))) || + (((AF_INET6 == addr->sa.sa_family) && + (flags & SCTP_ADDR6_ALLOWED) && +--- a/net/sctp/protocol.c ++++ b/net/sctp/protocol.c +@@ -163,7 +163,8 @@ int sctp_copy_local_addr_list(struct net + * sock as well as the remote peer. + */ + if (addr->a.sa.sa_family == AF_INET && +- !(copy_flags & SCTP_ADDR4_PEERSUPP)) ++ (!(copy_flags & SCTP_ADDR4_ALLOWED) || ++ !(copy_flags & SCTP_ADDR4_PEERSUPP))) + continue; + if (addr->a.sa.sa_family == AF_INET6 && + (!(copy_flags & SCTP_ADDR6_ALLOWED) || diff --git a/queue-4.19/series b/queue-4.19/series index 49090678d4c..3fffbe63515 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -14,3 +14,27 @@ alsa-hda-realtek-enable-the-headset-of-asus-b9450fa-.patch alsa-hda-realtek-enable-mute-led-on-an-hp-system.patch alsa-hda-realtek-enable-micmute-led-on-and-hp-system.patch apparmor-don-t-try-to-replace-stale-label-in-ptracem.patch +ibmveth-fix-max-mtu-limit.patch +mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch +net-bridge-enfore-alignment-for-ethernet-address.patch +net-fix-memleak-in-register_netdevice.patch +net-place-xmit-recursion-in-softnet-data.patch +net-use-correct-this_cpu-primitive-in-dev_recursion_level.patch +net-increment-xmit_recursion-level-in-dev_direct_xmit.patch +net-usb-ax88179_178a-fix-packet-alignment-padding.patch +rocker-fix-incorrect-error-handling-in-dma_rings_init.patch +rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch 
+sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch +tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch +tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch +tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch +ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch +net-phy-check-harder-for-errors-in-get_phy_id.patch +ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch +sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch +sch_cake-fix-a-few-style-nits.patch +tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch +sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch +net-fix-the-arp-error-in-some-cases.patch +net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch +net-core-reduce-recursion-limit-value.patch diff --git a/queue-4.19/tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch b/queue-4.19/tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch new file mode 100644 index 00000000000..cf039b23d69 --- /dev/null +++ b/queue-4.19/tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch @@ -0,0 +1,97 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Denis Kirjanov +Date: Thu, 25 Jun 2020 14:51:06 +0300 +Subject: tcp: don't ignore ECN CWR on pure ACK + +From: Denis Kirjanov + +[ Upstream commit 2570284060b48f3f79d8f1a2698792f36c385e9a ] + +there is a problem with the CWR flag set in an incoming ACK segment +and it leads to the situation when the ECE flag is latched forever + +the following packetdrill script shows what happens: + +// Stack receives incoming segments with CE set ++0.1 <[ect0] . 11001:12001(1000) ack 1001 win 65535 ++0.0 <[ce] . 12001:13001(1000) ack 1001 win 65535 ++0.0 <[ect0] P. 13001:14001(1000) ack 1001 win 65535 + +// Stack repsonds with ECN ECHO ++0.0 >[noecn] . 1001:1001(0) ack 12001 ++0.0 >[noecn] E. 1001:1001(0) ack 13001 ++0.0 >[noecn] E. 1001:1001(0) ack 14001 + +// Write a packet ++0.1 write(3, ..., 1000) = 1000 ++0.0 >[ect0] PE. 
1001:2001(1000) ack 14001 + +// Pure ACK received ++0.01 <[noecn] W. 14001:14001(0) ack 2001 win 65535 + +// Since CWR was sent, this packet should NOT have ECE set + ++0.1 write(3, ..., 1000) = 1000 ++0.0 >[ect0] P. 2001:3001(1000) ack 14001 +// but Linux will still keep ECE latched here, with packetdrill +// flagging a missing ECE flag, expecting +// >[ect0] PE. 2001:3001(1000) ack 14001 +// in the script + +In the situation above we will continue to send ECN ECHO packets +and trigger the peer to reduce the congestion window. To avoid that +we can check CWR on pure ACKs received. + +v3: +- Add a sequence check to avoid sending an ACK to an ACK + +v2: +- Adjusted the comment +- move CWR check before checking for unacknowledged packets + +Signed-off-by: Denis Kirjanov +Acked-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -254,7 +254,8 @@ static void tcp_ecn_accept_cwr(struct so + * cwnd may be very low (even just 1 packet), so we should ACK + * immediately. + */ +- inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; ++ if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) ++ inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; + } + } + +@@ -3665,6 +3666,15 @@ static int tcp_ack(struct sock *sk, cons + tcp_in_ack_event(sk, ack_ev_flags); + } + ++ /* This is a deviation from RFC3168 since it states that: ++ * "When the TCP data sender is ready to set the CWR bit after reducing ++ * the congestion window, it SHOULD set the CWR bit only on the first ++ * new data packet that it transmits." ++ * We accept CWR on pure ACKs to be more robust ++ * with widely-deployed TCP implementations that do this. ++ */ ++ tcp_ecn_accept_cwr(sk, skb); ++ + /* We passed data and got it acked, remove any soft error + * log. Something worked... 
+ */ +@@ -4703,8 +4713,6 @@ static void tcp_data_queue(struct sock * + skb_dst_drop(skb); + __skb_pull(skb, tcp_hdr(skb)->doff * 4); + +- tcp_ecn_accept_cwr(sk, skb); +- + tp->rx_opt.dsack = 0; + + /* Queue data for delivery to the user. diff --git a/queue-4.19/tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch b/queue-4.19/tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch new file mode 100644 index 00000000000..ef9e16bbbea --- /dev/null +++ b/queue-4.19/tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch @@ -0,0 +1,94 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Eric Dumazet +Date: Mon, 15 Jun 2020 20:37:07 -0700 +Subject: tcp: grow window for OOO packets only for SACK flows + +From: Eric Dumazet + +[ Upstream commit 662051215c758ae8545451628816204ed6cd372d ] + +Back in 2013, we made a change that broke fast retransmit +for non SACK flows. + +Indeed, for these flows, a sender needs to receive three duplicate +ACK before starting fast retransmit. Sending ACK with different +receive window do not count. + +Even if enabling SACK is strongly recommended these days, +there still are some cases where it has to be disabled. + +Not increasing the window seems better than having to +rely on RTO. + +After the fix, following packetdrill test gives : + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 + +0 > S. 0:0(0) ack 1 + +0 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 < . 1:1001(1000) ack 1 win 514 +// Quick ack + +0 > . 1:1(0) ack 1001 win 264 + + +0 < . 2001:3001(1000) ack 1 win 514 +// DUPACK : Normally we should not change the window + +0 > . 1:1(0) ack 1001 win 264 + + +0 < . 3001:4001(1000) ack 1 win 514 +// DUPACK : Normally we should not change the window + +0 > . 1:1(0) ack 1001 win 264 + + +0 < . 
4001:5001(1000) ack 1 win 514 +// DUPACK : Normally we should not change the window + +0 > . 1:1(0) ack 1001 win 264 + + +0 < . 1001:2001(1000) ack 1 win 514 +// Hole is repaired. + +0 > . 1:1(0) ack 5001 win 272 + +Fixes: 4e4f1fc22681 ("tcp: properly increase rcv_ssthresh for ofo packets") +Signed-off-by: Eric Dumazet +Reported-by: Venkat Venkatsubra +Acked-by: Neal Cardwell +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 12 ++++++++++-- + 1 file changed, 10 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -4528,7 +4528,11 @@ static void tcp_data_queue_ofo(struct so + if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb, + skb, &fragstolen)) { + coalesce_done: +- tcp_grow_window(sk, skb); ++ /* For non sack flows, do not grow window to force DUPACK ++ * and trigger fast retransmit. ++ */ ++ if (tcp_is_sack(tp)) ++ tcp_grow_window(sk, skb); + kfree_skb_partial(skb, fragstolen); + skb = NULL; + goto add_sack; +@@ -4612,7 +4616,11 @@ add_sack: + tcp_sack_new_ofo_skb(sk, seq, end_seq); + end: + if (skb) { +- tcp_grow_window(sk, skb); ++ /* For non sack flows, do not grow window to force DUPACK ++ * and trigger fast retransmit. 
++ */ ++ if (tcp_is_sack(tp)) ++ tcp_grow_window(sk, skb); + skb_condense(skb); + skb_set_owner_r(skb, sk); + } diff --git a/queue-4.19/tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch b/queue-4.19/tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch new file mode 100644 index 00000000000..de01aac8b8e --- /dev/null +++ b/queue-4.19/tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch @@ -0,0 +1,50 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: Neal Cardwell +Date: Wed, 24 Jun 2020 12:42:02 -0400 +Subject: tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT + +From: Neal Cardwell + +[ Upstream commit b344579ca8478598937215f7005d6c7b84d28aee ] + +Mirja Kuehlewind reported a bug in Linux TCP CUBIC Hystart, where +Hystart HYSTART_DELAY mechanism can exit Slow Start spuriously on an +ACK when the minimum rtt of a connection goes down. From inspection it +is clear from the existing code that this could happen in an example +like the following: + +o The first 8 RTT samples in a round trip are 150ms, resulting in a + curr_rtt of 150ms and a delay_min of 150ms. + +o The 9th RTT sample is 100ms. The curr_rtt does not change after the + first 8 samples, so curr_rtt remains 150ms. But delay_min can be + lowered at any time, so delay_min falls to 100ms. The code executes + the HYSTART_DELAY comparison between curr_rtt of 150ms and delay_min + of 100ms, and the curr_rtt is declared far enough above delay_min to + force a (spurious) exit of Slow start. + +The fix here is simple: allow every RTT sample in a round trip to +lower the curr_rtt. + +Fixes: ae27e98a5152 ("[TCP] CUBIC v2.3") +Reported-by: Mirja Kuehlewind +Signed-off-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_cubic.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/ipv4/tcp_cubic.c ++++ b/net/ipv4/tcp_cubic.c +@@ -403,6 +403,8 @@ static void hystart_update(struct sock * + + if (hystart_detect & HYSTART_DELAY) { + /* obtain the minimum delay of more than sampling packets */ ++ if (ca->curr_rtt > delay) ++ ca->curr_rtt = delay; + if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { + if (ca->curr_rtt == 0 || ca->curr_rtt > delay) + ca->curr_rtt = delay; diff --git a/queue-4.19/tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch b/queue-4.19/tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch new file mode 100644 index 00000000000..497b26c01fe --- /dev/null +++ b/queue-4.19/tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch @@ -0,0 +1,37 @@ +From foo@baz Sun 28 Jun 2020 02:29:11 PM CEST +From: David Christensen +Date: Wed, 17 Jun 2020 11:51:17 -0700 +Subject: tg3: driver sleeps indefinitely when EEH errors exceed eeh_max_freezes + +From: David Christensen + +[ Upstream commit 3a2656a211caf35e56afc9425e6e518fa52f7fbc ] + +The driver function tg3_io_error_detected() calls napi_disable twice, +without an intervening napi_enable, when the number of EEH errors exceeds +eeh_max_freezes, resulting in an indefinite sleep while holding rtnl_lock. + +Add check for pcierr_recovery which skips code already executed for the +"Frozen" state. + +Signed-off-by: David Christensen +Reviewed-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -18229,8 +18229,8 @@ static pci_ers_result_t tg3_io_error_det + + rtnl_lock(); + +- /* We probably don't have netdev yet */ +- if (!netdev || !netif_running(netdev)) ++ /* Could be second call or maybe we don't have netdev yet */ ++ if (!netdev || tp->pcierr_recovery || !netif_running(netdev)) + goto done; + + /* We needn't recover from permanent error */