git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 28 Jun 2020 12:23:05 +0000 (14:23 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 28 Jun 2020 12:23:05 +0000 (14:23 +0200)
added patches:
enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch
geneve-allow-changing-df-behavior-after-creation.patch
ibmveth-fix-max-mtu-limit.patch
ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch
ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch
mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch
mvpp2-ethtool-rxtx-stats-fix.patch
net-bridge-enfore-alignment-for-ethernet-address.patch
net-core-reduce-recursion-limit-value.patch
net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch
net-fix-memleak-in-register_netdevice.patch
net-fix-the-arp-error-in-some-cases.patch
net-increment-xmit_recursion-level-in-dev_direct_xmit.patch
net-phy-check-harder-for-errors-in-get_phy_id.patch
net-usb-ax88179_178a-fix-packet-alignment-padding.patch
openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch
rocker-fix-incorrect-error-handling-in-dma_rings_init.patch
rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch
sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch
sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch
sch_cake-fix-a-few-style-nits.patch
sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch
tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch
tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch
tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch
tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch

27 files changed:
queue-5.4/enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch [new file with mode: 0644]
queue-5.4/geneve-allow-changing-df-behavior-after-creation.patch [new file with mode: 0644]
queue-5.4/ibmveth-fix-max-mtu-limit.patch [new file with mode: 0644]
queue-5.4/ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch [new file with mode: 0644]
queue-5.4/ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch [new file with mode: 0644]
queue-5.4/mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch [new file with mode: 0644]
queue-5.4/mvpp2-ethtool-rxtx-stats-fix.patch [new file with mode: 0644]
queue-5.4/net-bridge-enfore-alignment-for-ethernet-address.patch [new file with mode: 0644]
queue-5.4/net-core-reduce-recursion-limit-value.patch [new file with mode: 0644]
queue-5.4/net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch [new file with mode: 0644]
queue-5.4/net-fix-memleak-in-register_netdevice.patch [new file with mode: 0644]
queue-5.4/net-fix-the-arp-error-in-some-cases.patch [new file with mode: 0644]
queue-5.4/net-increment-xmit_recursion-level-in-dev_direct_xmit.patch [new file with mode: 0644]
queue-5.4/net-phy-check-harder-for-errors-in-get_phy_id.patch [new file with mode: 0644]
queue-5.4/net-usb-ax88179_178a-fix-packet-alignment-padding.patch [new file with mode: 0644]
queue-5.4/openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch [new file with mode: 0644]
queue-5.4/rocker-fix-incorrect-error-handling-in-dma_rings_init.patch [new file with mode: 0644]
queue-5.4/rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch [new file with mode: 0644]
queue-5.4/sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch [new file with mode: 0644]
queue-5.4/sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch [new file with mode: 0644]
queue-5.4/sch_cake-fix-a-few-style-nits.patch [new file with mode: 0644]
queue-5.4/sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch [new file with mode: 0644]
queue-5.4/series
queue-5.4/tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch [new file with mode: 0644]
queue-5.4/tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch [new file with mode: 0644]
queue-5.4/tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch [new file with mode: 0644]
queue-5.4/tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch [new file with mode: 0644]

diff --git a/queue-5.4/enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch b/queue-5.4/enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch
new file mode 100644 (file)
index 0000000..70b4571
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Claudiu Manoil <claudiu.manoil@nxp.com>
+Date: Fri, 26 Jun 2020 19:17:29 +0300
+Subject: enetc: Fix tx rings bitmap iteration range, irq handling
+
+From: Claudiu Manoil <claudiu.manoil@nxp.com>
+
+[ Upstream commit 0574e2000fc3103cbc69ba82ec1175ce171fdf5e ]
+
+The rings bitmap of an interrupt vector encodes
+which of the device's rings were assigned to that
+interrupt vector.
+Hence the iteration range of the tx rings bitmap
+(for_each_set_bit()) should be the total number of
+Tx rings of that netdevice instead of the number of
+rings assigned to the interrupt vector.
+Since there are 2 cores, and one interrupt vector for
+each core, the number of rings assigned to an interrupt
+vector is half the number of available rings.
+The impact of this error is that the upper half of the
+tx rings could still generate interrupts during napi
+polling.
+
+Fixes: d4fd0404c1c9 ("enetc: Introduce basic PF and VF ENETC ethernet drivers")
+Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/freescale/enetc/enetc.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/freescale/enetc/enetc.c
++++ b/drivers/net/ethernet/freescale/enetc/enetc.c
+@@ -254,7 +254,7 @@ static irqreturn_t enetc_msix(int irq, v
+       /* disable interrupts */
+       enetc_wr_reg(v->rbier, 0);
+-      for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings)
++      for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
+               enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i), 0);
+       napi_schedule_irqoff(&v->napi);
+@@ -290,7 +290,7 @@ static int enetc_poll(struct napi_struct
+       /* enable interrupts */
+       enetc_wr_reg(v->rbier, ENETC_RBIER_RXTIE);
+-      for_each_set_bit(i, &v->tx_rings_map, v->count_tx_rings)
++      for_each_set_bit(i, &v->tx_rings_map, ENETC_MAX_NUM_TXQS)
+               enetc_wr_reg(v->tbier_base + ENETC_BDR_OFF(i),
+                            ENETC_TBIER_TXTIE);
diff --git a/queue-5.4/geneve-allow-changing-df-behavior-after-creation.patch b/queue-5.4/geneve-allow-changing-df-behavior-after-creation.patch
new file mode 100644 (file)
index 0000000..7f536c4
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Thu, 18 Jun 2020 12:13:22 +0200
+Subject: geneve: allow changing DF behavior after creation
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 56c09de347e40804fc8dad155272fb9609e0a97b ]
+
+Currently, trying to change the DF parameter of a geneve device does
+nothing:
+
+    # ip -d link show geneve1
+    14: geneve1: <snip>
+        link/ether <snip>
+        geneve id 1 remote 10.0.0.1 ttl auto df set dstport 6081 <snip>
+    # ip link set geneve1 type geneve id 1 df unset
+    # ip -d link show geneve1
+    14: geneve1: <snip>
+        link/ether <snip>
+        geneve id 1 remote 10.0.0.1 ttl auto df set dstport 6081 <snip>
+
+We just need to update the value in geneve_changelink.
+
+Fixes: a025fb5f49ad ("geneve: Allow configuration of DF behaviour")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/geneve.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -1649,6 +1649,7 @@ static int geneve_changelink(struct net_
+       geneve->collect_md = metadata;
+       geneve->use_udp6_rx_checksums = use_udp6_rx_checksums;
+       geneve->ttl_inherit = ttl_inherit;
++      geneve->df = df;
+       geneve_unquiesce(geneve, gs4, gs6);
+       return 0;
diff --git a/queue-5.4/ibmveth-fix-max-mtu-limit.patch b/queue-5.4/ibmveth-fix-max-mtu-limit.patch
new file mode 100644 (file)
index 0000000..6076a2c
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Thomas Falcon <tlfalcon@linux.ibm.com>
+Date: Thu, 18 Jun 2020 10:43:46 -0500
+Subject: ibmveth: Fix max MTU limit
+
+From: Thomas Falcon <tlfalcon@linux.ibm.com>
+
+[ Upstream commit 5948378b26d89f8aa5eac37629dbd0616ce8d7a7 ]
+
+The max MTU limit defined for ibmveth is not accounting for
+virtual ethernet buffer overhead, which is twenty-two additional
+bytes set aside for the ethernet header and eight additional bytes
+of an opaque handle reserved for use by the hypervisor. Update the
+max MTU to reflect this overhead.
+
+Fixes: d894be57ca92 ("ethernet: use net core MTU range checking in more drivers")
+Fixes: 110447f8269a ("ethernet: fix min/max MTU typos")
+Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/ibmveth.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/ibm/ibmveth.c
++++ b/drivers/net/ethernet/ibm/ibmveth.c
+@@ -1682,7 +1682,7 @@ static int ibmveth_probe(struct vio_dev
+       }
+       netdev->min_mtu = IBMVETH_MIN_MTU;
+-      netdev->max_mtu = ETH_MAX_MTU;
++      netdev->max_mtu = ETH_MAX_MTU - IBMVETH_BUFF_OH;
+       memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
diff --git a/queue-5.4/ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch b/queue-5.4/ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch
new file mode 100644 (file)
index 0000000..297118e
--- /dev/null
@@ -0,0 +1,114 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Tue, 16 Jun 2020 16:04:00 +0000
+Subject: ip6_gre: fix use-after-free in ip6gre_tunnel_lookup()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit dafabb6590cb15f300b77c095d50312e2c7c8e0f ]
+
+In the datapath, the ip6gre_tunnel_lookup() is used and it internally uses
+fallback tunnel device pointer, which is fb_tunnel_dev.
+This pointer variable should be set to NULL when a fb interface is deleted.
+But there is no routine to set fb_tunnel_dev pointer to NULL.
+So, this pointer will be still used after interface is deleted and
+it eventually results in the use-after-free problem.
+
+Test commands:
+    ip netns add A
+    ip netns add B
+    ip link add eth0 type veth peer name eth1
+    ip link set eth0 netns A
+    ip link set eth1 netns B
+
+    ip netns exec A ip link set lo up
+    ip netns exec A ip link set eth0 up
+    ip netns exec A ip link add ip6gre1 type ip6gre local fc:0::1 \
+           remote fc:0::2
+    ip netns exec A ip -6 a a fc:100::1/64 dev ip6gre1
+    ip netns exec A ip link set ip6gre1 up
+    ip netns exec A ip -6 a a fc:0::1/64 dev eth0
+    ip netns exec A ip link set ip6gre0 up
+
+    ip netns exec B ip link set lo up
+    ip netns exec B ip link set eth1 up
+    ip netns exec B ip link add ip6gre1 type ip6gre local fc:0::2 \
+           remote fc:0::1
+    ip netns exec B ip -6 a a fc:100::2/64 dev ip6gre1
+    ip netns exec B ip link set ip6gre1 up
+    ip netns exec B ip -6 a a fc:0::2/64 dev eth1
+    ip netns exec B ip link set ip6gre0 up
+    ip netns exec A ping fc:100::2 -s 60000 &
+    ip netns del B
+
+Splat looks like:
+[   73.087285][    C1] BUG: KASAN: use-after-free in ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre]
+[   73.088361][    C1] Read of size 4 at addr ffff888040559218 by task ping/1429
+[   73.089317][    C1]
+[   73.089638][    C1] CPU: 1 PID: 1429 Comm: ping Not tainted 5.7.0+ #602
+[   73.090531][    C1] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[   73.091725][    C1] Call Trace:
+[   73.092160][    C1]  <IRQ>
+[   73.092556][    C1]  dump_stack+0x96/0xdb
+[   73.093122][    C1]  print_address_description.constprop.6+0x2cc/0x450
+[   73.094016][    C1]  ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre]
+[   73.094894][    C1]  ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre]
+[   73.095767][    C1]  ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre]
+[   73.096619][    C1]  kasan_report+0x154/0x190
+[   73.097209][    C1]  ? ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre]
+[   73.097989][    C1]  ip6gre_tunnel_lookup+0x1064/0x13f0 [ip6_gre]
+[   73.098750][    C1]  ? gre_del_protocol+0x60/0x60 [gre]
+[   73.099500][    C1]  gre_rcv+0x1c5/0x1450 [ip6_gre]
+[   73.100199][    C1]  ? ip6gre_header+0xf00/0xf00 [ip6_gre]
+[   73.100985][    C1]  ? rcu_read_lock_sched_held+0xc0/0xc0
+[   73.101830][    C1]  ? ip6_input_finish+0x5/0xf0
+[   73.102483][    C1]  ip6_protocol_deliver_rcu+0xcbb/0x1510
+[   73.103296][    C1]  ip6_input_finish+0x5b/0xf0
+[   73.103920][    C1]  ip6_input+0xcd/0x2c0
+[   73.104473][    C1]  ? ip6_input_finish+0xf0/0xf0
+[   73.105115][    C1]  ? rcu_read_lock_held+0x90/0xa0
+[   73.105783][    C1]  ? rcu_read_lock_sched_held+0xc0/0xc0
+[   73.106548][    C1]  ipv6_rcv+0x1f1/0x300
+[ ... ]
+
+Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
+Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -127,6 +127,7 @@ static struct ip6_tnl *ip6gre_tunnel_loo
+                       gre_proto == htons(ETH_P_ERSPAN2)) ?
+                      ARPHRD_ETHER : ARPHRD_IP6GRE;
+       int score, cand_score = 4;
++      struct net_device *ndev;
+       for_each_ip_tunnel_rcu(t, ign->tunnels_r_l[h0 ^ h1]) {
+               if (!ipv6_addr_equal(local, &t->parms.laddr) ||
+@@ -238,9 +239,9 @@ static struct ip6_tnl *ip6gre_tunnel_loo
+       if (t && t->dev->flags & IFF_UP)
+               return t;
+-      dev = ign->fb_tunnel_dev;
+-      if (dev && dev->flags & IFF_UP)
+-              return netdev_priv(dev);
++      ndev = READ_ONCE(ign->fb_tunnel_dev);
++      if (ndev && ndev->flags & IFF_UP)
++              return netdev_priv(ndev);
+       return NULL;
+ }
+@@ -413,6 +414,8 @@ static void ip6gre_tunnel_uninit(struct
+       ip6gre_tunnel_unlink_md(ign, t);
+       ip6gre_tunnel_unlink(ign, t);
++      if (ign->fb_tunnel_dev == dev)
++              WRITE_ONCE(ign->fb_tunnel_dev, NULL);
+       dst_cache_reset(&t->dst_cache);
+       dev_put(dev);
+ }
diff --git a/queue-5.4/ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch b/queue-5.4/ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch
new file mode 100644 (file)
index 0000000..1bb05e1
--- /dev/null
@@ -0,0 +1,117 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Tue, 16 Jun 2020 16:51:51 +0000
+Subject: ip_tunnel: fix use-after-free in ip_tunnel_lookup()
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit ba61539c6ae57f4146284a5cb4f7b7ed8d42bf45 ]
+
+In the datapath, the ip_tunnel_lookup() is used and it internally uses
+fallback tunnel device pointer, which is fb_tunnel_dev.
+This pointer variable should be set to NULL when a fb interface is deleted.
+But there is no routine to set fb_tunnel_dev pointer to NULL.
+So, this pointer will be still used after interface is deleted and
+it eventually results in the use-after-free problem.
+
+Test commands:
+    ip netns add A
+    ip netns add B
+    ip link add eth0 type veth peer name eth1
+    ip link set eth0 netns A
+    ip link set eth1 netns B
+
+    ip netns exec A ip link set lo up
+    ip netns exec A ip link set eth0 up
+    ip netns exec A ip link add gre1 type gre local 10.0.0.1 \
+           remote 10.0.0.2
+    ip netns exec A ip link set gre1 up
+    ip netns exec A ip a a 10.0.100.1/24 dev gre1
+    ip netns exec A ip a a 10.0.0.1/24 dev eth0
+
+    ip netns exec B ip link set lo up
+    ip netns exec B ip link set eth1 up
+    ip netns exec B ip link add gre1 type gre local 10.0.0.2 \
+           remote 10.0.0.1
+    ip netns exec B ip link set gre1 up
+    ip netns exec B ip a a 10.0.100.2/24 dev gre1
+    ip netns exec B ip a a 10.0.0.2/24 dev eth1
+    ip netns exec A hping3 10.0.100.2 -2 --flood -d 60000 &
+    ip netns del B
+
+Splat looks like:
+[   77.793450][    C3] ==================================================================
+[   77.794702][    C3] BUG: KASAN: use-after-free in ip_tunnel_lookup+0xcc4/0xf30
+[   77.795573][    C3] Read of size 4 at addr ffff888060bd9c84 by task hping3/2905
+[   77.796398][    C3]
+[   77.796664][    C3] CPU: 3 PID: 2905 Comm: hping3 Not tainted 5.8.0-rc1+ #616
+[   77.797474][    C3] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[   77.798453][    C3] Call Trace:
+[   77.798815][    C3]  <IRQ>
+[   77.799142][    C3]  dump_stack+0x9d/0xdb
+[   77.799605][    C3]  print_address_description.constprop.7+0x2cc/0x450
+[   77.800365][    C3]  ? ip_tunnel_lookup+0xcc4/0xf30
+[   77.800908][    C3]  ? ip_tunnel_lookup+0xcc4/0xf30
+[   77.801517][    C3]  ? ip_tunnel_lookup+0xcc4/0xf30
+[   77.802145][    C3]  kasan_report+0x154/0x190
+[   77.802821][    C3]  ? ip_tunnel_lookup+0xcc4/0xf30
+[   77.803503][    C3]  ip_tunnel_lookup+0xcc4/0xf30
+[   77.804165][    C3]  __ipgre_rcv+0x1ab/0xaa0 [ip_gre]
+[   77.804862][    C3]  ? rcu_read_lock_sched_held+0xc0/0xc0
+[   77.805621][    C3]  gre_rcv+0x304/0x1910 [ip_gre]
+[   77.806293][    C3]  ? lock_acquire+0x1a9/0x870
+[   77.806925][    C3]  ? gre_rcv+0xfe/0x354 [gre]
+[   77.807559][    C3]  ? erspan_xmit+0x2e60/0x2e60 [ip_gre]
+[   77.808305][    C3]  ? rcu_read_lock_sched_held+0xc0/0xc0
+[   77.809032][    C3]  ? rcu_read_lock_held+0x90/0xa0
+[   77.809713][    C3]  gre_rcv+0x1b8/0x354 [gre]
+[ ... ]
+
+Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
+Fixes: c54419321455 ("GRE: Refactor GRE tunneling code.")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_tunnel.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/net/ipv4/ip_tunnel.c
++++ b/net/ipv4/ip_tunnel.c
+@@ -85,9 +85,10 @@ struct ip_tunnel *ip_tunnel_lookup(struc
+                                  __be32 remote, __be32 local,
+                                  __be32 key)
+ {
+-      unsigned int hash;
+       struct ip_tunnel *t, *cand = NULL;
+       struct hlist_head *head;
++      struct net_device *ndev;
++      unsigned int hash;
+       hash = ip_tunnel_hash(key, remote);
+       head = &itn->tunnels[hash];
+@@ -162,8 +163,9 @@ struct ip_tunnel *ip_tunnel_lookup(struc
+       if (t && t->dev->flags & IFF_UP)
+               return t;
+-      if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
+-              return netdev_priv(itn->fb_tunnel_dev);
++      ndev = READ_ONCE(itn->fb_tunnel_dev);
++      if (ndev && ndev->flags & IFF_UP)
++              return netdev_priv(ndev);
+       return NULL;
+ }
+@@ -1245,9 +1247,9 @@ void ip_tunnel_uninit(struct net_device
+       struct ip_tunnel_net *itn;
+       itn = net_generic(net, tunnel->ip_tnl_net_id);
+-      /* fb_tunnel_dev will be unregisted in net-exit call. */
+-      if (itn->fb_tunnel_dev != dev)
+-              ip_tunnel_del(itn, netdev_priv(dev));
++      ip_tunnel_del(itn, netdev_priv(dev));
++      if (itn->fb_tunnel_dev == dev)
++              WRITE_ONCE(itn->fb_tunnel_dev, NULL);
+       dst_cache_reset(&tunnel->dst_cache);
+ }
diff --git a/queue-5.4/mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch b/queue-5.4/mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch
new file mode 100644 (file)
index 0000000..435bc4d
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Wang Hai <wanghai38@huawei.com>
+Date: Thu, 11 Jun 2020 15:57:50 +0800
+Subject: mld: fix memory leak in ipv6_mc_destroy_dev()
+
+From: Wang Hai <wanghai38@huawei.com>
+
+[ Upstream commit ea2fce88d2fd678ed9d45354ff49b73f1d5615dd ]
+
+Commit a84d01647989 ("mld: fix memory leak in mld_del_delrec()") fixed
+the memory leak of MLD, but missed the ipv6_mc_destroy_dev() path, in
+which mca_sources are leaked after ma_put().
+
+Use ip6_mc_clear_src() to take care of the missing free.
+
+BUG: memory leak
+unreferenced object 0xffff8881113d3180 (size 64):
+  comm "syz-executor071", pid 389, jiffies 4294887985 (age 17.943s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 ff 02 00 00 00 00 00 00  ................
+    00 00 00 00 00 00 00 01 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<000000002cbc483c>] kmalloc include/linux/slab.h:555 [inline]
+    [<000000002cbc483c>] kzalloc include/linux/slab.h:669 [inline]
+    [<000000002cbc483c>] ip6_mc_add1_src net/ipv6/mcast.c:2237 [inline]
+    [<000000002cbc483c>] ip6_mc_add_src+0x7f5/0xbb0 net/ipv6/mcast.c:2357
+    [<0000000058b8b1ff>] ip6_mc_source+0xe0c/0x1530 net/ipv6/mcast.c:449
+    [<000000000bfc4fb5>] do_ipv6_setsockopt.isra.12+0x1b2c/0x3b30 net/ipv6/ipv6_sockglue.c:754
+    [<00000000e4e7a722>] ipv6_setsockopt+0xda/0x150 net/ipv6/ipv6_sockglue.c:950
+    [<0000000029260d9a>] rawv6_setsockopt+0x45/0x100 net/ipv6/raw.c:1081
+    [<000000005c1b46f9>] __sys_setsockopt+0x131/0x210 net/socket.c:2132
+    [<000000008491f7db>] __do_sys_setsockopt net/socket.c:2148 [inline]
+    [<000000008491f7db>] __se_sys_setsockopt net/socket.c:2145 [inline]
+    [<000000008491f7db>] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2145
+    [<00000000c7bc11c5>] do_syscall_64+0xa1/0x530 arch/x86/entry/common.c:295
+    [<000000005fb7a3f3>] entry_SYSCALL_64_after_hwframe+0x49/0xb3
+
+Fixes: 1666d49e1d41 ("mld: do not remove mld souce list info when set link down")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Wang Hai <wanghai38@huawei.com>
+Acked-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/mcast.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/mcast.c
++++ b/net/ipv6/mcast.c
+@@ -2618,6 +2618,7 @@ void ipv6_mc_destroy_dev(struct inet6_de
+               idev->mc_list = i->next;
+               write_unlock_bh(&idev->lock);
++              ip6_mc_clear_src(i);
+               ma_put(i);
+               write_lock_bh(&idev->lock);
+       }
diff --git a/queue-5.4/mvpp2-ethtool-rxtx-stats-fix.patch b/queue-5.4/mvpp2-ethtool-rxtx-stats-fix.patch
new file mode 100644 (file)
index 0000000..bde89b4
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Sven Auhagen <sven.auhagen@voleatech.de>
+Date: Sun, 14 Jun 2020 09:19:17 +0200
+Subject: mvpp2: ethtool rxtx stats fix
+
+From: Sven Auhagen <sven.auhagen@voleatech.de>
+
+[ Upstream commit cc970925feb9a38c2f0d34305518e00a3084ce85 ]
+
+The ethtool rx and tx queue statistics are reporting wrong values.
+Fix reading out the correct ones.
+
+Signed-off-by: Sven Auhagen <sven.auhagen@voleatech.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
++++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+@@ -1541,7 +1541,7 @@ static void mvpp2_read_stats(struct mvpp
+       for (q = 0; q < port->ntxqs; q++)
+               for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_txq_regs); i++)
+                       *pstats++ += mvpp2_read_index(port->priv,
+-                                                    MVPP22_CTRS_TX_CTR(port->id, i),
++                                                    MVPP22_CTRS_TX_CTR(port->id, q),
+                                                     mvpp2_ethtool_txq_regs[i].offset);
+       /* Rxqs are numbered from 0 from the user standpoint, but not from the
+@@ -1550,7 +1550,7 @@ static void mvpp2_read_stats(struct mvpp
+       for (q = 0; q < port->nrxqs; q++)
+               for (i = 0; i < ARRAY_SIZE(mvpp2_ethtool_rxq_regs); i++)
+                       *pstats++ += mvpp2_read_index(port->priv,
+-                                                    port->first_rxq + i,
++                                                    port->first_rxq + q,
+                                                     mvpp2_ethtool_rxq_regs[i].offset);
+ }
diff --git a/queue-5.4/net-bridge-enfore-alignment-for-ethernet-address.patch b/queue-5.4/net-bridge-enfore-alignment-for-ethernet-address.patch
new file mode 100644 (file)
index 0000000..b4c319e
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Thomas Martitz <t.martitz@avm.de>
+Date: Thu, 25 Jun 2020 14:26:03 +0200
+Subject: net: bridge: enfore alignment for ethernet address
+
+From: Thomas Martitz <t.martitz@avm.de>
+
+[ Upstream commit db7202dec92e6caa2706c21d6fc359af318bde2e ]
+
+The eth_addr member is passed to ether_addr functions that require
+2-byte alignment, therefore the member must be properly aligned
+to avoid unaligned accesses.
+
+The problem is in place since the initial merge of multicast to unicast:
+commit 6db6f0eae6052b70885562e1733896647ec1d807 bridge: multicast to unicast
+
+Fixes: 6db6f0eae605 ("bridge: multicast to unicast")
+Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
+Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Felix Fietkau <nbd@nbd.name>
+Cc: stable@vger.kernel.org
+Signed-off-by: Thomas Martitz <t.martitz@avm.de>
+Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_private.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -208,8 +208,8 @@ struct net_bridge_port_group {
+       struct rcu_head                 rcu;
+       struct timer_list               timer;
+       struct br_ip                    addr;
++      unsigned char                   eth_addr[ETH_ALEN] __aligned(2);
+       unsigned char                   flags;
+-      unsigned char                   eth_addr[ETH_ALEN];
+ };
+ struct net_bridge_mdb_entry {
diff --git a/queue-5.4/net-core-reduce-recursion-limit-value.patch b/queue-5.4/net-core-reduce-recursion-limit-value.patch
new file mode 100644 (file)
index 0000000..545bd0f
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Taehee Yoo <ap420073@gmail.com>
+Date: Tue, 16 Jun 2020 15:52:05 +0000
+Subject: net: core: reduce recursion limit value
+
+From: Taehee Yoo <ap420073@gmail.com>
+
+[ Upstream commit fb7861d14c8d7edac65b2fcb6e8031cb138457b2 ]
+
+In the current code, ->ndo_start_xmit() can be executed recursively only
+10 times because of stack memory.
+But, in the case of vxlan, a recursion limit of 10 results in
+a stack overflow.
+In the current code, the nesting depth of interfaces is limited to 8.
+There is no critical reason that the recursion limit value should
+be 10.
+So, it would be good for it to be the same value as the limit on the
+nesting depth of interfaces.
+
+Test commands:
+    ip link add vxlan10 type vxlan vni 10 dstport 4789 srcport 4789 4789
+    ip link set vxlan10 up
+    ip a a 192.168.10.1/24 dev vxlan10
+    ip n a 192.168.10.2 dev vxlan10 lladdr fc:22:33:44:55:66 nud permanent
+
+    for i in {9..0}
+    do
+        let A=$i+1
+       ip link add vxlan$i type vxlan vni $i dstport 4789 srcport 4789 4789
+       ip link set vxlan$i up
+       ip a a 192.168.$i.1/24 dev vxlan$i
+       ip n a 192.168.$i.2 dev vxlan$i lladdr fc:22:33:44:55:66 nud permanent
+       bridge fdb add fc:22:33:44:55:66 dev vxlan$A dst 192.168.$i.2 self
+    done
+    hping3 192.168.10.2 -2 -d 60000
+
+Splat looks like:
+[  103.814237][ T1127] =============================================================================
+[  103.871955][ T1127] BUG kmalloc-2k (Tainted: G    B            ): Padding overwritten. 0x00000000897a2e4f-0x000
+[  103.873187][ T1127] -----------------------------------------------------------------------------
+[  103.873187][ T1127]
+[  103.874252][ T1127] INFO: Slab 0x000000005cccc724 objects=5 used=5 fp=0x0000000000000000 flags=0x10000000001020
+[  103.881323][ T1127] CPU: 3 PID: 1127 Comm: hping3 Tainted: G    B             5.7.0+ #575
+[  103.882131][ T1127] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006
+[  103.883006][ T1127] Call Trace:
+[  103.883324][ T1127]  dump_stack+0x96/0xdb
+[  103.883716][ T1127]  slab_err+0xad/0xd0
+[  103.884106][ T1127]  ? _raw_spin_unlock+0x1f/0x30
+[  103.884620][ T1127]  ? get_partial_node.isra.78+0x140/0x360
+[  103.885214][ T1127]  slab_pad_check.part.53+0xf7/0x160
+[  103.885769][ T1127]  ? pskb_expand_head+0x110/0xe10
+[  103.886316][ T1127]  check_slab+0x97/0xb0
+[  103.886763][ T1127]  alloc_debug_processing+0x84/0x1a0
+[  103.887308][ T1127]  ___slab_alloc+0x5a5/0x630
+[  103.887765][ T1127]  ? pskb_expand_head+0x110/0xe10
+[  103.888265][ T1127]  ? lock_downgrade+0x730/0x730
+[  103.888762][ T1127]  ? pskb_expand_head+0x110/0xe10
+[  103.889244][ T1127]  ? __slab_alloc+0x3e/0x80
+[  103.889675][ T1127]  __slab_alloc+0x3e/0x80
+[  103.890108][ T1127]  __kmalloc_node_track_caller+0xc7/0x420
+[ ... ]
+
+Fixes: 11a766ce915f ("net: Increase xmit RECURSION_LIMIT to 10.")
+Signed-off-by: Taehee Yoo <ap420073@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/netdevice.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3043,7 +3043,7 @@ static inline int dev_recursion_level(vo
+       return this_cpu_read(softnet_data.xmit.recursion);
+ }
+-#define XMIT_RECURSION_LIMIT  10
++#define XMIT_RECURSION_LIMIT  8
+ static inline bool dev_xmit_recursion(void)
+ {
+       return unlikely(__this_cpu_read(softnet_data.xmit.recursion) >
diff --git a/queue-5.4/net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch b/queue-5.4/net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch
new file mode 100644 (file)
index 0000000..064e9bf
--- /dev/null
@@ -0,0 +1,57 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Tariq Toukan <tariqt@mellanox.com>
+Date: Mon, 22 Jun 2020 23:26:04 +0300
+Subject: net: Do not clear the sock TX queue in sk_set_socket()
+
+From: Tariq Toukan <tariqt@mellanox.com>
+
+[ Upstream commit 41b14fb8724d5a4b382a63cb4a1a61880347ccb8 ]
+
+Clearing the sock TX queue in sk_set_socket() might cause unexpected
+out-of-order transmit when called from sock_orphan(), as outstanding
+packets can pick a different TX queue and bypass the ones already queued.
+
+This is undesired in general. More specifically, it breaks the in-order
+scheduling property guarantee for device-offloaded TLS sockets.
+
+Remove the call to sk_tx_queue_clear() in sk_set_socket(), and add it
+explicitly only where needed.
+
+Fixes: e022f0b4a03f ("net: Introduce sk_tx_queue_mapping")
+Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
+Reviewed-by: Boris Pismenny <borisp@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sock.h |    1 -
+ net/core/sock.c    |    2 ++
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -1803,7 +1803,6 @@ static inline int sk_rx_queue_get(const
+ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
+ {
+-      sk_tx_queue_clear(sk);
+       sk->sk_socket = sock;
+ }
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1679,6 +1679,7 @@ struct sock *sk_alloc(struct net *net, i
+               cgroup_sk_alloc(&sk->sk_cgrp_data);
+               sock_update_classid(&sk->sk_cgrp_data);
+               sock_update_netprioidx(&sk->sk_cgrp_data);
++              sk_tx_queue_clear(sk);
+       }
+       return sk;
+@@ -1895,6 +1896,7 @@ struct sock *sk_clone_lock(const struct
+                */
+               sk_refcnt_debug_inc(newsk);
+               sk_set_socket(newsk, NULL);
++              sk_tx_queue_clear(newsk);
+               RCU_INIT_POINTER(newsk->sk_wq, NULL);
+               if (newsk->sk_prot->sockets_allocated)
diff --git a/queue-5.4/net-fix-memleak-in-register_netdevice.patch b/queue-5.4/net-fix-memleak-in-register_netdevice.patch
new file mode 100644 (file)
index 0000000..9f61611
--- /dev/null
@@ -0,0 +1,87 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Yang Yingliang <yangyingliang@huawei.com>
+Date: Tue, 16 Jun 2020 09:39:21 +0000
+Subject: net: fix memleak in register_netdevice()
+
+From: Yang Yingliang <yangyingliang@huawei.com>
+
+[ Upstream commit 814152a89ed52c722ab92e9fbabcac3cb8a39245 ]
+
+I got a memleak report when doing some fuzz test:
+
+unreferenced object 0xffff888112584000 (size 13599):
+  comm "ip", pid 3048, jiffies 4294911734 (age 343.491s)
+  hex dump (first 32 bytes):
+    74 61 70 30 00 00 00 00 00 00 00 00 00 00 00 00  tap0............
+    00 ee d9 19 81 88 ff ff 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<000000002f60ba65>] __kmalloc_node+0x309/0x3a0
+    [<0000000075b211ec>] kvmalloc_node+0x7f/0xc0
+    [<00000000d3a97396>] alloc_netdev_mqs+0x76/0xfc0
+    [<00000000609c3655>] __tun_chr_ioctl+0x1456/0x3d70
+    [<000000001127ca24>] ksys_ioctl+0xe5/0x130
+    [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0
+    [<00000000e1023498>] do_syscall_64+0x56/0xa0
+    [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+unreferenced object 0xffff888111845cc0 (size 8):
+  comm "ip", pid 3048, jiffies 4294911734 (age 343.491s)
+  hex dump (first 8 bytes):
+    74 61 70 30 00 88 ff ff                          tap0....
+  backtrace:
+    [<000000004c159777>] kstrdup+0x35/0x70
+    [<00000000d8b496ad>] kstrdup_const+0x3d/0x50
+    [<00000000494e884a>] kvasprintf_const+0xf1/0x180
+    [<0000000097880a2b>] kobject_set_name_vargs+0x56/0x140
+    [<000000008fbdfc7b>] dev_set_name+0xab/0xe0
+    [<000000005b99e3b4>] netdev_register_kobject+0xc0/0x390
+    [<00000000602704fe>] register_netdevice+0xb61/0x1250
+    [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70
+    [<000000001127ca24>] ksys_ioctl+0xe5/0x130
+    [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0
+    [<00000000e1023498>] do_syscall_64+0x56/0xa0
+    [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+unreferenced object 0xffff88811886d800 (size 512):
+  comm "ip", pid 3048, jiffies 4294911734 (age 343.491s)
+  hex dump (first 32 bytes):
+    00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00  .....N..........
+    ff ff ff ff ff ff ff ff c0 66 3d a3 ff ff ff ff  .........f=.....
+  backtrace:
+    [<0000000050315800>] device_add+0x61e/0x1950
+    [<0000000021008dfb>] netdev_register_kobject+0x17e/0x390
+    [<00000000602704fe>] register_netdevice+0xb61/0x1250
+    [<000000002b7ca244>] __tun_chr_ioctl+0x1cd1/0x3d70
+    [<000000001127ca24>] ksys_ioctl+0xe5/0x130
+    [<00000000b7d5e66a>] __x64_sys_ioctl+0x6f/0xb0
+    [<00000000e1023498>] do_syscall_64+0x56/0xa0
+    [<000000009ec0eb12>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+If call_netdevice_notifiers() fails, then rollback_registered()
+calls netdev_unregister_kobject(), which holds the kobject. The
+reference cannot be put because the netdev won't be added to the
+todo list, so this leads to a memleak; we need to put the reference
+to avoid the memleak.
+
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -9114,6 +9114,13 @@ int register_netdevice(struct net_device
+               rcu_barrier();
+               dev->reg_state = NETREG_UNREGISTERED;
++              /* We should put the kobject that hold in
++               * netdev_unregister_kobject(), otherwise
++               * the net device cannot be freed when
++               * driver calls free_netdev(), because the
++               * kobject is being hold.
++               */
++              kobject_put(&dev->dev.kobj);
+       }
+       /*
+        *      Prevent userspace races by waiting until the network
diff --git a/queue-5.4/net-fix-the-arp-error-in-some-cases.patch b/queue-5.4/net-fix-the-arp-error-in-some-cases.patch
new file mode 100644 (file)
index 0000000..7183024
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: guodeqing <geffrey.guo@huawei.com>
+Date: Wed, 17 Jun 2020 10:07:16 +0800
+Subject: net: Fix the arp error in some cases
+
+From: guodeqing <geffrey.guo@huawei.com>
+
+[ Upstream commit 5eea3a63ff4aba6a26002e657a6d21934b7e2b96 ]
+
+e.g.,
+$ ifconfig eth0 6.6.6.6 netmask 255.255.255.0
+
+$ ip rule add from 6.6.6.6 table 6666
+
+$ ip route add 9.9.9.9 via 6.6.6.6
+
+$ ping -I 6.6.6.6 9.9.9.9
+PING 9.9.9.9 (9.9.9.9) from 6.6.6.6 : 56(84) bytes of data.
+
+3 packets transmitted, 0 received, 100% packet loss, time 2079ms
+
+$ arp
+Address     HWtype  HWaddress           Flags Mask            Iface
+6.6.6.6             (incomplete)                              eth0
+
+The arp request address is wrong. This is because fib_table_lookup in
+fib_check_nh looks up the destination 9.9.9.9 nexthop; the scope of
+the fib result is RT_SCOPE_LINK, but the correct scope is RT_SCOPE_HOST.
+Here I add a check of whether this is RT_TABLE_MAIN to solve this problem.
+
+Fixes: 3bfd847203c6 ("net: Use passed in table for nexthop lookups")
+Signed-off-by: guodeqing <geffrey.guo@huawei.com>
+Reviewed-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/fib_semantics.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/fib_semantics.c
++++ b/net/ipv4/fib_semantics.c
+@@ -1100,7 +1100,7 @@ static int fib_check_nh_v4_gw(struct net
+               if (fl4.flowi4_scope < RT_SCOPE_LINK)
+                       fl4.flowi4_scope = RT_SCOPE_LINK;
+-              if (table)
++              if (table && table != RT_TABLE_MAIN)
+                       tbl = fib_get_table(net, table);
+               if (tbl)
diff --git a/queue-5.4/net-increment-xmit_recursion-level-in-dev_direct_xmit.patch b/queue-5.4/net-increment-xmit_recursion-level-in-dev_direct_xmit.patch
new file mode 100644 (file)
index 0000000..c92a364
--- /dev/null
@@ -0,0 +1,105 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 17 Jun 2020 22:23:25 -0700
+Subject: net: increment xmit_recursion level in dev_direct_xmit()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 0ad6f6e767ec2f613418cbc7ebe5ec4c35af540c ]
+
+Back in commit f60e5990d9c1 ("ipv6: protect skb->sk accesses
+from recursive dereference inside the stack") Hannes added code
+so that IPv6 stack would not trust skb->sk for typical cases
+where packet goes through 'standard' xmit path (__dev_queue_xmit())
+
+Alas af_packet had a dev_direct_xmit() path that was not
+dealing yet with xmit_recursion level.
+
+Also change sk_mc_loop() to dump a stack once only.
+
+Without this patch, syzbot was able to trigger :
+
+[1]
+[  153.567378] WARNING: CPU: 7 PID: 11273 at net/core/sock.c:721 sk_mc_loop+0x51/0x70
+[  153.567378] Modules linked in: nfnetlink ip6table_raw ip6table_filter iptable_raw iptable_nat nf_nat nf_conntrack nf_defrag_ipv4 nf_defrag_ipv6 iptable_filter macsec macvtap tap macvlan 8021q hsr wireguard libblake2s blake2s_x86_64 libblake2s_generic udp_tunnel ip6_udp_tunnel libchacha20poly1305 poly1305_x86_64 chacha_x86_64 libchacha curve25519_x86_64 libcurve25519_generic netdevsim batman_adv dummy team bridge stp llc w1_therm wire i2c_mux_pca954x i2c_mux cdc_acm ehci_pci ehci_hcd mlx4_en mlx4_ib ib_uverbs ib_core mlx4_core
+[  153.567386] CPU: 7 PID: 11273 Comm: b159172088 Not tainted 5.8.0-smp-DEV #273
+[  153.567387] RIP: 0010:sk_mc_loop+0x51/0x70
+[  153.567388] Code: 66 83 f8 0a 75 24 0f b6 4f 12 b8 01 00 00 00 31 d2 d3 e0 a9 bf ef ff ff 74 07 48 8b 97 f0 02 00 00 0f b6 42 3a 83 e0 01 5d c3 <0f> 0b b8 01 00 00 00 5d c3 0f b6 87 18 03 00 00 5d c0 e8 04 83 e0
+[  153.567388] RSP: 0018:ffff95c69bb93990 EFLAGS: 00010212
+[  153.567388] RAX: 0000000000000011 RBX: ffff95c6e0ee3e00 RCX: 0000000000000007
+[  153.567389] RDX: ffff95c69ae50000 RSI: ffff95c6c30c3000 RDI: ffff95c6c30c3000
+[  153.567389] RBP: ffff95c69bb93990 R08: ffff95c69a77f000 R09: 0000000000000008
+[  153.567389] R10: 0000000000000040 R11: 00003e0e00026128 R12: ffff95c6c30c3000
+[  153.567390] R13: ffff95c6cc4fd500 R14: ffff95c6f84500c0 R15: ffff95c69aa13c00
+[  153.567390] FS:  00007fdc3a283700(0000) GS:ffff95c6ff9c0000(0000) knlGS:0000000000000000
+[  153.567390] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  153.567391] CR2: 00007ffee758e890 CR3: 0000001f9ba20003 CR4: 00000000001606e0
+[  153.567391] Call Trace:
+[  153.567391]  ip6_finish_output2+0x34e/0x550
+[  153.567391]  __ip6_finish_output+0xe7/0x110
+[  153.567391]  ip6_finish_output+0x2d/0xb0
+[  153.567392]  ip6_output+0x77/0x120
+[  153.567392]  ? __ip6_finish_output+0x110/0x110
+[  153.567392]  ip6_local_out+0x3d/0x50
+[  153.567392]  ipvlan_queue_xmit+0x56c/0x5e0
+[  153.567393]  ? ksize+0x19/0x30
+[  153.567393]  ipvlan_start_xmit+0x18/0x50
+[  153.567393]  dev_direct_xmit+0xf3/0x1c0
+[  153.567393]  packet_direct_xmit+0x69/0xa0
+[  153.567394]  packet_sendmsg+0xbf0/0x19b0
+[  153.567394]  ? plist_del+0x62/0xb0
+[  153.567394]  sock_sendmsg+0x65/0x70
+[  153.567394]  sock_write_iter+0x93/0xf0
+[  153.567394]  new_sync_write+0x18e/0x1a0
+[  153.567395]  __vfs_write+0x29/0x40
+[  153.567395]  vfs_write+0xb9/0x1b0
+[  153.567395]  ksys_write+0xb1/0xe0
+[  153.567395]  __x64_sys_write+0x1a/0x20
+[  153.567395]  do_syscall_64+0x43/0x70
+[  153.567396]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+[  153.567396] RIP: 0033:0x453549
+[  153.567396] Code: Bad RIP value.
+[  153.567396] RSP: 002b:00007fdc3a282cc8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+[  153.567397] RAX: ffffffffffffffda RBX: 00000000004d32d0 RCX: 0000000000453549
+[  153.567397] RDX: 0000000000000020 RSI: 0000000020000300 RDI: 0000000000000003
+[  153.567398] RBP: 00000000004d32d8 R08: 0000000000000000 R09: 0000000000000000
+[  153.567398] R10: 0000000000000000 R11: 0000000000000246 R12: 00000000004d32dc
+[  153.567398] R13: 00007ffee742260f R14: 00007fdc3a282dc0 R15: 00007fdc3a283700
+[  153.567399] ---[ end trace c1d5ae2b1059ec62 ]---
+
+Fixes: f60e5990d9c1 ("ipv6: protect skb->sk accesses from recursive dereference inside the stack")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c  |    2 ++
+ net/core/sock.c |    2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3832,10 +3832,12 @@ int dev_direct_xmit(struct sk_buff *skb,
+       local_bh_disable();
++      dev_xmit_recursion_inc();
+       HARD_TX_LOCK(dev, txq, smp_processor_id());
+       if (!netif_xmit_frozen_or_drv_stopped(txq))
+               ret = netdev_start_xmit(skb, dev, txq, false);
+       HARD_TX_UNLOCK(dev, txq);
++      dev_xmit_recursion_dec();
+       local_bh_enable();
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -709,7 +709,7 @@ bool sk_mc_loop(struct sock *sk)
+               return inet6_sk(sk)->mc_loop;
+ #endif
+       }
+-      WARN_ON(1);
++      WARN_ON_ONCE(1);
+       return true;
+ }
+ EXPORT_SYMBOL(sk_mc_loop);
diff --git a/queue-5.4/net-phy-check-harder-for-errors-in-get_phy_id.patch b/queue-5.4/net-phy-check-harder-for-errors-in-get_phy_id.patch
new file mode 100644 (file)
index 0000000..99e491c
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Fri, 19 Jun 2020 11:47:47 -0700
+Subject: net: phy: Check harder for errors in get_phy_id()
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+[ Upstream commit b2ffc75e2e990b09903f9d15ccd53bc5f3a4217c ]
+
+Commit 02a6efcab675 ("net: phy: allow scanning busses with missing
+phys") added a special condition to return -ENODEV in case -ENODEV or
+-EIO was returned from the first read of the MII_PHYSID1 register.
+
+In case the MDIO bus data line pull-up is not strong enough, the MDIO
+bus controller will not flag this as a read error. This can happen when
+a pluggable daughter card is not connected and weak internal pull-ups
+are used (since that is the only option, otherwise the pins are
+floating).
+
+The second read of MII_PHYSID2 will be correctly flagged as an error
+though, but now we will return -EIO which will be treated as a hard
+error, thus preventing MDIO bus scanning loops from continuing successfully.
+
+Apply the same logic to both register reads, thus allowing the scanning
+logic to proceed.
+
+Fixes: 02a6efcab675 ("net: phy: allow scanning busses with missing phys")
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/phy_device.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -797,8 +797,10 @@ static int get_phy_id(struct mii_bus *bu
+       /* Grab the bits from PHYIR2, and put them in the lower half */
+       phy_reg = mdiobus_read(bus, addr, MII_PHYSID2);
+-      if (phy_reg < 0)
+-              return -EIO;
++      if (phy_reg < 0) {
++              /* returning -ENODEV doesn't stop bus scanning */
++              return (phy_reg == -EIO || phy_reg == -ENODEV) ? -ENODEV : -EIO;
++      }
+       *phy_id |= phy_reg;
diff --git a/queue-5.4/net-usb-ax88179_178a-fix-packet-alignment-padding.patch b/queue-5.4/net-usb-ax88179_178a-fix-packet-alignment-padding.patch
new file mode 100644 (file)
index 0000000..be36cd3
--- /dev/null
@@ -0,0 +1,69 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Jeremy Kerr <jk@ozlabs.org>
+Date: Mon, 15 Jun 2020 10:54:56 +0800
+Subject: net: usb: ax88179_178a: fix packet alignment padding
+
+From: Jeremy Kerr <jk@ozlabs.org>
+
+[ Upstream commit e869e7a17798d85829fa7d4f9bbe1eebd4b2d3f6 ]
+
+Using an AX88179 device (0b95:1790), I see two bytes of appended data on
+every RX packet. For example, this 48-byte ping, using 0xff as a
+payload byte:
+
+  04:20:22.528472 IP 192.168.1.1 > 192.168.1.2: ICMP echo request, id 2447, seq 1, length 64
+       0x0000:  000a cd35 ea50 000a cd35 ea4f 0800 4500
+       0x0010:  0054 c116 4000 4001 f63e c0a8 0101 c0a8
+       0x0020:  0102 0800 b633 098f 0001 87ea cd5e 0000
+       0x0030:  0000 dcf2 0600 0000 0000 ffff ffff ffff
+       0x0040:  ffff ffff ffff ffff ffff ffff ffff ffff
+       0x0050:  ffff ffff ffff ffff ffff ffff ffff ffff
+       0x0060:  ffff 961f
+
+Those last two bytes - 96 1f - aren't part of the original packet.
+
+In the ax88179 RX path, the usbnet rx_fixup function trims a 2-byte
+'alignment pseudo header' from the start of the packet, and sets the
+length from a per-packet field populated by hardware. It looks like that
+length field *includes* the 2-byte header; the current driver assumes
+that it's excluded.
+
+This change trims the 2-byte alignment header after we've set the packet
+length, so the resulting packet length is correct. While we're moving
+the comment around, this also fixes the spelling of 'pseudo'.
+
+Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/ax88179_178a.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/drivers/net/usb/ax88179_178a.c
++++ b/drivers/net/usb/ax88179_178a.c
+@@ -1387,10 +1387,10 @@ static int ax88179_rx_fixup(struct usbne
+               }
+               if (pkt_cnt == 0) {
+-                      /* Skip IP alignment psudo header */
+-                      skb_pull(skb, 2);
+                       skb->len = pkt_len;
+-                      skb_set_tail_pointer(skb, pkt_len);
++                      /* Skip IP alignment pseudo header */
++                      skb_pull(skb, 2);
++                      skb_set_tail_pointer(skb, skb->len);
+                       skb->truesize = pkt_len + sizeof(struct sk_buff);
+                       ax88179_rx_checksum(skb, pkt_hdr);
+                       return 1;
+@@ -1399,8 +1399,9 @@ static int ax88179_rx_fixup(struct usbne
+               ax_skb = skb_clone(skb, GFP_ATOMIC);
+               if (ax_skb) {
+                       ax_skb->len = pkt_len;
+-                      ax_skb->data = skb->data + 2;
+-                      skb_set_tail_pointer(ax_skb, pkt_len);
++                      /* Skip IP alignment pseudo header */
++                      skb_pull(ax_skb, 2);
++                      skb_set_tail_pointer(ax_skb, ax_skb->len);
+                       ax_skb->truesize = pkt_len + sizeof(struct sk_buff);
+                       ax88179_rx_checksum(ax_skb, pkt_hdr);
+                       usbnet_skb_return(dev, ax_skb);
diff --git a/queue-5.4/openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch b/queue-5.4/openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch
new file mode 100644 (file)
index 0000000..f8cb14a
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+Date: Tue, 23 Jun 2020 18:33:15 +0200
+Subject: openvswitch: take into account de-fragmentation/gso_size in execute_check_pkt_len
+
+From: Lorenzo Bianconi <lorenzo@kernel.org>
+
+[ Upstream commit 17843655708e1941c0653af3cd61be6948e36f43 ]
+
+ovs connection tracking module performs de-fragmentation on incoming
+fragmented traffic. Take into account whether traffic has been de-fragmented
+in execute_check_pkt_len action, otherwise we will perform the wrong
+nested action considering the original packet size. This issue typically
+occurs if ovs-vswitchd adds a rule in the pipeline that requires connection
+tracking (e.g. OVN stateful ACLs) before execute_check_pkt_len action.
+Moreover, take into account GSO fragment size for GSO packets in
+execute_check_pkt_len routine.
+
+Fixes: 4d5ec89fc8d14 ("net: openvswitch: Add a new action check_pkt_len")
+Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/openvswitch/actions.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -1146,9 +1146,10 @@ static int execute_check_pkt_len(struct
+                                struct sw_flow_key *key,
+                                const struct nlattr *attr, bool last)
+ {
++      struct ovs_skb_cb *ovs_cb = OVS_CB(skb);
+       const struct nlattr *actions, *cpl_arg;
++      int len, max_len, rem = nla_len(attr);
+       const struct check_pkt_len_arg *arg;
+-      int rem = nla_len(attr);
+       bool clone_flow_key;
+       /* The first netlink attribute in 'attr' is always
+@@ -1157,7 +1158,11 @@ static int execute_check_pkt_len(struct
+       cpl_arg = nla_data(attr);
+       arg = nla_data(cpl_arg);
+-      if (skb->len <= arg->pkt_len) {
++      len = ovs_cb->mru ? ovs_cb->mru + skb->mac_len : skb->len;
++      max_len = arg->pkt_len;
++
++      if ((skb_is_gso(skb) && skb_gso_validate_mac_len(skb, max_len)) ||
++          len <= max_len) {
+               /* Second netlink attribute in 'attr' is always
+                * 'OVS_CHECK_PKT_LEN_ATTR_ACTIONS_IF_LESS_EQUAL'.
+                */
diff --git a/queue-5.4/rocker-fix-incorrect-error-handling-in-dma_rings_init.patch b/queue-5.4/rocker-fix-incorrect-error-handling-in-dma_rings_init.patch
new file mode 100644 (file)
index 0000000..3d41176
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Aditya Pakki <pakki001@umn.edu>
+Date: Fri, 12 Jun 2020 15:27:55 -0500
+Subject: rocker: fix incorrect error handling in dma_rings_init
+
+From: Aditya Pakki <pakki001@umn.edu>
+
+[ Upstream commit 58d0c864e1a759a15c9df78f50ea5a5c32b3989e ]
+
+In rocker_dma_rings_init, the goto blocks in case of errors
+caused by the functions rocker_dma_cmd_ring_waits_alloc() and
+rocker_dma_ring_create() are incorrect. The patch fixes the
+order to be consistent with the cleanup in rocker_dma_rings_fini().
+
+Signed-off-by: Aditya Pakki <pakki001@umn.edu>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/rocker/rocker_main.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/rocker/rocker_main.c
++++ b/drivers/net/ethernet/rocker/rocker_main.c
+@@ -647,10 +647,10 @@ static int rocker_dma_rings_init(struct
+ err_dma_event_ring_bufs_alloc:
+       rocker_dma_ring_destroy(rocker, &rocker->event_ring);
+ err_dma_event_ring_create:
++      rocker_dma_cmd_ring_waits_free(rocker);
++err_dma_cmd_ring_waits_alloc:
+       rocker_dma_ring_bufs_free(rocker, &rocker->cmd_ring,
+                                 PCI_DMA_BIDIRECTIONAL);
+-err_dma_cmd_ring_waits_alloc:
+-      rocker_dma_cmd_ring_waits_free(rocker);
+ err_dma_cmd_ring_bufs_alloc:
+       rocker_dma_ring_destroy(rocker, &rocker->cmd_ring);
+       return err;
diff --git a/queue-5.4/rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch b/queue-5.4/rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch
new file mode 100644 (file)
index 0000000..c3639e9
--- /dev/null
@@ -0,0 +1,146 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: David Howells <dhowells@redhat.com>
+Date: Fri, 19 Jun 2020 23:38:16 +0100
+Subject: rxrpc: Fix notification call on completion of discarded calls
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 0041cd5a50442db6e456b145892a0eaf2dff061f ]
+
+When preallocated service calls are being discarded, they're passed to
+->discard_new_call() to have the caller clean up any attached higher-layer
+preallocated pieces before being marked completed.  However, the act of
+marking them completed now invokes the call's notification function - which
+causes a problem because that function might assume that the previously
+freed pieces of memory are still there.
+
+Fix this by setting a dummy notification function on the call after
+calling ->discard_new_call().
+
+The bug results in the following kasan message when the kafs module is
+removed.
+
+==================================================================
+BUG: KASAN: use-after-free in afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707
+Write of size 1 at addr ffff8880946c39e4 by task kworker/u4:1/21
+
+CPU: 0 PID: 21 Comm: kworker/u4:1 Not tainted 5.8.0-rc1-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: netns cleanup_net
+Call Trace:
+ __dump_stack lib/dump_stack.c:77 [inline]
+ dump_stack+0x18f/0x20d lib/dump_stack.c:118
+ print_address_description.constprop.0.cold+0xd3/0x413 mm/kasan/report.c:383
+ __kasan_report mm/kasan/report.c:513 [inline]
+ kasan_report.cold+0x1f/0x37 mm/kasan/report.c:530
+ afs_wake_up_async_call+0x6aa/0x770 fs/afs/rxrpc.c:707
+ rxrpc_notify_socket+0x1db/0x5d0 net/rxrpc/recvmsg.c:40
+ __rxrpc_set_call_completion.part.0+0x172/0x410 net/rxrpc/recvmsg.c:76
+ __rxrpc_call_completed net/rxrpc/recvmsg.c:112 [inline]
+ rxrpc_call_completed+0xca/0xf0 net/rxrpc/recvmsg.c:111
+ rxrpc_discard_prealloc+0x781/0xab0 net/rxrpc/call_accept.c:233
+ rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245
+ afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110
+ afs_net_exit+0x1bc/0x310 fs/afs/main.c:155
+ ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186
+ cleanup_net+0x511/0xa50 net/core/net_namespace.c:603
+ process_one_work+0x965/0x1690 kernel/workqueue.c:2269
+ worker_thread+0x96/0xe10 kernel/workqueue.c:2415
+ kthread+0x3b5/0x4a0 kernel/kthread.c:291
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293
+
+Allocated by task 6820:
+ save_stack+0x1b/0x40 mm/kasan/common.c:48
+ set_track mm/kasan/common.c:56 [inline]
+ __kasan_kmalloc mm/kasan/common.c:494 [inline]
+ __kasan_kmalloc.constprop.0+0xbf/0xd0 mm/kasan/common.c:467
+ kmem_cache_alloc_trace+0x153/0x7d0 mm/slab.c:3551
+ kmalloc include/linux/slab.h:555 [inline]
+ kzalloc include/linux/slab.h:669 [inline]
+ afs_alloc_call+0x55/0x630 fs/afs/rxrpc.c:141
+ afs_charge_preallocation+0xe9/0x2d0 fs/afs/rxrpc.c:757
+ afs_open_socket+0x292/0x360 fs/afs/rxrpc.c:92
+ afs_net_init+0xa6c/0xe30 fs/afs/main.c:125
+ ops_init+0xaf/0x420 net/core/net_namespace.c:151
+ setup_net+0x2de/0x860 net/core/net_namespace.c:341
+ copy_net_ns+0x293/0x590 net/core/net_namespace.c:482
+ create_new_namespaces+0x3fb/0xb30 kernel/nsproxy.c:110
+ unshare_nsproxy_namespaces+0xbd/0x1f0 kernel/nsproxy.c:231
+ ksys_unshare+0x43d/0x8e0 kernel/fork.c:2983
+ __do_sys_unshare kernel/fork.c:3051 [inline]
+ __se_sys_unshare kernel/fork.c:3049 [inline]
+ __x64_sys_unshare+0x2d/0x40 kernel/fork.c:3049
+ do_syscall_64+0x60/0xe0 arch/x86/entry/common.c:359
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Freed by task 21:
+ save_stack+0x1b/0x40 mm/kasan/common.c:48
+ set_track mm/kasan/common.c:56 [inline]
+ kasan_set_free_info mm/kasan/common.c:316 [inline]
+ __kasan_slab_free+0xf7/0x140 mm/kasan/common.c:455
+ __cache_free mm/slab.c:3426 [inline]
+ kfree+0x109/0x2b0 mm/slab.c:3757
+ afs_put_call+0x585/0xa40 fs/afs/rxrpc.c:190
+ rxrpc_discard_prealloc+0x764/0xab0 net/rxrpc/call_accept.c:230
+ rxrpc_listen+0x147/0x360 net/rxrpc/af_rxrpc.c:245
+ afs_close_socket+0x95/0x320 fs/afs/rxrpc.c:110
+ afs_net_exit+0x1bc/0x310 fs/afs/main.c:155
+ ops_exit_list.isra.0+0xa8/0x150 net/core/net_namespace.c:186
+ cleanup_net+0x511/0xa50 net/core/net_namespace.c:603
+ process_one_work+0x965/0x1690 kernel/workqueue.c:2269
+ worker_thread+0x96/0xe10 kernel/workqueue.c:2415
+ kthread+0x3b5/0x4a0 kernel/kthread.c:291
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:293
+
+The buggy address belongs to the object at ffff8880946c3800
+ which belongs to the cache kmalloc-1k of size 1024
+The buggy address is located 484 bytes inside of
+ 1024-byte region [ffff8880946c3800, ffff8880946c3c00)
+The buggy address belongs to the page:
+page:ffffea000251b0c0 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0
+flags: 0xfffe0000000200(slab)
+raw: 00fffe0000000200 ffffea0002546508 ffffea00024fa248 ffff8880aa000c40
+raw: 0000000000000000 ffff8880946c3000 0000000100000002 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff8880946c3880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880946c3900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff8880946c3980: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+                                                       ^
+ ffff8880946c3a00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff8880946c3a80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+==================================================================
+
+Reported-by: syzbot+d3eccef36ddbd02713e9@syzkaller.appspotmail.com
+Fixes: 5ac0d62226a0 ("rxrpc: Fix missing notification")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rxrpc/call_accept.c |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/net/rxrpc/call_accept.c
++++ b/net/rxrpc/call_accept.c
+@@ -22,6 +22,11 @@
+ #include <net/ip.h>
+ #include "ar-internal.h"
++static void rxrpc_dummy_notify(struct sock *sk, struct rxrpc_call *call,
++                             unsigned long user_call_ID)
++{
++}
++
+ /*
+  * Preallocate a single service call, connection and peer and, if possible,
+  * give them a user ID and attach the user's side of the ID to them.
+@@ -228,6 +233,8 @@ void rxrpc_discard_prealloc(struct rxrpc
+               if (rx->discard_new_call) {
+                       _debug("discard %lx", call->user_call_ID);
+                       rx->discard_new_call(call, call->user_call_ID);
++                      if (call->notify_rx)
++                              call->notify_rx = rxrpc_dummy_notify;
+                       rxrpc_put_call(call, rxrpc_call_put_kernel);
+               }
+               rxrpc_call_completed(call);
diff --git a/queue-5.4/sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch b/queue-5.4/sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch
new file mode 100644 (file)
index 0000000..7d9f2f6
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+Date: Thu, 25 Jun 2020 22:12:08 +0200
+Subject: sch_cake: don't call diffserv parsing code when it is not needed
+
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+
+[ Upstream commit 8c95eca0bb8c4bd2231a0d581f1ad0d50c90488c ]
+
+As a further optimisation of the diffserv parsing codepath, we can skip it
+entirely if CAKE is configured to neither use diffserv-based
+classification, nor to zero out the diffserv bits.
+
+Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_cake.c |   13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -1515,7 +1515,7 @@ static unsigned int cake_drop(struct Qdi
+       return idx + (tin << 16);
+ }
+-static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
++static u8 cake_handle_diffserv(struct sk_buff *skb, bool wash)
+ {
+       const int offset = skb_network_offset(skb);
+       u16 *buf, buf_;
+@@ -1576,14 +1576,17 @@ static struct cake_tin_data *cake_select
+ {
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 tin, mark;
++      bool wash;
+       u8 dscp;
+       /* Tin selection: Default to diffserv-based selection, allow overriding
+-       * using firewall marks or skb->priority.
++       * using firewall marks or skb->priority. Call DSCP parsing early if
++       * wash is enabled, otherwise defer to below to skip unneeded parsing.
+        */
+-      dscp = cake_handle_diffserv(skb,
+-                                  q->rate_flags & CAKE_FLAG_WASH);
+       mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
++      wash = !!(q->rate_flags & CAKE_FLAG_WASH);
++      if (wash)
++              dscp = cake_handle_diffserv(skb, wash);
+       if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
+               tin = 0;
+@@ -1597,6 +1600,8 @@ static struct cake_tin_data *cake_select
+               tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
+       else {
++              if (!wash)
++                      dscp = cake_handle_diffserv(skb, wash);
+               tin = q->tin_index[dscp];
+               if (unlikely(tin >= q->tin_cnt))
diff --git a/queue-5.4/sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch b/queue-5.4/sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch
new file mode 100644 (file)
index 0000000..e31cfcb
--- /dev/null
@@ -0,0 +1,96 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+Date: Thu, 25 Jun 2020 22:12:07 +0200
+Subject: sch_cake: don't try to reallocate or unshare skb unconditionally
+
+From: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+
+[ Upstream commit 9208d2863ac689a563b92f2161d8d1e7127d0add ]
+
+cake_handle_diffserv() tries to linearize mac and network header parts of
+skb and to make it writable unconditionally. In some cases it leads to full
+skb reallocation, which reduces throughput and increases CPU load. Some
+measurements of IPv4 forward + NAPT on MIPS router with 580 MHz single-core
+CPU were conducted. It appears that on kernel 4.9 skb_try_make_writable()
+reallocates skb, if skb was allocated in ethernet driver via so-called
+'build skb' method from page cache (it was discovered by strange increase
+of kmalloc-2048 slab at first).
+
+Obtain DSCP value via read-only skb_header_pointer() call, and leave
+linearization only for DSCP bleaching or ECN CE setting. And, as an
+additional optimisation, skip diffserv parsing entirely if it is not needed
+by the current configuration.
+
+Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
+Signed-off-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
+[ fix a few style issues, reflow commit message ]
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_cake.c |   41 ++++++++++++++++++++++++++++++-----------
+ 1 file changed, 30 insertions(+), 11 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -1517,30 +1517,49 @@ static unsigned int cake_drop(struct Qdi
+ static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
+ {
+-      int wlen = skb_network_offset(skb);
++      const int offset = skb_network_offset(skb);
++      u16 *buf, buf_;
+       u8 dscp;
+       switch (tc_skb_protocol(skb)) {
+       case htons(ETH_P_IP):
+-              wlen += sizeof(struct iphdr);
+-              if (!pskb_may_pull(skb, wlen) ||
+-                  skb_try_make_writable(skb, wlen))
++              buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
++              if (unlikely(!buf))
+                       return 0;
+-              dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
+-              if (wash && dscp)
++              /* ToS is in the second byte of iphdr */
++              dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
++
++              if (wash && dscp) {
++                      const int wlen = offset + sizeof(struct iphdr);
++
++                      if (!pskb_may_pull(skb, wlen) ||
++                          skb_try_make_writable(skb, wlen))
++                              return 0;
++
+                       ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
++              }
++
+               return dscp;
+       case htons(ETH_P_IPV6):
+-              wlen += sizeof(struct ipv6hdr);
+-              if (!pskb_may_pull(skb, wlen) ||
+-                  skb_try_make_writable(skb, wlen))
++              buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
++              if (unlikely(!buf))
+                       return 0;
+-              dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
+-              if (wash && dscp)
++              /* Traffic class is in the first and second bytes of ipv6hdr */
++              dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
++
++              if (wash && dscp) {
++                      const int wlen = offset + sizeof(struct ipv6hdr);
++
++                      if (!pskb_may_pull(skb, wlen) ||
++                          skb_try_make_writable(skb, wlen))
++                              return 0;
++
+                       ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
++              }
++
+               return dscp;
+       case htons(ETH_P_ARP):
diff --git a/queue-5.4/sch_cake-fix-a-few-style-nits.patch b/queue-5.4/sch_cake-fix-a-few-style-nits.patch
new file mode 100644 (file)
index 0000000..89bdcef
--- /dev/null
@@ -0,0 +1,41 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+Date: Thu, 25 Jun 2020 22:12:09 +0200
+Subject: sch_cake: fix a few style nits
+
+From: "Toke Høiland-Jørgensen" <toke@redhat.com>
+
+[ Upstream commit 3f608f0c41360b11b04c763f348b712f651c8bac ]
+
+I spotted a few nits when comparing the in-tree version of sch_cake with
+the out-of-tree one: A redundant error variable declaration shadowing an
+outer declaration, and an indentation alignment issue. Fix both of these.
+
+Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_cake.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -2703,7 +2703,7 @@ static int cake_init(struct Qdisc *sch,
+       qdisc_watchdog_init(&q->watchdog, sch);
+       if (opt) {
+-              int err = cake_change(sch, opt, extack);
++              err = cake_change(sch, opt, extack);
+               if (err)
+                       return err;
+@@ -3020,7 +3020,7 @@ static int cake_dump_class_stats(struct
+                       PUT_STAT_S32(BLUE_TIMER_US,
+                                    ktime_to_us(
+                                            ktime_sub(now,
+-                                                   flow->cvars.blue_timer)));
++                                                     flow->cvars.blue_timer)));
+               }
+               if (flow->cvars.dropping) {
+                       PUT_STAT_S32(DROP_NEXT_US,
diff --git a/queue-5.4/sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch b/queue-5.4/sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch
new file mode 100644 (file)
index 0000000..2ad2806
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Date: Wed, 24 Jun 2020 17:34:18 -0300
+Subject: sctp: Don't advertise IPv4 addresses if ipv6only is set on the socket
+
+From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+
+[ Upstream commit 471e39df96b9a4c4ba88a2da9e25a126624d7a9c ]
+
+If a socket is set ipv6only, it will still send IPv4 addresses in the
+INIT and INIT_ACK packets. This potentially misleads the peer into using
+them, which then would cause association termination.
+
+The fix is to not add IPv4 addresses to ipv6only sockets.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Corey Minyard <cminyard@mvista.com>
+Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Tested-by: Corey Minyard <cminyard@mvista.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/constants.h |    8 +++++---
+ net/sctp/associola.c         |    5 ++++-
+ net/sctp/bind_addr.c         |    1 +
+ net/sctp/protocol.c          |    3 ++-
+ 4 files changed, 12 insertions(+), 5 deletions(-)
+
+--- a/include/net/sctp/constants.h
++++ b/include/net/sctp/constants.h
+@@ -341,11 +341,13 @@ enum {
+        ipv4_is_anycast_6to4(a))
+ /* Flags used for the bind address copy functions.  */
+-#define SCTP_ADDR6_ALLOWED    0x00000001      /* IPv6 address is allowed by
++#define SCTP_ADDR4_ALLOWED    0x00000001      /* IPv4 address is allowed by
+                                                  local sock family */
+-#define SCTP_ADDR4_PEERSUPP   0x00000002      /* IPv4 address is supported by
++#define SCTP_ADDR6_ALLOWED    0x00000002      /* IPv6 address is allowed by
++                                                 local sock family */
++#define SCTP_ADDR4_PEERSUPP   0x00000004      /* IPv4 address is supported by
+                                                  peer */
+-#define SCTP_ADDR6_PEERSUPP   0x00000004      /* IPv6 address is supported by
++#define SCTP_ADDR6_PEERSUPP   0x00000008      /* IPv6 address is supported by
+                                                  peer */
+ /* Reasons to retransmit. */
+--- a/net/sctp/associola.c
++++ b/net/sctp/associola.c
+@@ -1569,12 +1569,15 @@ void sctp_assoc_rwnd_decrease(struct sct
+ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
+                                    enum sctp_scope scope, gfp_t gfp)
+ {
++      struct sock *sk = asoc->base.sk;
+       int flags;
+       /* Use scoping rules to determine the subset of addresses from
+        * the endpoint.
+        */
+-      flags = (PF_INET6 == asoc->base.sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0;
++      flags = (PF_INET6 == sk->sk_family) ? SCTP_ADDR6_ALLOWED : 0;
++      if (!inet_v6_ipv6only(sk))
++              flags |= SCTP_ADDR4_ALLOWED;
+       if (asoc->peer.ipv4_address)
+               flags |= SCTP_ADDR4_PEERSUPP;
+       if (asoc->peer.ipv6_address)
+--- a/net/sctp/bind_addr.c
++++ b/net/sctp/bind_addr.c
+@@ -461,6 +461,7 @@ static int sctp_copy_one_addr(struct net
+                * well as the remote peer.
+                */
+               if ((((AF_INET == addr->sa.sa_family) &&
++                    (flags & SCTP_ADDR4_ALLOWED) &&
+                     (flags & SCTP_ADDR4_PEERSUPP))) ||
+                   (((AF_INET6 == addr->sa.sa_family) &&
+                     (flags & SCTP_ADDR6_ALLOWED) &&
+--- a/net/sctp/protocol.c
++++ b/net/sctp/protocol.c
+@@ -148,7 +148,8 @@ int sctp_copy_local_addr_list(struct net
+                * sock as well as the remote peer.
+                */
+               if (addr->a.sa.sa_family == AF_INET &&
+-                  !(copy_flags & SCTP_ADDR4_PEERSUPP))
++                  (!(copy_flags & SCTP_ADDR4_ALLOWED) ||
++                   !(copy_flags & SCTP_ADDR4_PEERSUPP)))
+                       continue;
+               if (addr->a.sa.sa_family == AF_INET6 &&
+                   (!(copy_flags & SCTP_ADDR6_ALLOWED) ||
index 1ab482d4a03b09d70e27e0cf54a29c4d44bdb090..2199dc78d84052301ca2c468dc51c0b93e8bd470 100644 (file)
@@ -1 +1,27 @@
 block-bio-integrity-don-t-free-buf-if-bio_integrity_add_page-failed.patch
+enetc-fix-tx-rings-bitmap-iteration-range-irq-handling.patch
+geneve-allow-changing-df-behavior-after-creation.patch
+ibmveth-fix-max-mtu-limit.patch
+mld-fix-memory-leak-in-ipv6_mc_destroy_dev.patch
+mvpp2-ethtool-rxtx-stats-fix.patch
+net-bridge-enfore-alignment-for-ethernet-address.patch
+net-core-reduce-recursion-limit-value.patch
+net-do-not-clear-the-sock-tx-queue-in-sk_set_socket.patch
+net-fix-memleak-in-register_netdevice.patch
+net-fix-the-arp-error-in-some-cases.patch
+net-increment-xmit_recursion-level-in-dev_direct_xmit.patch
+net-usb-ax88179_178a-fix-packet-alignment-padding.patch
+openvswitch-take-into-account-de-fragmentation-gso_size-in-execute_check_pkt_len.patch
+rocker-fix-incorrect-error-handling-in-dma_rings_init.patch
+rxrpc-fix-notification-call-on-completion-of-discarded-calls.patch
+sctp-don-t-advertise-ipv4-addresses-if-ipv6only-is-set-on-the-socket.patch
+tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch
+tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch
+tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch
+ip6_gre-fix-use-after-free-in-ip6gre_tunnel_lookup.patch
+net-phy-check-harder-for-errors-in-get_phy_id.patch
+ip_tunnel-fix-use-after-free-in-ip_tunnel_lookup.patch
+sch_cake-don-t-try-to-reallocate-or-unshare-skb-unconditionally.patch
+sch_cake-don-t-call-diffserv-parsing-code-when-it-is-not-needed.patch
+sch_cake-fix-a-few-style-nits.patch
+tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch
diff --git a/queue-5.4/tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch b/queue-5.4/tcp-don-t-ignore-ecn-cwr-on-pure-ack.patch
new file mode 100644 (file)
index 0000000..39f7731
--- /dev/null
@@ -0,0 +1,97 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Denis Kirjanov <kda@linux-powerpc.org>
+Date: Thu, 25 Jun 2020 14:51:06 +0300
+Subject: tcp: don't ignore ECN CWR on pure ACK
+
+From: Denis Kirjanov <kda@linux-powerpc.org>
+
+[ Upstream commit 2570284060b48f3f79d8f1a2698792f36c385e9a ]
+
+There is a problem with the CWR flag set in an incoming ACK segment:
+it leads to a situation where the ECE flag is latched forever.
+
+the following packetdrill script shows what happens:
+
+// Stack receives incoming segments with CE set
++0.1 <[ect0]  . 11001:12001(1000) ack 1001 win 65535
++0.0 <[ce]    . 12001:13001(1000) ack 1001 win 65535
++0.0 <[ect0] P. 13001:14001(1000) ack 1001 win 65535
+
+// Stack responds with ECN ECHO
++0.0 >[noecn]  . 1001:1001(0) ack 12001
++0.0 >[noecn] E. 1001:1001(0) ack 13001
++0.0 >[noecn] E. 1001:1001(0) ack 14001
+
+// Write a packet
++0.1 write(3, ..., 1000) = 1000
++0.0 >[ect0] PE. 1001:2001(1000) ack 14001
+
+// Pure ACK received
++0.01 <[noecn] W. 14001:14001(0) ack 2001 win 65535
+
+// Since CWR was sent, this packet should NOT have ECE set
+
++0.1 write(3, ..., 1000) = 1000
++0.0 >[ect0]  P. 2001:3001(1000) ack 14001
+// but Linux will still keep ECE latched here, with packetdrill
+// flagging a missing ECE flag, expecting
+// >[ect0] PE. 2001:3001(1000) ack 14001
+// in the script
+
+In the situation above we will continue to send ECN ECHO packets
+and trigger the peer to reduce the congestion window. To avoid that
+we can check CWR on pure ACKs received.
+
+v3:
+- Add a sequence check to avoid sending an ACK to an ACK
+
+v2:
+- Adjusted the comment
+- move CWR check before checking for unacknowledged packets
+
+Signed-off-by: Denis Kirjanov <denis.kirjanov@suse.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |   14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -260,7 +260,8 @@ static void tcp_ecn_accept_cwr(struct so
+                * cwnd may be very low (even just 1 packet), so we should ACK
+                * immediately.
+                */
+-              inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
++              if (TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq)
++                      inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;
+       }
+ }
+@@ -3682,6 +3683,15 @@ static int tcp_ack(struct sock *sk, cons
+               tcp_in_ack_event(sk, ack_ev_flags);
+       }
++      /* This is a deviation from RFC3168 since it states that:
++       * "When the TCP data sender is ready to set the CWR bit after reducing
++       * the congestion window, it SHOULD set the CWR bit only on the first
++       * new data packet that it transmits."
++       * We accept CWR on pure ACKs to be more robust
++       * with widely-deployed TCP implementations that do this.
++       */
++      tcp_ecn_accept_cwr(sk, skb);
++
+       /* We passed data and got it acked, remove any soft error
+        * log. Something worked...
+        */
+@@ -4771,8 +4781,6 @@ static void tcp_data_queue(struct sock *
+       skb_dst_drop(skb);
+       __skb_pull(skb, tcp_hdr(skb)->doff * 4);
+-      tcp_ecn_accept_cwr(sk, skb);
+-
+       tp->rx_opt.dsack = 0;
+       /*  Queue data for delivery to the user.
diff --git a/queue-5.4/tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch b/queue-5.4/tcp-grow-window-for-ooo-packets-only-for-sack-flows.patch
new file mode 100644 (file)
index 0000000..d717222
--- /dev/null
@@ -0,0 +1,94 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 15 Jun 2020 20:37:07 -0700
+Subject: tcp: grow window for OOO packets only for SACK flows
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 662051215c758ae8545451628816204ed6cd372d ]
+
+Back in 2013, we made a change that broke fast retransmit
+for non SACK flows.
+
+Indeed, for these flows, a sender needs to receive three duplicate
+ACKs before starting fast retransmit. Sending an ACK with a different
+receive window does not count.
+
+Even if enabling SACK is strongly recommended these days,
+there still are some cases where it has to be disabled.
+
+Not increasing the window seems better than having to
+rely on RTO.
+
+After the fix, following packetdrill test gives :
+
+// Initialize connection
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+   +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+   +0 < . 1:1(0) ack 1 win 514
+
+   +0 accept(3, ..., ...) = 4
+
+   +0 < . 1:1001(1000) ack 1 win 514
+// Quick ack
+   +0 > . 1:1(0) ack 1001 win 264
+
+   +0 < . 2001:3001(1000) ack 1 win 514
+// DUPACK : Normally we should not change the window
+   +0 > . 1:1(0) ack 1001 win 264
+
+   +0 < . 3001:4001(1000) ack 1 win 514
+// DUPACK : Normally we should not change the window
+   +0 > . 1:1(0) ack 1001 win 264
+
+   +0 < . 4001:5001(1000) ack 1 win 514
+// DUPACK : Normally we should not change the window
+    +0 > . 1:1(0) ack 1001 win 264
+
+   +0 < . 1001:2001(1000) ack 1 win 514
+// Hole is repaired.
+   +0 > . 1:1(0) ack 5001 win 272
+
+Fixes: 4e4f1fc22681 ("tcp: properly increase rcv_ssthresh for ofo packets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Venkat Venkatsubra <venkat.x.venkatsubra@oracle.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |   12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4597,7 +4597,11 @@ static void tcp_data_queue_ofo(struct so
+       if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+                                skb, &fragstolen)) {
+ coalesce_done:
+-              tcp_grow_window(sk, skb);
++              /* For non sack flows, do not grow window to force DUPACK
++               * and trigger fast retransmit.
++               */
++              if (tcp_is_sack(tp))
++                      tcp_grow_window(sk, skb);
+               kfree_skb_partial(skb, fragstolen);
+               skb = NULL;
+               goto add_sack;
+@@ -4681,7 +4685,11 @@ add_sack:
+               tcp_sack_new_ofo_skb(sk, seq, end_seq);
+ end:
+       if (skb) {
+-              tcp_grow_window(sk, skb);
++              /* For non sack flows, do not grow window to force DUPACK
++               * and trigger fast retransmit.
++               */
++              if (tcp_is_sack(tp))
++                      tcp_grow_window(sk, skb);
+               skb_condense(skb);
+               skb_set_owner_r(skb, sk);
+       }
diff --git a/queue-5.4/tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch b/queue-5.4/tcp_cubic-fix-spurious-hystart_delay-exit-upon-drop-in-min-rtt.patch
new file mode 100644 (file)
index 0000000..736ae27
--- /dev/null
@@ -0,0 +1,50 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: Neal Cardwell <ncardwell@google.com>
+Date: Wed, 24 Jun 2020 12:42:02 -0400
+Subject: tcp_cubic: fix spurious HYSTART_DELAY exit upon drop in min RTT
+
+From: Neal Cardwell <ncardwell@google.com>
+
+[ Upstream commit b344579ca8478598937215f7005d6c7b84d28aee ]
+
+Mirja Kuehlewind reported a bug in Linux TCP CUBIC Hystart, where
+Hystart HYSTART_DELAY mechanism can exit Slow Start spuriously on an
+ACK when the minimum rtt of a connection goes down. From inspection it
+is clear from the existing code that this could happen in an example
+like the following:
+
+o The first 8 RTT samples in a round trip are 150ms, resulting in a
+  curr_rtt of 150ms and a delay_min of 150ms.
+
+o The 9th RTT sample is 100ms. The curr_rtt does not change after the
+  first 8 samples, so curr_rtt remains 150ms. But delay_min can be
+  lowered at any time, so delay_min falls to 100ms. The code executes
+  the HYSTART_DELAY comparison between curr_rtt of 150ms and delay_min
+  of 100ms, and the curr_rtt is declared far enough above delay_min to
+  force a (spurious) exit of Slow start.
+
+The fix here is simple: allow every RTT sample in a round trip to
+lower the curr_rtt.
+
+Fixes: ae27e98a5152 ("[TCP] CUBIC v2.3")
+Reported-by: Mirja Kuehlewind <mirja.kuehlewind@ericsson.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_cubic.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/ipv4/tcp_cubic.c
++++ b/net/ipv4/tcp_cubic.c
+@@ -404,6 +404,8 @@ static void hystart_update(struct sock *
+       if (hystart_detect & HYSTART_DELAY) {
+               /* obtain the minimum delay of more than sampling packets */
++              if (ca->curr_rtt > delay)
++                      ca->curr_rtt = delay;
+               if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
+                       if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
+                               ca->curr_rtt = delay;
diff --git a/queue-5.4/tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch b/queue-5.4/tg3-driver-sleeps-indefinitely-when-eeh-errors-exceed-eeh_max_freezes.patch
new file mode 100644 (file)
index 0000000..d682d3b
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Sun 28 Jun 2020 02:22:38 PM CEST
+From: David Christensen <drc@linux.vnet.ibm.com>
+Date: Wed, 17 Jun 2020 11:51:17 -0700
+Subject: tg3: driver sleeps indefinitely when EEH errors exceed eeh_max_freezes
+
+From: David Christensen <drc@linux.vnet.ibm.com>
+
+[ Upstream commit 3a2656a211caf35e56afc9425e6e518fa52f7fbc ]
+
+The driver function tg3_io_error_detected() calls napi_disable twice,
+without an intervening napi_enable, when the number of EEH errors exceeds
+eeh_max_freezes, resulting in an indefinite sleep while holding rtnl_lock.
+
+Add check for pcierr_recovery which skips code already executed for the
+"Frozen" state.
+
+Signed-off-by: David Christensen <drc@linux.vnet.ibm.com>
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -18176,8 +18176,8 @@ static pci_ers_result_t tg3_io_error_det
+       rtnl_lock();
+-      /* We probably don't have netdev yet */
+-      if (!netdev || !netif_running(netdev))
++      /* Could be second call or maybe we don't have netdev yet */
++      if (!netdev || tp->pcierr_recovery || !netif_running(netdev))
+               goto done;
+       /* We needn't recover from permanent error */