]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.4-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 29 Jul 2020 09:36:50 +0000 (11:36 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 29 Jul 2020 09:36:50 +0000 (11:36 +0200)
added patches:
ax.25-fix-out-of-bounds-read-in-ax25_connect.patch
ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch
ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch
dev-defer-free-of-skbs-in-flush_backlog.patch
drivers-net-wan-x25_asy-fix-to-make-it-work.patch
ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch
net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch
net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch
qrtr-orphan-socket-in-qrtr_release.patch
rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch
rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch
sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch
sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch
tcp-allow-at-most-one-tlp-probe-per-flight.patch
udp-copy-has_conns-in-reuseport_grow.patch
udp-improve-load-balancing-for-so_reuseport.patch

17 files changed:
queue-5.4/ax.25-fix-out-of-bounds-read-in-ax25_connect.patch [new file with mode: 0644]
queue-5.4/ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch [new file with mode: 0644]
queue-5.4/ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch [new file with mode: 0644]
queue-5.4/dev-defer-free-of-skbs-in-flush_backlog.patch [new file with mode: 0644]
queue-5.4/drivers-net-wan-x25_asy-fix-to-make-it-work.patch [new file with mode: 0644]
queue-5.4/ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch [new file with mode: 0644]
queue-5.4/net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch [new file with mode: 0644]
queue-5.4/net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch [new file with mode: 0644]
queue-5.4/qrtr-orphan-socket-in-qrtr_release.patch [new file with mode: 0644]
queue-5.4/rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch [new file with mode: 0644]
queue-5.4/rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch [new file with mode: 0644]
queue-5.4/sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch [new file with mode: 0644]
queue-5.4/sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch [new file with mode: 0644]
queue-5.4/series [new file with mode: 0644]
queue-5.4/tcp-allow-at-most-one-tlp-probe-per-flight.patch [new file with mode: 0644]
queue-5.4/udp-copy-has_conns-in-reuseport_grow.patch [new file with mode: 0644]
queue-5.4/udp-improve-load-balancing-for-so_reuseport.patch [new file with mode: 0644]

diff --git a/queue-5.4/ax.25-fix-out-of-bounds-read-in-ax25_connect.patch b/queue-5.4/ax.25-fix-out-of-bounds-read-in-ax25_connect.patch
new file mode 100644 (file)
index 0000000..c4adf42
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Wed 29 Jul 2020 11:19:55 AM CEST
+From: Peilin Ye <yepeilin.cs@gmail.com>
+Date: Wed, 22 Jul 2020 11:19:01 -0400
+Subject: AX.25: Fix out-of-bounds read in ax25_connect()
+
+From: Peilin Ye <yepeilin.cs@gmail.com>
+
+[ Upstream commit 2f2a7ffad5c6cbf3d438e813cfdc88230e185ba6 ]
+
+Checks on `addr_len` and `fsa->fsa_ax25.sax25_ndigis` are insufficient.
+ax25_connect() can go out of bounds when `fsa->fsa_ax25.sax25_ndigis`
+equals 7 or 8. Fix it.
+
+This issue has been reported as a KMSAN uninit-value bug, because in such
+a case, ax25_connect() reaches into the uninitialized portion of the
+`struct sockaddr_storage` statically allocated in __sys_connect().
+
+It is safe to remove `fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS` because
+`addr_len` is guaranteed to be less than or equal to
+`sizeof(struct full_sockaddr_ax25)`.
+
+Reported-by: syzbot+c82752228ed975b0a623@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?id=55ef9d629f3b3d7d70b69558015b63b48d01af66
+Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ax25/af_ax25.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/ax25/af_ax25.c
++++ b/net/ax25/af_ax25.c
+@@ -1187,7 +1187,9 @@ static int __must_check ax25_connect(str
+       if (addr_len > sizeof(struct sockaddr_ax25) &&
+           fsa->fsa_ax25.sax25_ndigis != 0) {
+               /* Valid number of digipeaters ? */
+-              if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) {
++              if (fsa->fsa_ax25.sax25_ndigis < 1 ||
++                  addr_len < sizeof(struct sockaddr_ax25) +
++                  sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) {
+                       err = -EINVAL;
+                       goto out_release;
+               }
diff --git a/queue-5.4/ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch b/queue-5.4/ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch
new file mode 100644 (file)
index 0000000..9fd4ce6
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 23 Jul 2020 17:49:57 +0300
+Subject: AX.25: Prevent integer overflows in connect and sendmsg
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+[ Upstream commit 17ad73e941b71f3bec7523ea4e9cbc3752461c2d ]
+
+We recently added some bounds checking in ax25_connect() and
+ax25_sendmsg() and so we removed the AX25_MAX_DIGIS checks because
+they were no longer required.
+
+Unfortunately, I believe they are required to prevent integer overflows
+so I have added them back.
+
+Fixes: 8885bb0621f0 ("AX.25: Prevent out-of-bounds read in ax25_sendmsg()")
+Fixes: 2f2a7ffad5c6 ("AX.25: Fix out-of-bounds read in ax25_connect()")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ax25/af_ax25.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/net/ax25/af_ax25.c
++++ b/net/ax25/af_ax25.c
+@@ -1188,6 +1188,7 @@ static int __must_check ax25_connect(str
+           fsa->fsa_ax25.sax25_ndigis != 0) {
+               /* Valid number of digipeaters ? */
+               if (fsa->fsa_ax25.sax25_ndigis < 1 ||
++                  fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS ||
+                   addr_len < sizeof(struct sockaddr_ax25) +
+                   sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) {
+                       err = -EINVAL;
+@@ -1509,7 +1510,9 @@ static int ax25_sendmsg(struct socket *s
+                       struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax;
+                       /* Valid number of digipeaters ? */
+-                      if (usax->sax25_ndigis < 1 || addr_len < sizeof(struct sockaddr_ax25) +
++                      if (usax->sax25_ndigis < 1 ||
++                          usax->sax25_ndigis > AX25_MAX_DIGIS ||
++                          addr_len < sizeof(struct sockaddr_ax25) +
+                           sizeof(ax25_address) * usax->sax25_ndigis) {
+                               err = -EINVAL;
+                               goto out;
diff --git a/queue-5.4/ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch b/queue-5.4/ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch
new file mode 100644 (file)
index 0000000..54bc9d5
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Wed 29 Jul 2020 11:19:55 AM CEST
+From: Peilin Ye <yepeilin.cs@gmail.com>
+Date: Wed, 22 Jul 2020 12:05:12 -0400
+Subject: AX.25: Prevent out-of-bounds read in ax25_sendmsg()
+
+From: Peilin Ye <yepeilin.cs@gmail.com>
+
+[ Upstream commit 8885bb0621f01a6c82be60a91e5fc0f6e2f71186 ]
+
+Checks on `addr_len` and `usax->sax25_ndigis` are insufficient.
+ax25_sendmsg() can go out of bounds when `usax->sax25_ndigis` equals 7
+or 8. Fix it.
+
+It is safe to remove `usax->sax25_ndigis > AX25_MAX_DIGIS`, since
+`addr_len` is guaranteed to be less than or equal to
+`sizeof(struct full_sockaddr_ax25)`.
+
+Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ax25/af_ax25.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ax25/af_ax25.c
++++ b/net/ax25/af_ax25.c
+@@ -1509,7 +1509,8 @@ static int ax25_sendmsg(struct socket *s
+                       struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax;
+                       /* Valid number of digipeaters ? */
+-                      if (usax->sax25_ndigis < 1 || usax->sax25_ndigis > AX25_MAX_DIGIS) {
++                      if (usax->sax25_ndigis < 1 || addr_len < sizeof(struct sockaddr_ax25) +
++                          sizeof(ax25_address) * usax->sax25_ndigis) {
+                               err = -EINVAL;
+                               goto out;
+                       }
diff --git a/queue-5.4/dev-defer-free-of-skbs-in-flush_backlog.patch b/queue-5.4/dev-defer-free-of-skbs-in-flush_backlog.patch
new file mode 100644 (file)
index 0000000..c06a5fa
--- /dev/null
@@ -0,0 +1,31 @@
+From foo@baz Wed 29 Jul 2020 11:19:55 AM CEST
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Date: Thu, 23 Jul 2020 11:31:48 -0600
+Subject: dev: Defer free of skbs in flush_backlog
+
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+
+[ Upstream commit 7df5cb75cfb8acf96c7f2342530eb41e0c11f4c3 ]
+
+IRQs are disabled when freeing skbs in input queue.
+Use the IRQ safe variant to free skbs here.
+
+Fixes: 145dd5f9c88f ("net: flush the softnet backlog in process context")
+Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/dev.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -5229,7 +5229,7 @@ static void flush_backlog(struct work_st
+       skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
+               if (skb->dev->reg_state == NETREG_UNREGISTERING) {
+                       __skb_unlink(skb, &sd->input_pkt_queue);
+-                      kfree_skb(skb);
++                      dev_kfree_skb_irq(skb);
+                       input_queue_head_incr(sd);
+               }
+       }
diff --git a/queue-5.4/drivers-net-wan-x25_asy-fix-to-make-it-work.patch b/queue-5.4/drivers-net-wan-x25_asy-fix-to-make-it-work.patch
new file mode 100644 (file)
index 0000000..e2afcbf
--- /dev/null
@@ -0,0 +1,102 @@
+From foo@baz Wed 29 Jul 2020 11:19:55 AM CEST
+From: Xie He <xie.he.0141@gmail.com>
+Date: Thu, 16 Jul 2020 16:44:33 -0700
+Subject: drivers/net/wan/x25_asy: Fix to make it work
+
+From: Xie He <xie.he.0141@gmail.com>
+
+[ Upstream commit 8fdcabeac39824fe67480fd9508d80161c541854 ]
+
+This driver is not working because of problems in its receiving code.
+This patch fixes it to make it work.
+
+When the driver receives an LAPB frame, it should first pass the frame
+to the LAPB module to process. After processing, the LAPB module passes
+the data (the packet) back to the driver, the driver should then add a
+one-byte pseudo header and pass the data to upper layers.
+
+The changes to the "x25_asy_bump" function and the
+"x25_asy_data_indication" function are to correctly implement this
+procedure.
+
+Also, the "x25_asy_unesc" function ignores any frame that is shorter
+than 3 bytes. However the shortest frames are 2-byte long. So we need
+to change it to allow 2-byte frames to pass.
+
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Martin Schiller <ms@dev.tdt.de>
+Signed-off-by: Xie He <xie.he.0141@gmail.com>
+Reviewed-by: Martin Schiller <ms@dev.tdt.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wan/x25_asy.c |   21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+--- a/drivers/net/wan/x25_asy.c
++++ b/drivers/net/wan/x25_asy.c
+@@ -183,7 +183,7 @@ static inline void x25_asy_unlock(struct
+       netif_wake_queue(sl->dev);
+ }
+-/* Send one completely decapsulated IP datagram to the IP layer. */
++/* Send an LAPB frame to the LAPB module to process. */
+ static void x25_asy_bump(struct x25_asy *sl)
+ {
+@@ -195,13 +195,12 @@ static void x25_asy_bump(struct x25_asy
+       count = sl->rcount;
+       dev->stats.rx_bytes += count;
+-      skb = dev_alloc_skb(count+1);
++      skb = dev_alloc_skb(count);
+       if (skb == NULL) {
+               netdev_warn(sl->dev, "memory squeeze, dropping packet\n");
+               dev->stats.rx_dropped++;
+               return;
+       }
+-      skb_push(skb, 1);       /* LAPB internal control */
+       skb_put_data(skb, sl->rbuff, count);
+       skb->protocol = x25_type_trans(skb, sl->dev);
+       err = lapb_data_received(skb->dev, skb);
+@@ -209,7 +208,6 @@ static void x25_asy_bump(struct x25_asy
+               kfree_skb(skb);
+               printk(KERN_DEBUG "x25_asy: data received err - %d\n", err);
+       } else {
+-              netif_rx(skb);
+               dev->stats.rx_packets++;
+       }
+ }
+@@ -356,12 +354,21 @@ static netdev_tx_t x25_asy_xmit(struct s
+  */
+ /*
+- *    Called when I frame data arrives. We did the work above - throw it
+- *    at the net layer.
++ *    Called when I frame data arrive. We add a pseudo header for upper
++ *    layers and pass it to upper layers.
+  */
+ static int x25_asy_data_indication(struct net_device *dev, struct sk_buff *skb)
+ {
++      if (skb_cow(skb, 1)) {
++              kfree_skb(skb);
++              return NET_RX_DROP;
++      }
++      skb_push(skb, 1);
++      skb->data[0] = X25_IFACE_DATA;
++
++      skb->protocol = x25_type_trans(skb, dev);
++
+       return netif_rx(skb);
+ }
+@@ -657,7 +664,7 @@ static void x25_asy_unesc(struct x25_asy
+       switch (s) {
+       case X25_END:
+               if (!test_and_clear_bit(SLF_ERROR, &sl->flags) &&
+-                  sl->rcount > 2)
++                  sl->rcount >= 2)
+                       x25_asy_bump(sl);
+               clear_bit(SLF_ESCAPE, &sl->flags);
+               sl->rcount = 0;
diff --git a/queue-5.4/ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch b/queue-5.4/ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch
new file mode 100644 (file)
index 0000000..8ff4324
--- /dev/null
@@ -0,0 +1,83 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Wei Yongjun <weiyongjun1@huawei.com>
+Date: Mon, 13 Jul 2020 23:59:50 +0800
+Subject: ip6_gre: fix null-ptr-deref in ip6gre_init_net()
+
+From: Wei Yongjun <weiyongjun1@huawei.com>
+
+[ Upstream commit 46ef5b89ec0ecf290d74c4aee844f063933c4da4 ]
+
+KASAN reports a null-ptr-deref error when register_netdev() fails:
+
+KASAN: null-ptr-deref in range [0x00000000000003c0-0x00000000000003c7]
+CPU: 2 PID: 422 Comm: ip Not tainted 5.8.0-rc4+ #12
+Call Trace:
+ ip6gre_init_net+0x4ab/0x580
+ ? ip6gre_tunnel_uninit+0x3f0/0x3f0
+ ops_init+0xa8/0x3c0
+ setup_net+0x2de/0x7e0
+ ? rcu_read_lock_bh_held+0xb0/0xb0
+ ? ops_init+0x3c0/0x3c0
+ ? kasan_unpoison_shadow+0x33/0x40
+ ? __kasan_kmalloc.constprop.0+0xc2/0xd0
+ copy_net_ns+0x27d/0x530
+ create_new_namespaces+0x382/0xa30
+ unshare_nsproxy_namespaces+0xa1/0x1d0
+ ksys_unshare+0x39c/0x780
+ ? walk_process_tree+0x2a0/0x2a0
+ ? trace_hardirqs_on+0x4a/0x1b0
+ ? _raw_spin_unlock_irq+0x1f/0x30
+ ? syscall_trace_enter+0x1a7/0x330
+ ? do_syscall_64+0x1c/0xa0
+ __x64_sys_unshare+0x2d/0x40
+ do_syscall_64+0x56/0xa0
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ip6gre_tunnel_uninit() has set 'ign->fb_tunnel_dev' to NULL, so a later
+access to ign->fb_tunnel_dev causes a null-ptr-deref. Fix it by saving
+'ign->fb_tunnel_dev' to the local variable ndev.
+
+Fixes: dafabb6590cb ("ip6_gre: fix use-after-free in ip6gre_tunnel_lookup()")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |   11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1560,17 +1560,18 @@ static void ip6gre_destroy_tunnels(struc
+ static int __net_init ip6gre_init_net(struct net *net)
+ {
+       struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
++      struct net_device *ndev;
+       int err;
+       if (!net_has_fallback_tunnels(net))
+               return 0;
+-      ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
+-                                        NET_NAME_UNKNOWN,
+-                                        ip6gre_tunnel_setup);
+-      if (!ign->fb_tunnel_dev) {
++      ndev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
++                          NET_NAME_UNKNOWN, ip6gre_tunnel_setup);
++      if (!ndev) {
+               err = -ENOMEM;
+               goto err_alloc_dev;
+       }
++      ign->fb_tunnel_dev = ndev;
+       dev_net_set(ign->fb_tunnel_dev, net);
+       /* FB netdevice is special: we have one, and only one per netns.
+        * Allowing to move it to another netns is clearly unsafe.
+@@ -1590,7 +1591,7 @@ static int __net_init ip6gre_init_net(st
+       return 0;
+ err_reg_dev:
+-      free_netdev(ign->fb_tunnel_dev);
++      free_netdev(ndev);
+ err_alloc_dev:
+       return err;
+ }
diff --git a/queue-5.4/net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch b/queue-5.4/net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch
new file mode 100644 (file)
index 0000000..927b28c
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
+Date: Tue, 21 Jul 2020 15:02:57 +0800
+Subject: net-sysfs: add a newline when printing 'tx_timeout' by sysfs
+
+From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
+
+[ Upstream commit 9bb5fbea59f36a589ef886292549ca4052fe676c ]
+
+When I cat 'tx_timeout' by sysfs, it displays as follows. It's better to
+add a newline for easy reading.
+
+root@syzkaller:~# cat /sys/devices/virtual/net/lo/queues/tx-0/tx_timeout
+0root@syzkaller:~#
+
+Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/net-sysfs.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/core/net-sysfs.c
++++ b/net/core/net-sysfs.c
+@@ -1036,7 +1036,7 @@ static ssize_t tx_timeout_show(struct ne
+       trans_timeout = queue->trans_timeout;
+       spin_unlock_irq(&queue->_xmit_lock);
+-      return sprintf(buf, "%lu", trans_timeout);
++      return sprintf(buf, fmt_ulong, trans_timeout);
+ }
+ static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
diff --git a/queue-5.4/net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch b/queue-5.4/net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch
new file mode 100644 (file)
index 0000000..3939d55
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Tue, 21 Jul 2020 17:11:44 +0800
+Subject: net: udp: Fix wrong clean up for IS_UDPLITE macro
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit b0a422772fec29811e293c7c0e6f991c0fd9241d ]
+
+We can't use IS_UDPLITE to replace udp_sk->pcflag when UDPLITE_RECV_CC is
+checked.
+
+Fixes: b2bf1e2659b1 ("[UDP]: Clean up for IS_UDPLITE macro")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c |    2 +-
+ net/ipv6/udp.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -2045,7 +2045,7 @@ static int udp_queue_rcv_one_skb(struct
+       /*
+        *      UDP-Lite specific tests, ignored on UDP sockets
+        */
+-      if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
++      if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
+               /*
+                * MIB statistics other than incrementing the error count are
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -643,7 +643,7 @@ static int udpv6_queue_rcv_one_skb(struc
+       /*
+        * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c).
+        */
+-      if ((is_udplite & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
++      if ((up->pcflag & UDPLITE_RECV_CC)  &&  UDP_SKB_CB(skb)->partial_cov) {
+               if (up->pcrlen == 0) {          /* full coverage was set  */
+                       net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n",
diff --git a/queue-5.4/qrtr-orphan-socket-in-qrtr_release.patch b/queue-5.4/qrtr-orphan-socket-in-qrtr_release.patch
new file mode 100644 (file)
index 0000000..72eb080
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Fri, 24 Jul 2020 09:45:51 -0700
+Subject: qrtr: orphan socket in qrtr_release()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit af9f691f0f5bdd1ade65a7b84927639882d7c3e5 ]
+
+We have to detach sock from socket in qrtr_release(),
+otherwise skb->sk may still reference to this socket
+when the skb is released in tun->queue, particularly
+sk->sk_wq still points to &sock->wq, which leads to
+a UAF.
+
+Reported-and-tested-by: syzbot+6720d64f31c081c2f708@syzkaller.appspotmail.com
+Fixes: 28fb4e59a47d ("net: qrtr: Expose tunneling endpoint to user space")
+Cc: Bjorn Andersson <bjorn.andersson@linaro.org>
+Cc: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/qrtr/qrtr.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/qrtr/qrtr.c
++++ b/net/qrtr/qrtr.c
+@@ -1004,6 +1004,7 @@ static int qrtr_release(struct socket *s
+               sk->sk_state_change(sk);
+       sock_set_flag(sk, SOCK_DEAD);
++      sock_orphan(sk);
+       sock->sk = NULL;
+       if (!sock_flag(sk, SOCK_ZAPPED))
diff --git a/queue-5.4/rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch b/queue-5.4/rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch
new file mode 100644 (file)
index 0000000..89ff607
--- /dev/null
@@ -0,0 +1,63 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Weilong Chen <chenweilong@huawei.com>
+Date: Wed, 15 Jul 2020 20:58:10 +0800
+Subject: rtnetlink: Fix memory(net_device) leak when ->newlink fails
+
+From: Weilong Chen <chenweilong@huawei.com>
+
+[ Upstream commit cebb69754f37d68e1355a5e726fdac317bcda302 ]
+
+When vlan_newlink's call to register_vlan_dev fails, it might return an
+error with dev->reg_state = NETREG_UNREGISTERED. rtnl_newlink should
+free the memory, but currently it only frees a device whose
+state is NETREG_UNINITIALIZED.
+
+BUG: memory leak
+unreferenced object 0xffff8881051de000 (size 4096):
+  comm "syz-executor139", pid 560, jiffies 4294745346 (age 32.445s)
+  hex dump (first 32 bytes):
+    76 6c 61 6e 32 00 00 00 00 00 00 00 00 00 00 00  vlan2...........
+    00 45 28 03 81 88 ff ff 00 00 00 00 00 00 00 00  .E(.............
+  backtrace:
+    [<0000000047527e31>] kmalloc_node include/linux/slab.h:578 [inline]
+    [<0000000047527e31>] kvmalloc_node+0x33/0xd0 mm/util.c:574
+    [<000000002b59e3bc>] kvmalloc include/linux/mm.h:753 [inline]
+    [<000000002b59e3bc>] kvzalloc include/linux/mm.h:761 [inline]
+    [<000000002b59e3bc>] alloc_netdev_mqs+0x83/0xd90 net/core/dev.c:9929
+    [<000000006076752a>] rtnl_create_link+0x2c0/0xa20 net/core/rtnetlink.c:3067
+    [<00000000572b3be5>] __rtnl_newlink+0xc9c/0x1330 net/core/rtnetlink.c:3329
+    [<00000000e84ea553>] rtnl_newlink+0x66/0x90 net/core/rtnetlink.c:3397
+    [<0000000052c7c0a9>] rtnetlink_rcv_msg+0x540/0x990 net/core/rtnetlink.c:5460
+    [<000000004b5cb379>] netlink_rcv_skb+0x12b/0x3a0 net/netlink/af_netlink.c:2469
+    [<00000000c71c20d3>] netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline]
+    [<00000000c71c20d3>] netlink_unicast+0x4c6/0x690 net/netlink/af_netlink.c:1329
+    [<00000000cca72fa9>] netlink_sendmsg+0x735/0xcc0 net/netlink/af_netlink.c:1918
+    [<000000009221ebf7>] sock_sendmsg_nosec net/socket.c:652 [inline]
+    [<000000009221ebf7>] sock_sendmsg+0x109/0x140 net/socket.c:672
+    [<000000001c30ffe4>] ____sys_sendmsg+0x5f5/0x780 net/socket.c:2352
+    [<00000000b71ca6f3>] ___sys_sendmsg+0x11d/0x1a0 net/socket.c:2406
+    [<0000000007297384>] __sys_sendmsg+0xeb/0x1b0 net/socket.c:2439
+    [<000000000eb29b11>] do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359
+    [<000000006839b4d0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Fixes: cb626bf566eb ("net-sysfs: Fix reference count leak")
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Weilong Chen <chenweilong@huawei.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3231,7 +3231,8 @@ replay:
+                */
+               if (err < 0) {
+                       /* If device is not registered at all, free it now */
+-                      if (dev->reg_state == NETREG_UNINITIALIZED)
++                      if (dev->reg_state == NETREG_UNINITIALIZED ||
++                          dev->reg_state == NETREG_UNREGISTERED)
+                               free_netdev(dev);
+                       goto out;
+               }
diff --git a/queue-5.4/rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch b/queue-5.4/rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch
new file mode 100644 (file)
index 0000000..86b7abb
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: David Howells <dhowells@redhat.com>
+Date: Mon, 20 Jul 2020 12:41:46 +0100
+Subject: rxrpc: Fix sendmsg() returning EPIPE due to recvmsg() returning ENODATA
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit 639f181f0ee20d3249dbc55f740f0167267180f0 ]
+
+rxrpc_sendmsg() returns EPIPE if there's an outstanding error, such as
+rxrpc_recvmsg() indicating ENODATA when there's nothing for it to read.
+
+Change rxrpc_recvmsg() to return EAGAIN instead if there's nothing to read
+as this particular error doesn't get stored in ->sk_err by the networking
+core.
+
+Also change rxrpc_sendmsg() so that it doesn't fail with delayed receive
+errors (there's no way for it to report which call, if any, the error was
+caused by).
+
+Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/rxrpc/recvmsg.c |    2 +-
+ net/rxrpc/sendmsg.c |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/rxrpc/recvmsg.c
++++ b/net/rxrpc/recvmsg.c
+@@ -464,7 +464,7 @@ try_again:
+           list_empty(&rx->recvmsg_q) &&
+           rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
+               release_sock(&rx->sk);
+-              return -ENODATA;
++              return -EAGAIN;
+       }
+       if (list_empty(&rx->recvmsg_q)) {
+--- a/net/rxrpc/sendmsg.c
++++ b/net/rxrpc/sendmsg.c
+@@ -306,7 +306,7 @@ static int rxrpc_send_data(struct rxrpc_
+       /* this should be in poll */
+       sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+-      if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
++      if (sk->sk_shutdown & SEND_SHUTDOWN)
+               return -EPIPE;
+       more = msg->msg_flags & MSG_MORE;
diff --git a/queue-5.4/sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch b/queue-5.4/sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch
new file mode 100644 (file)
index 0000000..b2adf48
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 22 Jul 2020 23:52:11 +0800
+Subject: sctp: shrink stream outq only when new outcnt < old outcnt
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 8f13399db22f909a35735bf8ae2f932e0c8f0e30 ]
+
+It's not necessary to go list_for_each for outq->out_chunk_list
+when new outcnt >= old outcnt, as no chunk with higher sid than
+new (outcnt - 1) exists in the outqueue.
+
+While at it, also move the list_for_each code in a new function
+sctp_stream_shrink_out(), which will be used in the next patch.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/stream.c |   21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -22,17 +22,11 @@
+ #include <net/sctp/sm.h>
+ #include <net/sctp/stream_sched.h>
+-/* Migrates chunks from stream queues to new stream queues if needed,
+- * but not across associations. Also, removes those chunks to streams
+- * higher than the new max.
+- */
+-static void sctp_stream_outq_migrate(struct sctp_stream *stream,
+-                                   struct sctp_stream *new, __u16 outcnt)
++static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt)
+ {
+       struct sctp_association *asoc;
+       struct sctp_chunk *ch, *temp;
+       struct sctp_outq *outq;
+-      int i;
+       asoc = container_of(stream, struct sctp_association, stream);
+       outq = &asoc->outqueue;
+@@ -56,6 +50,19 @@ static void sctp_stream_outq_migrate(str
+               sctp_chunk_free(ch);
+       }
++}
++
++/* Migrates chunks from stream queues to new stream queues if needed,
++ * but not across associations. Also, removes those chunks to streams
++ * higher than the new max.
++ */
++static void sctp_stream_outq_migrate(struct sctp_stream *stream,
++                                   struct sctp_stream *new, __u16 outcnt)
++{
++      int i;
++
++      if (stream->outcnt > outcnt)
++              sctp_stream_shrink_out(stream, outcnt);
+       if (new) {
+               /* Here we actually move the old ext stuff into the new
diff --git a/queue-5.4/sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch b/queue-5.4/sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch
new file mode 100644 (file)
index 0000000..145c62c
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 22 Jul 2020 23:52:12 +0800
+Subject: sctp: shrink stream outq when fails to do addstream reconf
+
+From: Xin Long <lucien.xin@gmail.com>
+
+[ Upstream commit 3ecdda3e9ad837cf9cb41b6faa11b1af3a5abc0c ]
+
+When adding a stream with stream reconf, the new stream is initially in
+CLOSED state but new out chunks can still be enqueued. Then, once the
+confirmation from the peer arrives, the state will change to OPEN.
+
+However, if the peer denies, it needs to roll back the stream. But when
+doing that, it only sets the stream outcnt back, and the chunks already
+in the new stream don't get purged. As a result, these chunks can still
+be dequeued in sctp_outq_dequeue_data().
+
+As its stream is still CLOSED, the chunk will be enqueued to the head
+again by sctp_outq_head_data(). This chunk will never be sent out, and
+the chunks after it can never be dequeued. The assoc will be 'hung' in
+a dead loop of sending this chunk.
+
+To fix it, this patch purges the chunks already in the new stream by
+calling sctp_stream_shrink_out() when failing to do the addstream
+reconf.
+
+Fixes: 11ae76e67a17 ("sctp: implement receiver-side procedures for the Reconf Response Parameter")
+Reported-by: Ying Xu <yinxu@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/stream.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/stream.c
++++ b/net/sctp/stream.c
+@@ -1045,11 +1045,13 @@ struct sctp_chunk *sctp_process_strreset
+               nums = ntohs(addstrm->number_of_streams);
+               number = stream->outcnt - nums;
+-              if (result == SCTP_STRRESET_PERFORMED)
++              if (result == SCTP_STRRESET_PERFORMED) {
+                       for (i = number; i < stream->outcnt; i++)
+                               SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
+-              else
++              } else {
++                      sctp_stream_shrink_out(stream, number);
+                       stream->outcnt = number;
++              }
+               *evp = sctp_ulpevent_make_stream_change_event(asoc, flags,
+                       0, nums, GFP_ATOMIC);
diff --git a/queue-5.4/series b/queue-5.4/series
new file mode 100644 (file)
index 0000000..9bf5718
--- /dev/null
@@ -0,0 +1,16 @@
+ax.25-fix-out-of-bounds-read-in-ax25_connect.patch
+ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch
+dev-defer-free-of-skbs-in-flush_backlog.patch
+drivers-net-wan-x25_asy-fix-to-make-it-work.patch
+ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch
+net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch
+net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch
+qrtr-orphan-socket-in-qrtr_release.patch
+rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch
+rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch
+tcp-allow-at-most-one-tlp-probe-per-flight.patch
+ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch
+sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch
+sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch
+udp-copy-has_conns-in-reuseport_grow.patch
+udp-improve-load-balancing-for-so_reuseport.patch
diff --git a/queue-5.4/tcp-allow-at-most-one-tlp-probe-per-flight.patch b/queue-5.4/tcp-allow-at-most-one-tlp-probe-per-flight.patch
new file mode 100644 (file)
index 0000000..0e9d167
--- /dev/null
@@ -0,0 +1,124 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Yuchung Cheng <ycheng@google.com>
+Date: Thu, 23 Jul 2020 12:00:06 -0700
+Subject: tcp: allow at most one TLP probe per flight
+
+From: Yuchung Cheng <ycheng@google.com>
+
+[ Upstream commit 76be93fc0702322179bb0ea87295d820ee46ad14 ]
+
+Previously TLP may send multiple probes of new data in one
+flight. This happens when the sender is cwnd limited. After the
+initial TLP containing new data is sent, the sender receives another
+ACK that acks partial inflight.  It may re-arm another TLP timer
+to send more, if no further ACK returns before the next TLP timeout
+(PTO) expires. The sender may in theory send a large number of TLPs
+until the send queue is depleted. This only happens if the sender sees
+such an irregular, uncommon ACK pattern, but it is generally undesirable
+behavior, especially during congestion.
+
+The original TLP design allows only one TLP probe per inflight, as
+published in "Reducing Web Latency: the Virtue of Gentle Aggression",
+SIGCOMM 2013. This patch changes TLP to send at most one probe
+per inflight.
+
+Note that if the sender is app-limited, TLP retransmits old data
+and does not have this issue.
+
+Signed-off-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: Neal Cardwell <ncardwell@google.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/tcp.h   |    4 +++-
+ net/ipv4/tcp_input.c  |   11 ++++++-----
+ net/ipv4/tcp_output.c |   13 ++++++++-----
+ 3 files changed, 17 insertions(+), 11 deletions(-)
+
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -216,6 +216,8 @@ struct tcp_sock {
+       } rack;
+       u16     advmss;         /* Advertised MSS                       */
+       u8      compressed_ack;
++      u8      tlp_retrans:1,  /* TLP is a retransmission */
++              unused_1:7;
+       u32     chrono_start;   /* Start time in jiffies of a TCP chrono */
+       u32     chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
+       u8      chrono_type:2,  /* current chronograph type */
+@@ -238,7 +240,7 @@ struct tcp_sock {
+               save_syn:1,     /* Save headers of SYN packet */
+               is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
+               syn_smc:1;      /* SYN includes SMC */
+-      u32     tlp_high_seq;   /* snd_nxt at the time of TLP retransmit. */
++      u32     tlp_high_seq;   /* snd_nxt at the time of TLP */
+       u32     tcp_tx_delay;   /* delay (in usec) added to TX packets */
+       u64     tcp_wstamp_ns;  /* departure time for next sent data packet */
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -3505,10 +3505,8 @@ static void tcp_replace_ts_recent(struct
+       }
+ }
+-/* This routine deals with acks during a TLP episode.
+- * We mark the end of a TLP episode on receiving TLP dupack or when
+- * ack is after tlp_high_seq.
+- * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
++/* This routine deals with acks during a TLP episode and ends an episode by
++ * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack
+  */
+ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
+ {
+@@ -3517,7 +3515,10 @@ static void tcp_process_tlp_ack(struct s
+       if (before(ack, tp->tlp_high_seq))
+               return;
+-      if (flag & FLAG_DSACKING_ACK) {
++      if (!tp->tlp_retrans) {
++              /* TLP of new data has been acknowledged */
++              tp->tlp_high_seq = 0;
++      } else if (flag & FLAG_DSACKING_ACK) {
+               /* This DSACK means original and TLP probe arrived; no loss */
+               tp->tlp_high_seq = 0;
+       } else if (after(ack, tp->tlp_high_seq)) {
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2564,6 +2564,11 @@ void tcp_send_loss_probe(struct sock *sk
+       int pcount;
+       int mss = tcp_current_mss(sk);
++      /* At most one outstanding TLP */
++      if (tp->tlp_high_seq)
++              goto rearm_timer;
++
++      tp->tlp_retrans = 0;
+       skb = tcp_send_head(sk);
+       if (skb && tcp_snd_wnd_test(tp, skb, mss)) {
+               pcount = tp->packets_out;
+@@ -2581,10 +2586,6 @@ void tcp_send_loss_probe(struct sock *sk
+               return;
+       }
+-      /* At most one outstanding TLP retransmission. */
+-      if (tp->tlp_high_seq)
+-              goto rearm_timer;
+-
+       if (skb_still_in_host_queue(sk, skb))
+               goto rearm_timer;
+@@ -2606,10 +2607,12 @@ void tcp_send_loss_probe(struct sock *sk
+       if (__tcp_retransmit_skb(sk, skb, 1))
+               goto rearm_timer;
++      tp->tlp_retrans = 1;
++
++probe_sent:
+       /* Record snd_nxt for loss detection. */
+       tp->tlp_high_seq = tp->snd_nxt;
+-probe_sent:
+       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
+       /* Reset s.t. tcp_rearm_rto will restart timer from now */
+       inet_csk(sk)->icsk_pending = 0;
diff --git a/queue-5.4/udp-copy-has_conns-in-reuseport_grow.patch b/queue-5.4/udp-copy-has_conns-in-reuseport_grow.patch
new file mode 100644 (file)
index 0000000..83d8717
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+Date: Tue, 21 Jul 2020 15:15:30 +0900
+Subject: udp: Copy has_conns in reuseport_grow().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+
+[ Upstream commit f2b2c55e512879a05456eaf5de4d1ed2f7757509 ]
+
+If an unconnected socket in a UDP reuseport group connect()s, has_conns is
+set to 1. Then, when a packet is received, udp[46]_lib_lookup2() scans all
+sockets in udp_hslot looking for the connected socket with the highest
+score.
+
+However, when the number of sockets bound to the port exceeds max_socks,
+reuseport_grow() resets has_conns to 0. It can cause udp[46]_lib_lookup2()
+to return without scanning all sockets, with the result that packets sent
+to connected sockets may be distributed to unconnected sockets.
+
+Therefore, reuseport_grow() should copy has_conns.
+
+Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
+CC: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_reuseport.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/core/sock_reuseport.c
++++ b/net/core/sock_reuseport.c
+@@ -112,6 +112,7 @@ static struct sock_reuseport *reuseport_
+       more_reuse->prog = reuse->prog;
+       more_reuse->reuseport_id = reuse->reuseport_id;
+       more_reuse->bind_inany = reuse->bind_inany;
++      more_reuse->has_conns = reuse->has_conns;
+       memcpy(more_reuse->socks, reuse->socks,
+              reuse->num_socks * sizeof(struct sock *));
diff --git a/queue-5.4/udp-improve-load-balancing-for-so_reuseport.patch b/queue-5.4/udp-improve-load-balancing-for-so_reuseport.patch
new file mode 100644 (file)
index 0000000..7612cc7
--- /dev/null
@@ -0,0 +1,121 @@
+From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST
+From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+Date: Tue, 21 Jul 2020 15:15:31 +0900
+Subject: udp: Improve load balancing for SO_REUSEPORT.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+
+[ Upstream commit efc6b6f6c3113e8b203b9debfb72d81e0f3dcace ]
+
+Currently, SO_REUSEPORT does not work well if connected sockets are in a
+UDP reuseport group.
+
+Then reuseport_has_conns() returns true and the result of
+reuseport_select_sock() is discarded. Also, unconnected sockets have the
+same score, hence only the first unconnected socket in udp_hslot ever
+receives the packets sent to unconnected sockets.
+
+So, the result of reuseport_select_sock() should be used for load
+balancing.
+
+The noteworthy point is that the unconnected sockets placed after
+connected sockets in sock_reuseport.socks will receive more packets than
+others because of the algorithm in reuseport_select_sock().
+
+    index | connected | reciprocal_scale | result
+    ---------------------------------------------
+    0     | no        | 20%              | 40%
+    1     | no        | 20%              | 20%
+    2     | yes       | 20%              | 0%
+    3     | no        | 20%              | 40%
+    4     | yes       | 20%              | 0%
+
+If most of the sockets are connected, this can be a problem, but it still
+works better than now.
+
+Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
+CC: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Benjamin Herrenschmidt <benh@amazon.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+Acked-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/udp.c |   15 +++++++++------
+ net/ipv6/udp.c |   15 +++++++++------
+ 2 files changed, 18 insertions(+), 12 deletions(-)
+
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -413,7 +413,7 @@ static struct sock *udp4_lib_lookup2(str
+                                    struct udp_hslot *hslot2,
+                                    struct sk_buff *skb)
+ {
+-      struct sock *sk, *result;
++      struct sock *sk, *result, *reuseport_result;
+       int score, badness;
+       u32 hash = 0;
+@@ -423,17 +423,20 @@ static struct sock *udp4_lib_lookup2(str
+               score = compute_score(sk, net, saddr, sport,
+                                     daddr, hnum, dif, sdif);
+               if (score > badness) {
++                      reuseport_result = NULL;
++
+                       if (sk->sk_reuseport &&
+                           sk->sk_state != TCP_ESTABLISHED) {
+                               hash = udp_ehashfn(net, daddr, hnum,
+                                                  saddr, sport);
+-                              result = reuseport_select_sock(sk, hash, skb,
+-                                                      sizeof(struct udphdr));
+-                              if (result && !reuseport_has_conns(sk, false))
+-                                      return result;
++                              reuseport_result = reuseport_select_sock(sk, hash, skb,
++                                                                       sizeof(struct udphdr));
++                              if (reuseport_result && !reuseport_has_conns(sk, false))
++                                      return reuseport_result;
+                       }
++
++                      result = reuseport_result ? : sk;
+                       badness = score;
+-                      result = sk;
+               }
+       }
+       return result;
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -148,7 +148,7 @@ static struct sock *udp6_lib_lookup2(str
+               int dif, int sdif, struct udp_hslot *hslot2,
+               struct sk_buff *skb)
+ {
+-      struct sock *sk, *result;
++      struct sock *sk, *result, *reuseport_result;
+       int score, badness;
+       u32 hash = 0;
+@@ -158,17 +158,20 @@ static struct sock *udp6_lib_lookup2(str
+               score = compute_score(sk, net, saddr, sport,
+                                     daddr, hnum, dif, sdif);
+               if (score > badness) {
++                      reuseport_result = NULL;
++
+                       if (sk->sk_reuseport &&
+                           sk->sk_state != TCP_ESTABLISHED) {
+                               hash = udp6_ehashfn(net, daddr, hnum,
+                                                   saddr, sport);
+-                              result = reuseport_select_sock(sk, hash, skb,
+-                                                      sizeof(struct udphdr));
+-                              if (result && !reuseport_has_conns(sk, false))
+-                                      return result;
++                              reuseport_result = reuseport_select_sock(sk, hash, skb,
++                                                                       sizeof(struct udphdr));
++                              if (reuseport_result && !reuseport_has_conns(sk, false))
++                                      return reuseport_result;
+                       }
+-                      result = sk;
++
++                      result = reuseport_result ? : sk;
+                       badness = score;
+               }
+       }