From e6a4080de7bbf3b3c852b1c24503b229af7d1cb8 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 29 Jul 2020 11:36:50 +0200 Subject: [PATCH] 5.4-stable patches added patches: ax.25-fix-out-of-bounds-read-in-ax25_connect.patch ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch dev-defer-free-of-skbs-in-flush_backlog.patch drivers-net-wan-x25_asy-fix-to-make-it-work.patch ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch qrtr-orphan-socket-in-qrtr_release.patch rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch tcp-allow-at-most-one-tlp-probe-per-flight.patch udp-copy-has_conns-in-reuseport_grow.patch udp-improve-load-balancing-for-so_reuseport.patch --- ...x-out-of-bounds-read-in-ax25_connect.patch | 43 ++++++ ...ger-overflows-in-connect-and-sendmsg.patch | 46 +++++++ ...t-out-of-bounds-read-in-ax25_sendmsg.patch | 36 +++++ ...-defer-free-of-skbs-in-flush_backlog.patch | 31 +++++ ...-net-wan-x25_asy-fix-to-make-it-work.patch | 102 ++++++++++++++ ...ix-null-ptr-deref-in-ip6gre_init_net.patch | 83 ++++++++++++ ...ne-when-printing-tx_timeout-by-sysfs.patch | 33 +++++ ...-wrong-clean-up-for-is_udplite-macro.patch | 43 ++++++ .../qrtr-orphan-socket-in-qrtr_release.patch | 37 ++++++ ...y-net_device-leak-when-newlink-fails.patch | 63 +++++++++ ...ipe-due-to-recvmsg-returning-enodata.patch | 51 +++++++ ...outq-only-when-new-outcnt-old-outcnt.patch | 64 +++++++++ ...tq-when-fails-to-do-addstream-reconf.patch | 54 ++++++++ queue-5.4/series | 16 +++ ...low-at-most-one-tlp-probe-per-flight.patch | 124 ++++++++++++++++++ ...udp-copy-has_conns-in-reuseport_grow.patch | 42 ++++++ 
...rove-load-balancing-for-so_reuseport.patch | 121 +++++++++++++++++ 17 files changed, 989 insertions(+) create mode 100644 queue-5.4/ax.25-fix-out-of-bounds-read-in-ax25_connect.patch create mode 100644 queue-5.4/ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch create mode 100644 queue-5.4/ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch create mode 100644 queue-5.4/dev-defer-free-of-skbs-in-flush_backlog.patch create mode 100644 queue-5.4/drivers-net-wan-x25_asy-fix-to-make-it-work.patch create mode 100644 queue-5.4/ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch create mode 100644 queue-5.4/net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch create mode 100644 queue-5.4/net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch create mode 100644 queue-5.4/qrtr-orphan-socket-in-qrtr_release.patch create mode 100644 queue-5.4/rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch create mode 100644 queue-5.4/rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch create mode 100644 queue-5.4/sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch create mode 100644 queue-5.4/sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch create mode 100644 queue-5.4/series create mode 100644 queue-5.4/tcp-allow-at-most-one-tlp-probe-per-flight.patch create mode 100644 queue-5.4/udp-copy-has_conns-in-reuseport_grow.patch create mode 100644 queue-5.4/udp-improve-load-balancing-for-so_reuseport.patch diff --git a/queue-5.4/ax.25-fix-out-of-bounds-read-in-ax25_connect.patch b/queue-5.4/ax.25-fix-out-of-bounds-read-in-ax25_connect.patch new file mode 100644 index 00000000000..c4adf422c8f --- /dev/null +++ b/queue-5.4/ax.25-fix-out-of-bounds-read-in-ax25_connect.patch @@ -0,0 +1,43 @@ +From foo@baz Wed 29 Jul 2020 11:19:55 AM CEST +From: Peilin Ye +Date: Wed, 22 Jul 2020 11:19:01 -0400 +Subject: AX.25: Fix out-of-bounds read in ax25_connect() + +From: Peilin Ye + +[ Upstream commit 
2f2a7ffad5c6cbf3d438e813cfdc88230e185ba6 ] + +Checks on `addr_len` and `fsa->fsa_ax25.sax25_ndigis` are insufficient. +ax25_connect() can go out of bounds when `fsa->fsa_ax25.sax25_ndigis` +equals to 7 or 8. Fix it. + +This issue has been reported as a KMSAN uninit-value bug, because in such +a case, ax25_connect() reaches into the uninitialized portion of the +`struct sockaddr_storage` statically allocated in __sys_connect(). + +It is safe to remove `fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS` because +`addr_len` is guaranteed to be less than or equal to +`sizeof(struct full_sockaddr_ax25)`. + +Reported-by: syzbot+c82752228ed975b0a623@syzkaller.appspotmail.com +Link: https://syzkaller.appspot.com/bug?id=55ef9d629f3b3d7d70b69558015b63b48d01af66 +Signed-off-by: Peilin Ye +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ax25/af_ax25.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ax25/af_ax25.c ++++ b/net/ax25/af_ax25.c +@@ -1187,7 +1187,9 @@ static int __must_check ax25_connect(str + if (addr_len > sizeof(struct sockaddr_ax25) && + fsa->fsa_ax25.sax25_ndigis != 0) { + /* Valid number of digipeaters ? 
*/ +- if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) { ++ if (fsa->fsa_ax25.sax25_ndigis < 1 || ++ addr_len < sizeof(struct sockaddr_ax25) + ++ sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) { + err = -EINVAL; + goto out_release; + } diff --git a/queue-5.4/ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch b/queue-5.4/ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch new file mode 100644 index 00000000000..9fd4ce6bf7b --- /dev/null +++ b/queue-5.4/ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch @@ -0,0 +1,46 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Dan Carpenter +Date: Thu, 23 Jul 2020 17:49:57 +0300 +Subject: AX.25: Prevent integer overflows in connect and sendmsg + +From: Dan Carpenter + +[ Upstream commit 17ad73e941b71f3bec7523ea4e9cbc3752461c2d ] + +We recently added some bounds checking in ax25_connect() and +ax25_sendmsg() and so we removed the AX25_MAX_DIGIS checks because +they were no longer required. + +Unfortunately, I believe they are required to prevent integer overflows +so I have added them back. + +Fixes: 8885bb0621f0 ("AX.25: Prevent out-of-bounds read in ax25_sendmsg()") +Fixes: 2f2a7ffad5c6 ("AX.25: Fix out-of-bounds read in ax25_connect()") +Signed-off-by: Dan Carpenter +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ax25/af_ax25.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/ax25/af_ax25.c ++++ b/net/ax25/af_ax25.c +@@ -1188,6 +1188,7 @@ static int __must_check ax25_connect(str + fsa->fsa_ax25.sax25_ndigis != 0) { + /* Valid number of digipeaters ? 
*/ + if (fsa->fsa_ax25.sax25_ndigis < 1 || ++ fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS || + addr_len < sizeof(struct sockaddr_ax25) + + sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) { + err = -EINVAL; +@@ -1509,7 +1510,9 @@ static int ax25_sendmsg(struct socket *s + struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax; + + /* Valid number of digipeaters ? */ +- if (usax->sax25_ndigis < 1 || addr_len < sizeof(struct sockaddr_ax25) + ++ if (usax->sax25_ndigis < 1 || ++ usax->sax25_ndigis > AX25_MAX_DIGIS || ++ addr_len < sizeof(struct sockaddr_ax25) + + sizeof(ax25_address) * usax->sax25_ndigis) { + err = -EINVAL; + goto out; diff --git a/queue-5.4/ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch b/queue-5.4/ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch new file mode 100644 index 00000000000..54bc9d5d0ed --- /dev/null +++ b/queue-5.4/ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch @@ -0,0 +1,36 @@ +From foo@baz Wed 29 Jul 2020 11:19:55 AM CEST +From: Peilin Ye +Date: Wed, 22 Jul 2020 12:05:12 -0400 +Subject: AX.25: Prevent out-of-bounds read in ax25_sendmsg() + +From: Peilin Ye + +[ Upstream commit 8885bb0621f01a6c82be60a91e5fc0f6e2f71186 ] + +Checks on `addr_len` and `usax->sax25_ndigis` are insufficient. +ax25_sendmsg() can go out of bounds when `usax->sax25_ndigis` equals to 7 +or 8. Fix it. + +It is safe to remove `usax->sax25_ndigis > AX25_MAX_DIGIS`, since +`addr_len` is guaranteed to be less than or equal to +`sizeof(struct full_sockaddr_ax25)` + +Signed-off-by: Peilin Ye +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ax25/af_ax25.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ax25/af_ax25.c ++++ b/net/ax25/af_ax25.c +@@ -1509,7 +1509,8 @@ static int ax25_sendmsg(struct socket *s + struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax; + + /* Valid number of digipeaters ? 
*/ +- if (usax->sax25_ndigis < 1 || usax->sax25_ndigis > AX25_MAX_DIGIS) { ++ if (usax->sax25_ndigis < 1 || addr_len < sizeof(struct sockaddr_ax25) + ++ sizeof(ax25_address) * usax->sax25_ndigis) { + err = -EINVAL; + goto out; + } diff --git a/queue-5.4/dev-defer-free-of-skbs-in-flush_backlog.patch b/queue-5.4/dev-defer-free-of-skbs-in-flush_backlog.patch new file mode 100644 index 00000000000..c06a5fa5be0 --- /dev/null +++ b/queue-5.4/dev-defer-free-of-skbs-in-flush_backlog.patch @@ -0,0 +1,31 @@ +From foo@baz Wed 29 Jul 2020 11:19:55 AM CEST +From: Subash Abhinov Kasiviswanathan +Date: Thu, 23 Jul 2020 11:31:48 -0600 +Subject: dev: Defer free of skbs in flush_backlog + +From: Subash Abhinov Kasiviswanathan + +[ Upstream commit 7df5cb75cfb8acf96c7f2342530eb41e0c11f4c3 ] + +IRQs are disabled when freeing skbs in input queue. +Use the IRQ safe variant to free skbs here. + +Fixes: 145dd5f9c88f ("net: flush the softnet backlog in process context") +Signed-off-by: Subash Abhinov Kasiviswanathan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -5229,7 +5229,7 @@ static void flush_backlog(struct work_st + skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { + if (skb->dev->reg_state == NETREG_UNREGISTERING) { + __skb_unlink(skb, &sd->input_pkt_queue); +- kfree_skb(skb); ++ dev_kfree_skb_irq(skb); + input_queue_head_incr(sd); + } + } diff --git a/queue-5.4/drivers-net-wan-x25_asy-fix-to-make-it-work.patch b/queue-5.4/drivers-net-wan-x25_asy-fix-to-make-it-work.patch new file mode 100644 index 00000000000..e2afcbf24ea --- /dev/null +++ b/queue-5.4/drivers-net-wan-x25_asy-fix-to-make-it-work.patch @@ -0,0 +1,102 @@ +From foo@baz Wed 29 Jul 2020 11:19:55 AM CEST +From: Xie He +Date: Thu, 16 Jul 2020 16:44:33 -0700 +Subject: drivers/net/wan/x25_asy: Fix to make it work + +From: Xie He + +[ Upstream commit 8fdcabeac39824fe67480fd9508d80161c541854 ] + +This driver is not working because of problems of its receiving code. +This patch fixes it to make it work. + +When the driver receives an LAPB frame, it should first pass the frame +to the LAPB module to process. After processing, the LAPB module passes +the data (the packet) back to the driver, the driver should then add a +one-byte pseudo header and pass the data to upper layers. + +The changes to the "x25_asy_bump" function and the +"x25_asy_data_indication" function are to correctly implement this +procedure. + +Also, the "x25_asy_unesc" function ignores any frame that is shorter +than 3 bytes. However the shortest frames are 2-byte long. So we need +to change it to allow 2-byte frames to pass. + +Cc: Eric Dumazet +Cc: Martin Schiller +Signed-off-by: Xie He +Reviewed-by: Martin Schiller +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wan/x25_asy.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +--- a/drivers/net/wan/x25_asy.c ++++ b/drivers/net/wan/x25_asy.c +@@ -183,7 +183,7 @@ static inline void x25_asy_unlock(struct + netif_wake_queue(sl->dev); + } + +-/* Send one completely decapsulated IP datagram to the IP layer. */ ++/* Send an LAPB frame to the LAPB module to process. */ + + static void x25_asy_bump(struct x25_asy *sl) + { +@@ -195,13 +195,12 @@ static void x25_asy_bump(struct x25_asy + count = sl->rcount; + dev->stats.rx_bytes += count; + +- skb = dev_alloc_skb(count+1); ++ skb = dev_alloc_skb(count); + if (skb == NULL) { + netdev_warn(sl->dev, "memory squeeze, dropping packet\n"); + dev->stats.rx_dropped++; + return; + } +- skb_push(skb, 1); /* LAPB internal control */ + skb_put_data(skb, sl->rbuff, count); + skb->protocol = x25_type_trans(skb, sl->dev); + err = lapb_data_received(skb->dev, skb); +@@ -209,7 +208,6 @@ static void x25_asy_bump(struct x25_asy + kfree_skb(skb); + printk(KERN_DEBUG "x25_asy: data received err - %d\n", err); + } else { +- netif_rx(skb); + dev->stats.rx_packets++; + } + } +@@ -356,12 +354,21 @@ static netdev_tx_t x25_asy_xmit(struct s + */ + + /* +- * Called when I frame data arrives. We did the work above - throw it +- * at the net layer. ++ * Called when I frame data arrive. We add a pseudo header for upper ++ * layers and pass it to upper layers. 
+ */ + + static int x25_asy_data_indication(struct net_device *dev, struct sk_buff *skb) + { ++ if (skb_cow(skb, 1)) { ++ kfree_skb(skb); ++ return NET_RX_DROP; ++ } ++ skb_push(skb, 1); ++ skb->data[0] = X25_IFACE_DATA; ++ ++ skb->protocol = x25_type_trans(skb, dev); ++ + return netif_rx(skb); + } + +@@ -657,7 +664,7 @@ static void x25_asy_unesc(struct x25_asy + switch (s) { + case X25_END: + if (!test_and_clear_bit(SLF_ERROR, &sl->flags) && +- sl->rcount > 2) ++ sl->rcount >= 2) + x25_asy_bump(sl); + clear_bit(SLF_ESCAPE, &sl->flags); + sl->rcount = 0; diff --git a/queue-5.4/ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch b/queue-5.4/ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch new file mode 100644 index 00000000000..8ff4324965c --- /dev/null +++ b/queue-5.4/ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch @@ -0,0 +1,83 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Wei Yongjun +Date: Mon, 13 Jul 2020 23:59:50 +0800 +Subject: ip6_gre: fix null-ptr-deref in ip6gre_init_net() + +From: Wei Yongjun + +[ Upstream commit 46ef5b89ec0ecf290d74c4aee844f063933c4da4 ] + +KASAN report null-ptr-deref error when register_netdev() failed: + +KASAN: null-ptr-deref in range [0x00000000000003c0-0x00000000000003c7] +CPU: 2 PID: 422 Comm: ip Not tainted 5.8.0-rc4+ #12 +Call Trace: + ip6gre_init_net+0x4ab/0x580 + ? ip6gre_tunnel_uninit+0x3f0/0x3f0 + ops_init+0xa8/0x3c0 + setup_net+0x2de/0x7e0 + ? rcu_read_lock_bh_held+0xb0/0xb0 + ? ops_init+0x3c0/0x3c0 + ? kasan_unpoison_shadow+0x33/0x40 + ? __kasan_kmalloc.constprop.0+0xc2/0xd0 + copy_net_ns+0x27d/0x530 + create_new_namespaces+0x382/0xa30 + unshare_nsproxy_namespaces+0xa1/0x1d0 + ksys_unshare+0x39c/0x780 + ? walk_process_tree+0x2a0/0x2a0 + ? trace_hardirqs_on+0x4a/0x1b0 + ? _raw_spin_unlock_irq+0x1f/0x30 + ? syscall_trace_enter+0x1a7/0x330 + ? 
do_syscall_64+0x1c/0xa0 + __x64_sys_unshare+0x2d/0x40 + do_syscall_64+0x56/0xa0 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +ip6gre_tunnel_uninit() has set 'ign->fb_tunnel_dev' to NULL, later +access to ign->fb_tunnel_dev cause null-ptr-deref. Fix it by saving +'ign->fb_tunnel_dev' to local variable ndev. + +Fixes: dafabb6590cb ("ip6_gre: fix use-after-free in ip6gre_tunnel_lookup()") +Reported-by: Hulk Robot +Signed-off-by: Wei Yongjun +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -1560,17 +1560,18 @@ static void ip6gre_destroy_tunnels(struc + static int __net_init ip6gre_init_net(struct net *net) + { + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); ++ struct net_device *ndev; + int err; + + if (!net_has_fallback_tunnels(net)) + return 0; +- ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", +- NET_NAME_UNKNOWN, +- ip6gre_tunnel_setup); +- if (!ign->fb_tunnel_dev) { ++ ndev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", ++ NET_NAME_UNKNOWN, ip6gre_tunnel_setup); ++ if (!ndev) { + err = -ENOMEM; + goto err_alloc_dev; + } ++ ign->fb_tunnel_dev = ndev; + dev_net_set(ign->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. 
+@@ -1590,7 +1591,7 @@ static int __net_init ip6gre_init_net(st + return 0; + + err_reg_dev: +- free_netdev(ign->fb_tunnel_dev); ++ free_netdev(ndev); + err_alloc_dev: + return err; + } diff --git a/queue-5.4/net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch b/queue-5.4/net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch new file mode 100644 index 00000000000..927b28cb9ce --- /dev/null +++ b/queue-5.4/net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch @@ -0,0 +1,33 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Xiongfeng Wang +Date: Tue, 21 Jul 2020 15:02:57 +0800 +Subject: net-sysfs: add a newline when printing 'tx_timeout' by sysfs + +From: Xiongfeng Wang + +[ Upstream commit 9bb5fbea59f36a589ef886292549ca4052fe676c ] + +When I cat 'tx_timeout' by sysfs, it displays as follows. It's better to +add a newline for easy reading. + +root@syzkaller:~# cat /sys/devices/virtual/net/lo/queues/tx-0/tx_timeout +0root@syzkaller:~# + +Signed-off-by: Xiongfeng Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/net-sysfs.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/core/net-sysfs.c ++++ b/net/core/net-sysfs.c +@@ -1036,7 +1036,7 @@ static ssize_t tx_timeout_show(struct ne + trans_timeout = queue->trans_timeout; + spin_unlock_irq(&queue->_xmit_lock); + +- return sprintf(buf, "%lu", trans_timeout); ++ return sprintf(buf, fmt_ulong, trans_timeout); + } + + static unsigned int get_netdev_queue_index(struct netdev_queue *queue) diff --git a/queue-5.4/net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch b/queue-5.4/net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch new file mode 100644 index 00000000000..3939d556d78 --- /dev/null +++ b/queue-5.4/net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch @@ -0,0 +1,43 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Miaohe Lin +Date: Tue, 21 Jul 2020 17:11:44 +0800 +Subject: net: udp: Fix wrong clean up for IS_UDPLITE macro + +From: Miaohe Lin + +[ Upstream commit b0a422772fec29811e293c7c0e6f991c0fd9241d ] + +We can't use IS_UDPLITE to replace udp_sk->pcflag when UDPLITE_RECV_CC is +checked. + +Fixes: b2bf1e2659b1 ("[UDP]: Clean up for IS_UDPLITE macro") +Signed-off-by: Miaohe Lin +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 2 +- + net/ipv6/udp.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -2045,7 +2045,7 @@ static int udp_queue_rcv_one_skb(struct + /* + * UDP-Lite specific tests, ignored on UDP sockets + */ +- if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { ++ if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + /* + * MIB statistics other than incrementing the error count are +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -643,7 +643,7 @@ static int udpv6_queue_rcv_one_skb(struc + /* + * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). 
+ */ +- if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { ++ if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + if (up->pcrlen == 0) { /* full coverage was set */ + net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", diff --git a/queue-5.4/qrtr-orphan-socket-in-qrtr_release.patch b/queue-5.4/qrtr-orphan-socket-in-qrtr_release.patch new file mode 100644 index 00000000000..72eb080cbea --- /dev/null +++ b/queue-5.4/qrtr-orphan-socket-in-qrtr_release.patch @@ -0,0 +1,37 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Cong Wang +Date: Fri, 24 Jul 2020 09:45:51 -0700 +Subject: qrtr: orphan socket in qrtr_release() + +From: Cong Wang + +[ Upstream commit af9f691f0f5bdd1ade65a7b84927639882d7c3e5 ] + +We have to detach sock from socket in qrtr_release(), +otherwise skb->sk may still reference to this socket +when the skb is released in tun->queue, particularly +sk->sk_wq still points to &sock->wq, which leads to +a UAF. + +Reported-and-tested-by: syzbot+6720d64f31c081c2f708@syzkaller.appspotmail.com +Fixes: 28fb4e59a47d ("net: qrtr: Expose tunneling endpoint to user space") +Cc: Bjorn Andersson +Cc: Eric Dumazet +Signed-off-by: Cong Wang +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/qrtr/qrtr.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/qrtr/qrtr.c ++++ b/net/qrtr/qrtr.c +@@ -1004,6 +1004,7 @@ static int qrtr_release(struct socket *s + sk->sk_state_change(sk); + + sock_set_flag(sk, SOCK_DEAD); ++ sock_orphan(sk); + sock->sk = NULL; + + if (!sock_flag(sk, SOCK_ZAPPED)) diff --git a/queue-5.4/rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch b/queue-5.4/rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch new file mode 100644 index 00000000000..89ff6076294 --- /dev/null +++ b/queue-5.4/rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch @@ -0,0 +1,63 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Weilong Chen +Date: Wed, 15 Jul 2020 20:58:10 +0800 +Subject: rtnetlink: Fix memory(net_device) leak when ->newlink fails + +From: Weilong Chen + +[ Upstream commit cebb69754f37d68e1355a5e726fdac317bcda302 ] + +When vlan_newlink call register_vlan_dev fails, it might return error +with dev->reg_state = NETREG_UNREGISTERED. The rtnl_newlink should +free the memory. But currently rtnl_newlink only free the memory which +state is NETREG_UNINITIALIZED. + +BUG: memory leak +unreferenced object 0xffff8881051de000 (size 4096): + comm "syz-executor139", pid 560, jiffies 4294745346 (age 32.445s) + hex dump (first 32 bytes): + 76 6c 61 6e 32 00 00 00 00 00 00 00 00 00 00 00 vlan2........... + 00 45 28 03 81 88 ff ff 00 00 00 00 00 00 00 00 .E(............. 
+ backtrace: + [<0000000047527e31>] kmalloc_node include/linux/slab.h:578 [inline] + [<0000000047527e31>] kvmalloc_node+0x33/0xd0 mm/util.c:574 + [<000000002b59e3bc>] kvmalloc include/linux/mm.h:753 [inline] + [<000000002b59e3bc>] kvzalloc include/linux/mm.h:761 [inline] + [<000000002b59e3bc>] alloc_netdev_mqs+0x83/0xd90 net/core/dev.c:9929 + [<000000006076752a>] rtnl_create_link+0x2c0/0xa20 net/core/rtnetlink.c:3067 + [<00000000572b3be5>] __rtnl_newlink+0xc9c/0x1330 net/core/rtnetlink.c:3329 + [<00000000e84ea553>] rtnl_newlink+0x66/0x90 net/core/rtnetlink.c:3397 + [<0000000052c7c0a9>] rtnetlink_rcv_msg+0x540/0x990 net/core/rtnetlink.c:5460 + [<000000004b5cb379>] netlink_rcv_skb+0x12b/0x3a0 net/netlink/af_netlink.c:2469 + [<00000000c71c20d3>] netlink_unicast_kernel net/netlink/af_netlink.c:1303 [inline] + [<00000000c71c20d3>] netlink_unicast+0x4c6/0x690 net/netlink/af_netlink.c:1329 + [<00000000cca72fa9>] netlink_sendmsg+0x735/0xcc0 net/netlink/af_netlink.c:1918 + [<000000009221ebf7>] sock_sendmsg_nosec net/socket.c:652 [inline] + [<000000009221ebf7>] sock_sendmsg+0x109/0x140 net/socket.c:672 + [<000000001c30ffe4>] ____sys_sendmsg+0x5f5/0x780 net/socket.c:2352 + [<00000000b71ca6f3>] ___sys_sendmsg+0x11d/0x1a0 net/socket.c:2406 + [<0000000007297384>] __sys_sendmsg+0xeb/0x1b0 net/socket.c:2439 + [<000000000eb29b11>] do_syscall_64+0x56/0xa0 arch/x86/entry/common.c:359 + [<000000006839b4d0>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Fixes: cb626bf566eb ("net-sysfs: Fix reference count leak") +Reported-by: Hulk Robot +Signed-off-by: Weilong Chen +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -3231,7 +3231,8 @@ replay: + */ + if (err < 0) { + /* If device is not registered at all, free it now */ +- if (dev->reg_state == NETREG_UNINITIALIZED) ++ if (dev->reg_state == NETREG_UNINITIALIZED || ++ dev->reg_state == NETREG_UNREGISTERED) + free_netdev(dev); + goto out; + } diff --git a/queue-5.4/rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch b/queue-5.4/rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch new file mode 100644 index 00000000000..86b7abb126d --- /dev/null +++ b/queue-5.4/rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch @@ -0,0 +1,51 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: David Howells +Date: Mon, 20 Jul 2020 12:41:46 +0100 +Subject: rxrpc: Fix sendmsg() returning EPIPE due to recvmsg() returning ENODATA + +From: David Howells + +[ Upstream commit 639f181f0ee20d3249dbc55f740f0167267180f0 ] + +rxrpc_sendmsg() returns EPIPE if there's an outstanding error, such as if +rxrpc_recvmsg() indicating ENODATA if there's nothing for it to read. + +Change rxrpc_recvmsg() to return EAGAIN instead if there's nothing to read +as this particular error doesn't get stored in ->sk_err by the networking +core. + +Also change rxrpc_sendmsg() so that it doesn't fail with delayed receive +errors (there's no way for it to report which call, if any, the error was +caused by). + +Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") +Signed-off-by: David Howells +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rxrpc/recvmsg.c | 2 +- + net/rxrpc/sendmsg.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +--- a/net/rxrpc/recvmsg.c ++++ b/net/rxrpc/recvmsg.c +@@ -464,7 +464,7 @@ try_again: + list_empty(&rx->recvmsg_q) && + rx->sk.sk_state != RXRPC_SERVER_LISTENING) { + release_sock(&rx->sk); +- return -ENODATA; ++ return -EAGAIN; + } + + if (list_empty(&rx->recvmsg_q)) { +--- a/net/rxrpc/sendmsg.c ++++ b/net/rxrpc/sendmsg.c +@@ -306,7 +306,7 @@ static int rxrpc_send_data(struct rxrpc_ + /* this should be in poll */ + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + +- if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) ++ if (sk->sk_shutdown & SEND_SHUTDOWN) + return -EPIPE; + + more = msg->msg_flags & MSG_MORE; diff --git a/queue-5.4/sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch b/queue-5.4/sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch new file mode 100644 index 00000000000..b2adf48ca7d --- /dev/null +++ b/queue-5.4/sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch @@ -0,0 +1,64 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Xin Long +Date: Wed, 22 Jul 2020 23:52:11 +0800 +Subject: sctp: shrink stream outq only when new outcnt < old outcnt + +From: Xin Long + +[ Upstream commit 8f13399db22f909a35735bf8ae2f932e0c8f0e30 ] + +It's not necessary to go list_for_each for outq->out_chunk_list +when new outcnt >= old outcnt, as no chunk with higher sid than +new (outcnt - 1) exists in the outqueue. + +While at it, also move the list_for_each code in a new function +sctp_stream_shrink_out(), which will be used in the next patch. + +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/stream.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -22,17 +22,11 @@ + #include + #include + +-/* Migrates chunks from stream queues to new stream queues if needed, +- * but not across associations. Also, removes those chunks to streams +- * higher than the new max. +- */ +-static void sctp_stream_outq_migrate(struct sctp_stream *stream, +- struct sctp_stream *new, __u16 outcnt) ++static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) + { + struct sctp_association *asoc; + struct sctp_chunk *ch, *temp; + struct sctp_outq *outq; +- int i; + + asoc = container_of(stream, struct sctp_association, stream); + outq = &asoc->outqueue; +@@ -56,6 +50,19 @@ static void sctp_stream_outq_migrate(str + + sctp_chunk_free(ch); + } ++} ++ ++/* Migrates chunks from stream queues to new stream queues if needed, ++ * but not across associations. Also, removes those chunks to streams ++ * higher than the new max. 
++ */ ++static void sctp_stream_outq_migrate(struct sctp_stream *stream, ++ struct sctp_stream *new, __u16 outcnt) ++{ ++ int i; ++ ++ if (stream->outcnt > outcnt) ++ sctp_stream_shrink_out(stream, outcnt); + + if (new) { + /* Here we actually move the old ext stuff into the new diff --git a/queue-5.4/sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch b/queue-5.4/sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch new file mode 100644 index 00000000000..145c62c589c --- /dev/null +++ b/queue-5.4/sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch @@ -0,0 +1,54 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Xin Long +Date: Wed, 22 Jul 2020 23:52:12 +0800 +Subject: sctp: shrink stream outq when fails to do addstream reconf + +From: Xin Long + +[ Upstream commit 3ecdda3e9ad837cf9cb41b6faa11b1af3a5abc0c ] + +When adding a stream with stream reconf, the new stream is initially in +the CLOSED state but new out chunks can still be enqueued. Then, once it +gets the confirmation from the peer, the state will change to OPEN. + +However, if the peer denies the request, the stream needs to be rolled +back. But when doing that, it only sets the stream outcnt back, and the +chunks already in the new stream don't get purged. As a result, these +chunks can still be dequeued in sctp_outq_dequeue_data(). + +As its stream is still CLOSED, the chunk will be enqueued to the head +again by sctp_outq_head_data(). This chunk will never be sent out, and +the chunks after it can never be dequeued. The assoc will be 'hung' in +a dead loop of sending this chunk. + +To fix it, this patch purges the chunks already in the new +stream by calling sctp_stream_shrink_out() when failing to do the +addstream reconf. + +Fixes: 11ae76e67a17 ("sctp: implement receiver-side procedures for the Reconf Response Parameter") +Reported-by: Ying Xu +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/stream.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -1045,11 +1045,13 @@ struct sctp_chunk *sctp_process_strreset + nums = ntohs(addstrm->number_of_streams); + number = stream->outcnt - nums; + +- if (result == SCTP_STRRESET_PERFORMED) ++ if (result == SCTP_STRRESET_PERFORMED) { + for (i = number; i < stream->outcnt; i++) + SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; +- else ++ } else { ++ sctp_stream_shrink_out(stream, number); + stream->outcnt = number; ++ } + + *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, + 0, nums, GFP_ATOMIC); diff --git a/queue-5.4/series b/queue-5.4/series new file mode 100644 index 00000000000..9bf57187f0f --- /dev/null +++ b/queue-5.4/series @@ -0,0 +1,16 @@ +ax.25-fix-out-of-bounds-read-in-ax25_connect.patch +ax.25-prevent-out-of-bounds-read-in-ax25_sendmsg.patch +dev-defer-free-of-skbs-in-flush_backlog.patch +drivers-net-wan-x25_asy-fix-to-make-it-work.patch +ip6_gre-fix-null-ptr-deref-in-ip6gre_init_net.patch +net-sysfs-add-a-newline-when-printing-tx_timeout-by-sysfs.patch +net-udp-fix-wrong-clean-up-for-is_udplite-macro.patch +qrtr-orphan-socket-in-qrtr_release.patch +rtnetlink-fix-memory-net_device-leak-when-newlink-fails.patch +rxrpc-fix-sendmsg-returning-epipe-due-to-recvmsg-returning-enodata.patch +tcp-allow-at-most-one-tlp-probe-per-flight.patch +ax.25-prevent-integer-overflows-in-connect-and-sendmsg.patch +sctp-shrink-stream-outq-only-when-new-outcnt-old-outcnt.patch +sctp-shrink-stream-outq-when-fails-to-do-addstream-reconf.patch +udp-copy-has_conns-in-reuseport_grow.patch +udp-improve-load-balancing-for-so_reuseport.patch diff --git a/queue-5.4/tcp-allow-at-most-one-tlp-probe-per-flight.patch b/queue-5.4/tcp-allow-at-most-one-tlp-probe-per-flight.patch new file mode 100644 index 00000000000..0e9d16755ce --- /dev/null +++ b/queue-5.4/tcp-allow-at-most-one-tlp-probe-per-flight.patch 
@@ -0,0 +1,124 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Yuchung Cheng +Date: Thu, 23 Jul 2020 12:00:06 -0700 +Subject: tcp: allow at most one TLP probe per flight + +From: Yuchung Cheng + +[ Upstream commit 76be93fc0702322179bb0ea87295d820ee46ad14 ] + +Previously, TLP could send multiple probes of new data in one +flight. This happens when the sender is cwnd limited. After the +initial TLP containing new data is sent, the sender receives another +ACK that acks part of the inflight data. It may re-arm another TLP timer +to send more if no further ACK returns before the next TLP timeout +(PTO) expires. In theory the sender may send a large number of TLPs +until the send queue is depleted. This only happens if the sender sees +such an irregular, uncommon ACK pattern, but it is generally undesirable +behavior, especially during congestion. + +The original TLP design allows only one TLP probe per inflight, as +published in "Reducing Web Latency: the Virtue of Gentle Aggression", +SIGCOMM 2013. This patch changes TLP to send at most one probe +per inflight. + +Note that if the sender is app-limited, TLP retransmits old data +and does not have this issue. + +Signed-off-by: Yuchung Cheng +Signed-off-by: Neal Cardwell +Signed-off-by: Eric Dumazet +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/tcp.h | 4 +++- + net/ipv4/tcp_input.c | 11 ++++++----- + net/ipv4/tcp_output.c | 13 ++++++++----- + 3 files changed, 17 insertions(+), 11 deletions(-) + +--- a/include/linux/tcp.h ++++ b/include/linux/tcp.h +@@ -216,6 +216,8 @@ struct tcp_sock { + } rack; + u16 advmss; /* Advertised MSS */ + u8 compressed_ack; ++ u8 tlp_retrans:1, /* TLP is a retransmission */ ++ unused_1:7; + u32 chrono_start; /* Start time in jiffies of a TCP chrono */ + u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ + u8 chrono_type:2, /* current chronograph type */ +@@ -238,7 +240,7 @@ struct tcp_sock { + save_syn:1, /* Save headers of SYN packet */ + is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ + syn_smc:1; /* SYN includes SMC */ +- u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ ++ u32 tlp_high_seq; /* snd_nxt at the time of TLP */ + + u32 tcp_tx_delay; /* delay (in usec) added to TX packets */ + u64 tcp_wstamp_ns; /* departure time for next sent data packet */ +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3505,10 +3505,8 @@ static void tcp_replace_ts_recent(struct + } + } + +-/* This routine deals with acks during a TLP episode. +- * We mark the end of a TLP episode on receiving TLP dupack or when +- * ack is after tlp_high_seq. +- * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. ++/* This routine deals with acks during a TLP episode and ends an episode by ++ * resetting tlp_high_seq. 
Ref: TLP algorithm in draft-ietf-tcpm-rack + */ + static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) + { +@@ -3517,7 +3515,10 @@ static void tcp_process_tlp_ack(struct s + if (before(ack, tp->tlp_high_seq)) + return; + +- if (flag & FLAG_DSACKING_ACK) { ++ if (!tp->tlp_retrans) { ++ /* TLP of new data has been acknowledged */ ++ tp->tlp_high_seq = 0; ++ } else if (flag & FLAG_DSACKING_ACK) { + /* This DSACK means original and TLP probe arrived; no loss */ + tp->tlp_high_seq = 0; + } else if (after(ack, tp->tlp_high_seq)) { +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2564,6 +2564,11 @@ void tcp_send_loss_probe(struct sock *sk + int pcount; + int mss = tcp_current_mss(sk); + ++ /* At most one outstanding TLP */ ++ if (tp->tlp_high_seq) ++ goto rearm_timer; ++ ++ tp->tlp_retrans = 0; + skb = tcp_send_head(sk); + if (skb && tcp_snd_wnd_test(tp, skb, mss)) { + pcount = tp->packets_out; +@@ -2581,10 +2586,6 @@ void tcp_send_loss_probe(struct sock *sk + return; + } + +- /* At most one outstanding TLP retransmission. */ +- if (tp->tlp_high_seq) +- goto rearm_timer; +- + if (skb_still_in_host_queue(sk, skb)) + goto rearm_timer; + +@@ -2606,10 +2607,12 @@ void tcp_send_loss_probe(struct sock *sk + if (__tcp_retransmit_skb(sk, skb, 1)) + goto rearm_timer; + ++ tp->tlp_retrans = 1; ++ ++probe_sent: + /* Record snd_nxt for loss detection. */ + tp->tlp_high_seq = tp->snd_nxt; + +-probe_sent: + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); + /* Reset s.t. 
tcp_rearm_rto will restart timer from now */ + inet_csk(sk)->icsk_pending = 0; diff --git a/queue-5.4/udp-copy-has_conns-in-reuseport_grow.patch b/queue-5.4/udp-copy-has_conns-in-reuseport_grow.patch new file mode 100644 index 00000000000..83d871785c4 --- /dev/null +++ b/queue-5.4/udp-copy-has_conns-in-reuseport_grow.patch @@ -0,0 +1,42 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Kuniyuki Iwashima +Date: Tue, 21 Jul 2020 15:15:30 +0900 +Subject: udp: Copy has_conns in reuseport_grow(). + +From: Kuniyuki Iwashima + +[ Upstream commit f2b2c55e512879a05456eaf5de4d1ed2f7757509 ] + +If an unconnected socket in a UDP reuseport group connect()s, has_conns is +set to 1. Then, when a packet is received, udp[46]_lib_lookup2() scans all +sockets in udp_hslot looking for the connected socket with the highest +score. + +However, when the number of sockets bound to the port exceeds max_socks, +reuseport_grow() resets has_conns to 0. It can cause udp[46]_lib_lookup2() +to return without scanning all sockets, resulting in that packets sent to +connected sockets may be distributed to unconnected sockets. + +Therefore, reuseport_grow() should copy has_conns. + +Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets") +CC: Willem de Bruijn +Reviewed-by: Benjamin Herrenschmidt +Signed-off-by: Kuniyuki Iwashima +Acked-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock_reuseport.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/core/sock_reuseport.c ++++ b/net/core/sock_reuseport.c +@@ -112,6 +112,7 @@ static struct sock_reuseport *reuseport_ + more_reuse->prog = reuse->prog; + more_reuse->reuseport_id = reuse->reuseport_id; + more_reuse->bind_inany = reuse->bind_inany; ++ more_reuse->has_conns = reuse->has_conns; + + memcpy(more_reuse->socks, reuse->socks, + reuse->num_socks * sizeof(struct sock *)); diff --git a/queue-5.4/udp-improve-load-balancing-for-so_reuseport.patch b/queue-5.4/udp-improve-load-balancing-for-so_reuseport.patch new file mode 100644 index 00000000000..7612cc7e45d --- /dev/null +++ b/queue-5.4/udp-improve-load-balancing-for-so_reuseport.patch @@ -0,0 +1,121 @@ +From foo@baz Wed 29 Jul 2020 11:19:56 AM CEST +From: Kuniyuki Iwashima +Date: Tue, 21 Jul 2020 15:15:31 +0900 +Subject: udp: Improve load balancing for SO_REUSEPORT. + +From: Kuniyuki Iwashima + +[ Upstream commit efc6b6f6c3113e8b203b9debfb72d81e0f3dcace ] + +Currently, SO_REUSEPORT does not work well if connected sockets are in a +UDP reuseport group. + +In that case reuseport_has_conns() returns true and the result of +reuseport_select_sock() is discarded. Also, unconnected sockets have the +same score, hence only the first unconnected socket in udp_hslot +ever receives the packets sent to unconnected sockets. + +So, the result of reuseport_select_sock() should be used for load +balancing. + +Note that the unconnected sockets placed after +connected sockets in sock_reuseport.socks will receive more packets than +others because of the algorithm in reuseport_select_sock().
+ + index | connected | reciprocal_scale | result + --------------------------------------------- + 0 | no | 20% | 40% + 1 | no | 20% | 20% + 2 | yes | 20% | 0% + 3 | no | 20% | 40% + 4 | yes | 20% | 0% + +If most of the sockets are connected, this can be a problem, but it still +works better than now. + +Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets") +CC: Willem de Bruijn +Reviewed-by: Benjamin Herrenschmidt +Signed-off-by: Kuniyuki Iwashima +Acked-by: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 15 +++++++++------ + net/ipv6/udp.c | 15 +++++++++------ + 2 files changed, 18 insertions(+), 12 deletions(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -413,7 +413,7 @@ static struct sock *udp4_lib_lookup2(str + struct udp_hslot *hslot2, + struct sk_buff *skb) + { +- struct sock *sk, *result; ++ struct sock *sk, *result, *reuseport_result; + int score, badness; + u32 hash = 0; + +@@ -423,17 +423,20 @@ static struct sock *udp4_lib_lookup2(str + score = compute_score(sk, net, saddr, sport, + daddr, hnum, dif, sdif); + if (score > badness) { ++ reuseport_result = NULL; ++ + if (sk->sk_reuseport && + sk->sk_state != TCP_ESTABLISHED) { + hash = udp_ehashfn(net, daddr, hnum, + saddr, sport); +- result = reuseport_select_sock(sk, hash, skb, +- sizeof(struct udphdr)); +- if (result && !reuseport_has_conns(sk, false)) +- return result; ++ reuseport_result = reuseport_select_sock(sk, hash, skb, ++ sizeof(struct udphdr)); ++ if (reuseport_result && !reuseport_has_conns(sk, false)) ++ return reuseport_result; + } ++ ++ result = reuseport_result ? 
: sk; + badness = score; +- result = sk; + } + } + return result; +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -148,7 +148,7 @@ static struct sock *udp6_lib_lookup2(str + int dif, int sdif, struct udp_hslot *hslot2, + struct sk_buff *skb) + { +- struct sock *sk, *result; ++ struct sock *sk, *result, *reuseport_result; + int score, badness; + u32 hash = 0; + +@@ -158,17 +158,20 @@ static struct sock *udp6_lib_lookup2(str + score = compute_score(sk, net, saddr, sport, + daddr, hnum, dif, sdif); + if (score > badness) { ++ reuseport_result = NULL; ++ + if (sk->sk_reuseport && + sk->sk_state != TCP_ESTABLISHED) { + hash = udp6_ehashfn(net, daddr, hnum, + saddr, sport); + +- result = reuseport_select_sock(sk, hash, skb, +- sizeof(struct udphdr)); +- if (result && !reuseport_has_conns(sk, false)) +- return result; ++ reuseport_result = reuseport_select_sock(sk, hash, skb, ++ sizeof(struct udphdr)); ++ if (reuseport_result && !reuseport_has_conns(sk, false)) ++ return reuseport_result; + } +- result = sk; ++ ++ result = reuseport_result ? : sk; + badness = score; + } + } -- 2.47.3