From: Greg Kroah-Hartman Date: Mon, 27 Jan 2020 13:32:45 +0000 (+0100) Subject: 5.4-stable patches X-Git-Tag: v4.14.168~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=fbfeac295d698a80f883c599545f0e63dca2044d;p=thirdparty%2Fkernel%2Fstable-queue.git 5.4-stable patches added patches: airo-add-missing-cap_net_admin-check-in-airooldioctl-siocdevprivate.patch airo-fix-possible-info-leak-in-airooldioctl-siocdevprivate.patch can-slip-protect-tty-disc_data-in-write_wakeup-and-close-with-rcu.patch firestream-fix-memory-leaks.patch fou-fix-ipv6-netlink-policy.patch gtp-make-sure-only-sock_dgram-udp-sockets-are-accepted.patch ipv4-detect-rollover-in-specific-fib-table-dump.patch ipv6-sr-remove-skb_gso_ipxip6-on-end.d-actions.patch mlxsw-spectrum_acl-fix-use-after-free-during-reload.patch net-bcmgenet-use-netif_tx_napi_add-for-tx-napi.patch net-cxgb3_main-add-cap_net_admin-check-to-chelsio_get_mem.patch net-fix-packet-reordering-caused-by-gro-and-listified-rx-cooperation.patch net-ip6_gre-fix-moving-ip6gre-between-namespaces.patch net-ip6_tunnel-fix-namespaces-move.patch net-ip_tunnel-fix-namespaces-move.patch net-mlx5-dr-enable-counter-on-non-fwd-dest-objects.patch net-mlx5-dr-use-non-preemptible-call-to-get-the-current-cpu-number.patch net-mlx5-e-switch-prevent-ingress-rate-configuration-of-uplink-rep.patch net-mlx5-fix-lowest-fdb-pool-size.patch net-mlx5-update-the-list-of-the-pci-supported-devices.patch net-mlx5e-ktls-do-not-send-decrypted-marked-skbs-via-non-accel-path.patch net-mlx5e-ktls-fix-corner-case-checks-in-tx-resync-flow.patch net-mlx5e-ktls-remove-redundant-posts-in-tx-resync-flow.patch net-rtnetlink-validate-ifla_mtu-attribute-in-rtnl_create_link.patch net-sysfs-fix-reference-count-leak.patch net-usb-lan78xx-add-.ndo_features_check.patch net_sched-fix-datalen-for-ematch.patch net_sched-use-validated-tca_kind-attribute-in-tc_new_tfilter.patch revert-udp-do-rmem-bulk-free-even-if-the-rx-sk-queue-is-empty.patch tcp-do-not-leave-dangling-pointers-in-tp-highest_sack.patch tcp_bbr-improve-arithmetic-division-in-bbr_update_bw.patch tun-add-mutex_unlock-call-and-napi.skb-clearing-in-tun_get_user.patch --- diff --git a/queue-5.4/airo-add-missing-cap_net_admin-check-in-airooldioctl-siocdevprivate.patch b/queue-5.4/airo-add-missing-cap_net_admin-check-in-airooldioctl-siocdevprivate.patch new file mode 100644 index 00000000000..fa4433afe3b --- /dev/null +++ b/queue-5.4/airo-add-missing-cap_net_admin-check-in-airooldioctl-siocdevprivate.patch @@ -0,0 +1,78 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Michael Ellerman +Date: Wed, 22 Jan 2020 15:07:28 +1100 +Subject: airo: Add missing CAP_NET_ADMIN check in AIROOLDIOCTL/SIOCDEVPRIVATE + +From: Michael Ellerman + +[ Upstream commit 78f7a7566f5eb59321e99b55a6fdb16ea05b37d1 ] + +The driver for Cisco Aironet 4500 and 4800 series cards (airo.c), +implements AIROOLDIOCTL/SIOCDEVPRIVATE in airo_ioctl(). + +The ioctl handler copies an aironet_ioctl struct from userspace, which +includes a command. Some of the commands are handled in readrids(), +where the user controlled command is converted into a driver-internal +value called "ridcode". + +There are two command values, AIROGWEPKTMP and AIROGWEPKNV, which +correspond to ridcode values of RID_WEP_TEMP and RID_WEP_PERM +respectively. These commands both have checks that the user has +CAP_NET_ADMIN, with the comment that "Only super-user can read WEP +keys", otherwise they return -EPERM. 
+ +However there is another command value, AIRORRID, that lets the user +specify the ridcode value directly, with no other checks. This means +the user can bypass the CAP_NET_ADMIN check on AIROGWEPKTMP and +AIROGWEPKNV. + +Fix it by moving the CAP_NET_ADMIN check out of the command handling +and instead do it later based on the ridcode. That way regardless of +whether the ridcode is set via AIROGWEPKTMP or AIROGWEPKNV, or passed +in using AIRORID, we always do the CAP_NET_ADMIN check. + +Found by Ilja by code inspection, not tested as I don't have the +required hardware. + +Reported-by: Ilja Van Sprundel +Signed-off-by: Michael Ellerman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/cisco/airo.c | 18 ++++++++---------- + 1 file changed, 8 insertions(+), 10 deletions(-) + +--- a/drivers/net/wireless/cisco/airo.c ++++ b/drivers/net/wireless/cisco/airo.c +@@ -7790,16 +7790,8 @@ static int readrids(struct net_device *d + case AIROGVLIST: ridcode = RID_APLIST; break; + case AIROGDRVNAM: ridcode = RID_DRVNAME; break; + case AIROGEHTENC: ridcode = RID_ETHERENCAP; break; +- case AIROGWEPKTMP: ridcode = RID_WEP_TEMP; +- /* Only super-user can read WEP keys */ +- if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- break; +- case AIROGWEPKNV: ridcode = RID_WEP_PERM; +- /* Only super-user can read WEP keys */ +- if (!capable(CAP_NET_ADMIN)) +- return -EPERM; +- break; ++ case AIROGWEPKTMP: ridcode = RID_WEP_TEMP; break; ++ case AIROGWEPKNV: ridcode = RID_WEP_PERM; break; + case AIROGSTAT: ridcode = RID_STATUS; break; + case AIROGSTATSD32: ridcode = RID_STATSDELTA; break; + case AIROGSTATSC32: ridcode = RID_STATS; break; +@@ -7813,6 +7805,12 @@ static int readrids(struct net_device *d + return -EINVAL; + } + ++ if (ridcode == RID_WEP_TEMP || ridcode == RID_WEP_PERM) { ++ /* Only super-user can read WEP keys */ ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; ++ } ++ + if ((iobuf = kzalloc(RIDSIZE, GFP_KERNEL)) == NULL) + return -ENOMEM; + diff --git a/queue-5.4/airo-fix-possible-info-leak-in-airooldioctl-siocdevprivate.patch b/queue-5.4/airo-fix-possible-info-leak-in-airooldioctl-siocdevprivate.patch new file mode 100644 index 00000000000..5477a0ea6da --- /dev/null +++ b/queue-5.4/airo-fix-possible-info-leak-in-airooldioctl-siocdevprivate.patch @@ -0,0 +1,65 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Michael Ellerman +Date: Wed, 22 Jan 2020 15:07:27 +1100 +Subject: airo: Fix possible info leak in AIROOLDIOCTL/SIOCDEVPRIVATE + +From: Michael Ellerman + +[ Upstream commit d6bce2137f5d6bb1093e96d2f801479099b28094 ] + +The driver for Cisco Aironet 4500 and 4800 series cards (airo.c), +implements AIROOLDIOCTL/SIOCDEVPRIVATE in airo_ioctl(). + +The ioctl handler copies an aironet_ioctl struct from userspace, which +includes a command and a length. Some of the commands are handled in +readrids(), which kmalloc()'s a buffer of RIDSIZE (2048) bytes. + +That buffer is then passed to PC4500_readrid(), which has two cases. +The else case does some setup and then reads up to RIDSIZE bytes from +the hardware into the kmalloc()'ed buffer. + +Here len == RIDSIZE, pBuf is the kmalloc()'ed buffer: + + // read the rid length field + bap_read(ai, pBuf, 2, BAP1); + // length for remaining part of rid + len = min(len, (int)le16_to_cpu(*(__le16*)pBuf)) - 2; + ... 
+ // read remainder of the rid + rc = bap_read(ai, ((__le16*)pBuf)+1, len, BAP1); + +PC4500_readrid() then returns to readrids() which does: + + len = comp->len; + if (copy_to_user(comp->data, iobuf, min(len, (int)RIDSIZE))) { + +Where comp->len is the user controlled length field. + +So if the "rid length field" returned by the hardware is < 2048, and +the user requests 2048 bytes in comp->len, we will leak the previous +contents of the kmalloc()'ed buffer to userspace. + +Fix it by kzalloc()'ing the buffer. + +Found by Ilja by code inspection, not tested as I don't have the +required hardware. + +Reported-by: Ilja Van Sprundel +Signed-off-by: Michael Ellerman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/cisco/airo.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/wireless/cisco/airo.c ++++ b/drivers/net/wireless/cisco/airo.c +@@ -7813,7 +7813,7 @@ static int readrids(struct net_device *d + return -EINVAL; + } + +- if ((iobuf = kmalloc(RIDSIZE, GFP_KERNEL)) == NULL) ++ if ((iobuf = kzalloc(RIDSIZE, GFP_KERNEL)) == NULL) + return -ENOMEM; + + PC4500_readrid(ai,ridcode,iobuf,RIDSIZE, 1); diff --git a/queue-5.4/can-slip-protect-tty-disc_data-in-write_wakeup-and-close-with-rcu.patch b/queue-5.4/can-slip-protect-tty-disc_data-in-write_wakeup-and-close-with-rcu.patch new file mode 100644 index 00000000000..fa9b1613fc2 --- /dev/null +++ b/queue-5.4/can-slip-protect-tty-disc_data-in-write_wakeup-and-close-with-rcu.patch @@ -0,0 +1,109 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Richard Palethorpe +Date: Tue, 21 Jan 2020 14:42:58 +0100 +Subject: can, slip: Protect tty->disc_data in write_wakeup and close with RCU + +From: Richard Palethorpe + +[ Upstream commit 0ace17d56824165c7f4c68785d6b58971db954dd ] + +write_wakeup can happen in parallel with close/hangup where tty->disc_data +is set to NULL and the netdevice is freed thus also freeing +disc_data. write_wakeup accesses disc_data so we must prevent close from +freeing the netdev while write_wakeup has a non-NULL view of +tty->disc_data. + +We also need to make sure that accesses to disc_data are atomic. Which can +all be done with RCU. + +This problem was found by Syzkaller on SLCAN, but the same issue is +reproducible with the SLIP line discipline using an LTP test based on the +Syzkaller reproducer. + +A fix which didn't use RCU was posted by Hillf Danton. + +Fixes: 661f7fda21b1 ("slip: Fix deadlock in write_wakeup") +Fixes: a8e83b17536a ("slcan: Port write_wakeup deadlock fix from slip") +Reported-by: syzbot+017e491ae13c0068598a@syzkaller.appspotmail.com +Signed-off-by: Richard Palethorpe +Cc: Wolfgang Grandegger +Cc: Marc Kleine-Budde +Cc: "David S. Miller" +Cc: Tyler Hall +Cc: linux-can@vger.kernel.org +Cc: netdev@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Cc: syzkaller@googlegroups.com +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/can/slcan.c | 12 ++++++++++-- + drivers/net/slip/slip.c | 12 ++++++++++-- + 2 files changed, 20 insertions(+), 4 deletions(-) + +--- a/drivers/net/can/slcan.c ++++ b/drivers/net/can/slcan.c +@@ -344,9 +344,16 @@ static void slcan_transmit(struct work_s + */ + static void slcan_write_wakeup(struct tty_struct *tty) + { +- struct slcan *sl = tty->disc_data; ++ struct slcan *sl; ++ ++ rcu_read_lock(); ++ sl = rcu_dereference(tty->disc_data); ++ if (!sl) ++ goto out; + + schedule_work(&sl->tx_work); ++out: ++ rcu_read_unlock(); + } + + /* Send a can_frame to a TTY queue. 
*/ +@@ -644,10 +651,11 @@ static void slcan_close(struct tty_struc + return; + + spin_lock_bh(&sl->lock); +- tty->disc_data = NULL; ++ rcu_assign_pointer(tty->disc_data, NULL); + sl->tty = NULL; + spin_unlock_bh(&sl->lock); + ++ synchronize_rcu(); + flush_work(&sl->tx_work); + + /* Flush network side */ +--- a/drivers/net/slip/slip.c ++++ b/drivers/net/slip/slip.c +@@ -452,9 +452,16 @@ static void slip_transmit(struct work_st + */ + static void slip_write_wakeup(struct tty_struct *tty) + { +- struct slip *sl = tty->disc_data; ++ struct slip *sl; ++ ++ rcu_read_lock(); ++ sl = rcu_dereference(tty->disc_data); ++ if (!sl) ++ goto out; + + schedule_work(&sl->tx_work); ++out: ++ rcu_read_unlock(); + } + + static void sl_tx_timeout(struct net_device *dev) +@@ -882,10 +889,11 @@ static void slip_close(struct tty_struct + return; + + spin_lock_bh(&sl->lock); +- tty->disc_data = NULL; ++ rcu_assign_pointer(tty->disc_data, NULL); + sl->tty = NULL; + spin_unlock_bh(&sl->lock); + ++ synchronize_rcu(); + flush_work(&sl->tx_work); + + /* VSV = very important to remove timers */ diff --git a/queue-5.4/firestream-fix-memory-leaks.patch b/queue-5.4/firestream-fix-memory-leaks.patch new file mode 100644 index 00000000000..0faf2cbb17b --- /dev/null +++ b/queue-5.4/firestream-fix-memory-leaks.patch @@ -0,0 +1,52 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Wenwen Wang +Date: Sat, 25 Jan 2020 14:33:29 +0000 +Subject: firestream: fix memory leaks + +From: Wenwen Wang + +[ Upstream commit fa865ba183d61c1ec8cbcab8573159c3b72b89a4 ] + +In fs_open(), 'vcc' is allocated through kmalloc() and assigned to +'atm_vcc->dev_data.' In the following execution, if an error occurs, e.g., +there is no more free channel, an error code EBUSY or ENOMEM will be +returned. However, 'vcc' is not deallocated, leading to memory leaks. Note +that, in normal cases where fs_open() returns 0, 'vcc' will be deallocated +in fs_close(). But, if fs_open() fails, there is no guarantee that +fs_close() will be invoked. + +To fix this issue, deallocate 'vcc' before the error code is returned. + +Signed-off-by: Wenwen Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/atm/firestream.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/atm/firestream.c ++++ b/drivers/atm/firestream.c +@@ -912,6 +912,7 @@ static int fs_open(struct atm_vcc *atm_v + } + if (!to) { + printk ("No more free channels for FS50..\n"); ++ kfree(vcc); + return -EBUSY; + } + vcc->channo = dev->channo; +@@ -922,6 +923,7 @@ static int fs_open(struct atm_vcc *atm_v + if (((DO_DIRECTION(rxtp) && dev->atm_vccs[vcc->channo])) || + ( DO_DIRECTION(txtp) && test_bit (vcc->channo, dev->tx_inuse))) { + printk ("Channel is in use for FS155.\n"); ++ kfree(vcc); + return -EBUSY; + } + } +@@ -935,6 +937,7 @@ static int fs_open(struct atm_vcc *atm_v + tc, sizeof (struct fs_transmit_config)); + if (!tc) { + fs_dprintk (FS_DEBUG_OPEN, "fs: can't alloc transmit_config.\n"); ++ kfree(vcc); + return -ENOMEM; + } + diff --git a/queue-5.4/fou-fix-ipv6-netlink-policy.patch b/queue-5.4/fou-fix-ipv6-netlink-policy.patch new file mode 100644 index 00000000000..822e33a3b70 --- /dev/null +++ b/queue-5.4/fou-fix-ipv6-netlink-policy.patch @@ -0,0 +1,37 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Kristian Evensen +Date: Thu, 23 Jan 2020 13:20:18 +0100 +Subject: fou: Fix IPv6 netlink policy + +From: Kristian Evensen + +[ Upstream commit bb48eb9b12a95db9d679025927269d4adda6dbd1 ] + +When submitting v2 of "fou: Support binding FoU socket" (1713cb37bf67), +I accidentally sent the wrong version of the patch and one fix was +missing. In the initial version of the patch, as well as the version 2 +that I submitted, I incorrectly used ".type" for the two V6-attributes. +The correct is to use ".len". + +Reported-by: Dmitry Vyukov +Fixes: 1713cb37bf67 ("fou: Support binding FoU socket") +Signed-off-by: Kristian Evensen +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fou.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/fou.c ++++ b/net/ipv4/fou.c +@@ -662,8 +662,8 @@ static const struct nla_policy fou_nl_po + [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, }, + [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, }, + [FOU_ATTR_PEER_V4] = { .type = NLA_U32, }, +- [FOU_ATTR_LOCAL_V6] = { .type = sizeof(struct in6_addr), }, +- [FOU_ATTR_PEER_V6] = { .type = sizeof(struct in6_addr), }, ++ [FOU_ATTR_LOCAL_V6] = { .len = sizeof(struct in6_addr), }, ++ [FOU_ATTR_PEER_V6] = { .len = sizeof(struct in6_addr), }, + [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, }, + [FOU_ATTR_IFINDEX] = { .type = NLA_S32, }, + }; diff --git a/queue-5.4/gtp-make-sure-only-sock_dgram-udp-sockets-are-accepted.patch b/queue-5.4/gtp-make-sure-only-sock_dgram-udp-sockets-are-accepted.patch new file mode 100644 index 00000000000..9456bac6815 --- /dev/null +++ b/queue-5.4/gtp-make-sure-only-sock_dgram-udp-sockets-are-accepted.patch @@ -0,0 +1,119 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Eric Dumazet +Date: Tue, 21 Jan 2020 23:17:14 -0800 +Subject: gtp: make sure only SOCK_DGRAM UDP sockets are accepted + +From: Eric Dumazet + +[ Upstream commit 940ba14986657a50c15f694efca1beba31fa568f ] + +A malicious user could use RAW sockets and fool +GTP using them as standard SOCK_DGRAM UDP sockets. 
+ +BUG: KMSAN: uninit-value in udp_tunnel_encap_enable include/net/udp_tunnel.h:174 [inline] +BUG: KMSAN: uninit-value in setup_udp_tunnel_sock+0x45e/0x6f0 net/ipv4/udp_tunnel.c:85 +CPU: 0 PID: 11262 Comm: syz-executor613 Not tainted 5.5.0-rc5-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x220 lib/dump_stack.c:118 + kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 + __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 + udp_tunnel_encap_enable include/net/udp_tunnel.h:174 [inline] + setup_udp_tunnel_sock+0x45e/0x6f0 net/ipv4/udp_tunnel.c:85 + gtp_encap_enable_socket+0x37f/0x5a0 drivers/net/gtp.c:827 + gtp_encap_enable drivers/net/gtp.c:844 [inline] + gtp_newlink+0xfb/0x1e50 drivers/net/gtp.c:666 + __rtnl_newlink net/core/rtnetlink.c:3305 [inline] + rtnl_newlink+0x2973/0x3920 net/core/rtnetlink.c:3363 + rtnetlink_rcv_msg+0x1153/0x1570 net/core/rtnetlink.c:5424 + netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 + rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 + netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] + netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 + netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 + sock_sendmsg_nosec net/socket.c:639 [inline] + sock_sendmsg net/socket.c:659 [inline] + ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 + ___sys_sendmsg net/socket.c:2384 [inline] + __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 + __do_sys_sendmsg net/socket.c:2426 [inline] + __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 + __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 + do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +RIP: 0033:0x441359 +Code: e8 ac e8 ff ff 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 eb 08 fc ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007fff1cd0ac28 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000441359 +RDX: 0000000000000000 RSI: 0000000020000100 RDI: 0000000000000003 +RBP: 00000000006cb018 R08: 00000000004002c8 R09: 00000000004002c8 +R10: 00000000004002c8 R11: 0000000000000246 R12: 00000000004020d0 +R13: 0000000000402160 R14: 0000000000000000 R15: 0000000000000000 + +Uninit was created at: + kmsan_save_stack_with_flags+0x3c/0x90 mm/kmsan/kmsan.c:144 + kmsan_internal_alloc_meta_for_pages mm/kmsan/kmsan_shadow.c:307 [inline] + kmsan_alloc_page+0x12a/0x310 mm/kmsan/kmsan_shadow.c:336 + __alloc_pages_nodemask+0x57f2/0x5f60 mm/page_alloc.c:4800 + alloc_pages_current+0x67d/0x990 mm/mempolicy.c:2207 + alloc_pages include/linux/gfp.h:534 [inline] + alloc_slab_page+0x111/0x12f0 mm/slub.c:1511 + allocate_slab mm/slub.c:1656 [inline] + new_slab+0x2bc/0x1130 mm/slub.c:1722 + new_slab_objects mm/slub.c:2473 [inline] + ___slab_alloc+0x1533/0x1f30 mm/slub.c:2624 + __slab_alloc mm/slub.c:2664 [inline] + slab_alloc_node mm/slub.c:2738 [inline] + slab_alloc mm/slub.c:2783 [inline] + kmem_cache_alloc+0xb23/0xd70 mm/slub.c:2788 + sk_prot_alloc+0xf2/0x620 net/core/sock.c:1597 + sk_alloc+0xf0/0xbe0 net/core/sock.c:1657 + inet_create+0x7c7/0x1370 net/ipv4/af_inet.c:321 + __sock_create+0x8eb/0xf00 net/socket.c:1420 + sock_create net/socket.c:1471 [inline] + __sys_socket+0x1a1/0x600 net/socket.c:1513 + __do_sys_socket net/socket.c:1522 [inline] + __se_sys_socket+0x8d/0xb0 net/socket.c:1520 + __x64_sys_socket+0x4a/0x70 
net/socket.c:1520 + do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Fixes: 459aa660eb1d ("gtp: add initial driver for datapath of GPRS Tunneling Protocol (GTP-U)") +Signed-off-by: Eric Dumazet +Cc: Pablo Neira +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/gtp.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/net/gtp.c ++++ b/drivers/net/gtp.c +@@ -804,19 +804,21 @@ static struct sock *gtp_encap_enable_soc + return NULL; + } + +- if (sock->sk->sk_protocol != IPPROTO_UDP) { ++ sk = sock->sk; ++ if (sk->sk_protocol != IPPROTO_UDP || ++ sk->sk_type != SOCK_DGRAM || ++ (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)) { + pr_debug("socket fd=%d not UDP\n", fd); + sk = ERR_PTR(-EINVAL); + goto out_sock; + } + +- lock_sock(sock->sk); +- if (sock->sk->sk_user_data) { ++ lock_sock(sk); ++ if (sk->sk_user_data) { + sk = ERR_PTR(-EBUSY); + goto out_rel_sock; + } + +- sk = sock->sk; + sock_hold(sk); + + tuncfg.sk_user_data = gtp; diff --git a/queue-5.4/ipv4-detect-rollover-in-specific-fib-table-dump.patch b/queue-5.4/ipv4-detect-rollover-in-specific-fib-table-dump.patch new file mode 100644 index 00000000000..15329fe2707 --- /dev/null +++ b/queue-5.4/ipv4-detect-rollover-in-specific-fib-table-dump.patch @@ -0,0 +1,46 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: David Ahern +Date: Fri, 10 Jan 2020 09:03:58 -0800 +Subject: ipv4: Detect rollover in specific fib table dump + +From: David Ahern + +[ Upstream commit 9827c0634e461703abf81e8cc8b7adf5da5886d0 ] + +Sven-Haegar reported looping on fib dumps when 255.255.255.255 route has +been added to a table. The looping is caused by the key rolling over from +FFFFFFFF to 0. When dumping a specific table only, we need a means to detect +when the table dump is done. The key and count saved to cb args are both 0 +only at the start of the table dump. If key is 0 and count > 0, then we are +in the rollover case. Detect and return to avoid looping. + +This only affects dumps of a specific table; for dumps of all tables +(the case prior to the change in the Fixes tag) inet_dump_fib moved +the entry counter to the next table and reset the cb args used by +fib_table_dump and fn_trie_dump_leaf, so the rollover ffffffff back +to 0 did not cause looping with the dumps. + +Fixes: effe67926624 ("net: Enable kernel side filtering of route dumps") +Reported-by: Sven-Haegar Koch +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/fib_trie.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -2175,6 +2175,12 @@ int fib_table_dump(struct fib_table *tb, + int count = cb->args[2]; + t_key key = cb->args[3]; + ++ /* First time here, count and key are both always 0. Count > 0 ++ * and key == 0 means the dump has wrapped around and we are done. 
++ */ ++ if (count && !key) ++ return skb->len; ++ + while ((l = leaf_walk_rcu(&tp, key)) != NULL) { + int err; + diff --git a/queue-5.4/ipv6-sr-remove-skb_gso_ipxip6-on-end.d-actions.patch b/queue-5.4/ipv6-sr-remove-skb_gso_ipxip6-on-end.d-actions.patch new file mode 100644 index 00000000000..fb81db6584d --- /dev/null +++ b/queue-5.4/ipv6-sr-remove-skb_gso_ipxip6-on-end.d-actions.patch @@ -0,0 +1,53 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Yuki Taguchi +Date: Mon, 20 Jan 2020 13:48:37 +0900 +Subject: ipv6: sr: remove SKB_GSO_IPXIP6 on End.D* actions + +From: Yuki Taguchi + +[ Upstream commit 62ebaeaedee7591c257543d040677a60e35c7aec ] + +After LRO/GRO is applied, SRv6 encapsulated packets have +SKB_GSO_IPXIP6 feature flag, and this flag must be removed right after +decapulation procedure. + +Currently, SKB_GSO_IPXIP6 flag is not removed on End.D* actions, which +creates inconsistent packet state, that is, a normal TCP/IP packets +have the SKB_GSO_IPXIP6 flag. This behavior can cause unexpected +fallback to GSO on routing to netdevices that do not support +SKB_GSO_IPXIP6. For example, on inter-VRF forwarding, decapsulated +packets separated into small packets by GSO because VRF devices do not +support TSO for packets with SKB_GSO_IPXIP6 flag, and this degrades +forwarding performance. + +This patch removes encapsulation related GSO flags from the skb right +after the End.D* action is applied. + +Fixes: d7a669dd2f8b ("ipv6: sr: add helper functions for seg6local") +Signed-off-by: Yuki Taguchi +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/seg6_local.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv6/seg6_local.c ++++ b/net/ipv6/seg6_local.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #ifdef CONFIG_IPV6_SEG6_HMAC + #include + #endif +@@ -135,7 +136,8 @@ static bool decap_and_validate(struct sk + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); +- skb->encapsulation = 0; ++ if (iptunnel_pull_offloads(skb)) ++ return false; + + return true; + } diff --git a/queue-5.4/mlxsw-spectrum_acl-fix-use-after-free-during-reload.patch b/queue-5.4/mlxsw-spectrum_acl-fix-use-after-free-during-reload.patch new file mode 100644 index 00000000000..0e78040e203 --- /dev/null +++ b/queue-5.4/mlxsw-spectrum_acl-fix-use-after-free-during-reload.patch @@ -0,0 +1,201 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Ido Schimmel +Date: Wed, 22 Jan 2020 20:09:52 +0200 +Subject: mlxsw: spectrum_acl: Fix use-after-free during reload + +From: Ido Schimmel + +[ Upstream commit 971de2e572118c1128bff295341e37b6c8b8f108 ] + +During reload (or module unload), the router block is de-initialized. +Among other things, this results in the removal of a default multicast +route from each active virtual router (VRF). These default routes are +configured during initialization to trap packets to the CPU. In +Spectrum-2, unlike Spectrum-1, multicast routes are implemented using +ACL rules. + +Since the router block is de-initialized before the ACL block, it is +possible that the ACL rules corresponding to the default routes are +deleted while being accessed by the ACL delayed work that queries rules' +activity from the device. This can result in a rare use-after-free [1]. + +Fix this by protecting the rules list accessed by the delayed work with +a lock. We cannot use a spinlock as the activity read operation is +blocking. 
+ +[1] +[ 123.331662] ================================================================== +[ 123.339920] BUG: KASAN: use-after-free in mlxsw_sp_acl_rule_activity_update_work+0x330/0x3b0 +[ 123.349381] Read of size 8 at addr ffff8881f3bb4520 by task kworker/0:2/78 +[ 123.357080] +[ 123.358773] CPU: 0 PID: 78 Comm: kworker/0:2 Not tainted 5.5.0-rc5-custom-33108-gf5df95d3ef41 #2209 +[ 123.368898] Hardware name: Mellanox Technologies Ltd. MSN3700C/VMOD0008, BIOS 5.11 10/10/2018 +[ 123.378456] Workqueue: mlxsw_core mlxsw_sp_acl_rule_activity_update_work +[ 123.385970] Call Trace: +[ 123.388734] dump_stack+0xc6/0x11e +[ 123.392568] print_address_description.constprop.4+0x21/0x340 +[ 123.403236] __kasan_report.cold.8+0x76/0xb1 +[ 123.414884] kasan_report+0xe/0x20 +[ 123.418716] mlxsw_sp_acl_rule_activity_update_work+0x330/0x3b0 +[ 123.444034] process_one_work+0xb06/0x19a0 +[ 123.453731] worker_thread+0x91/0xe90 +[ 123.467348] kthread+0x348/0x410 +[ 123.476847] ret_from_fork+0x24/0x30 +[ 123.480863] +[ 123.482545] Allocated by task 73: +[ 123.486273] save_stack+0x19/0x80 +[ 123.490000] __kasan_kmalloc.constprop.6+0xc1/0xd0 +[ 123.495379] mlxsw_sp_acl_rule_create+0xa7/0x230 +[ 123.500566] mlxsw_sp2_mr_tcam_route_create+0xf6/0x3e0 +[ 123.506334] mlxsw_sp_mr_tcam_route_create+0x5b4/0x820 +[ 123.512102] mlxsw_sp_mr_table_create+0x3b5/0x690 +[ 123.517389] mlxsw_sp_vr_get+0x289/0x4d0 +[ 123.521797] mlxsw_sp_fib_node_get+0xa2/0x990 +[ 123.526692] mlxsw_sp_router_fib4_event_work+0x54c/0x2d60 +[ 123.532752] process_one_work+0xb06/0x19a0 +[ 123.537352] worker_thread+0x91/0xe90 +[ 123.541471] kthread+0x348/0x410 +[ 123.545103] ret_from_fork+0x24/0x30 +[ 123.549113] +[ 123.550795] Freed by task 518: +[ 123.554231] save_stack+0x19/0x80 +[ 123.557958] __kasan_slab_free+0x125/0x170 +[ 123.562556] kfree+0xd7/0x3a0 +[ 123.565895] mlxsw_sp_acl_rule_destroy+0x63/0xd0 +[ 123.571081] mlxsw_sp2_mr_tcam_route_destroy+0xd5/0x130 +[ 123.576946] mlxsw_sp_mr_tcam_route_destroy+0xba/0x260 +[ 123.582714] mlxsw_sp_mr_table_destroy+0x1ab/0x290 +[ 123.588091] mlxsw_sp_vr_put+0x1db/0x350 +[ 123.592496] mlxsw_sp_fib_node_put+0x298/0x4c0 +[ 123.597486] mlxsw_sp_vr_fib_flush+0x15b/0x360 +[ 123.602476] mlxsw_sp_router_fib_flush+0xba/0x470 +[ 123.607756] mlxsw_sp_vrs_fini+0xaa/0x120 +[ 123.612260] mlxsw_sp_router_fini+0x137/0x384 +[ 123.617152] mlxsw_sp_fini+0x30a/0x4a0 +[ 123.621374] mlxsw_core_bus_device_unregister+0x159/0x600 +[ 123.627435] mlxsw_devlink_core_bus_device_reload_down+0x7e/0xb0 +[ 123.634176] devlink_reload+0xb4/0x380 +[ 123.638391] devlink_nl_cmd_reload+0x610/0x700 +[ 123.643382] genl_rcv_msg+0x6a8/0xdc0 +[ 123.647497] netlink_rcv_skb+0x134/0x3a0 +[ 123.651904] genl_rcv+0x29/0x40 +[ 123.655436] netlink_unicast+0x4d4/0x700 +[ 123.659843] netlink_sendmsg+0x7c0/0xc70 +[ 123.664251] __sys_sendto+0x265/0x3c0 +[ 123.668367] __x64_sys_sendto+0xe2/0x1b0 +[ 123.672773] do_syscall_64+0xa0/0x530 +[ 123.676892] entry_SYSCALL_64_after_hwframe+0x49/0xbe +[ 123.682552] +[ 123.684238] The buggy address belongs to the object at ffff8881f3bb4500 +[ 123.684238] which belongs to the cache kmalloc-128 of size 128 +[ 123.698261] The buggy address is located 32 bytes inside of +[ 123.698261] 128-byte region [ffff8881f3bb4500, ffff8881f3bb4580) +[ 123.711303] The buggy address belongs to the page: +[ 123.716682] page:ffffea0007ceed00 refcount:1 mapcount:0 mapping:ffff888236403500 index:0x0 +[ 123.725958] raw: 0200000000000200 dead000000000100 dead000000000122 ffff888236403500 +[ 123.734646] raw: 0000000000000000 0000000000100010 
00000001ffffffff 0000000000000000 +[ 123.743315] page dumped because: kasan: bad access detected +[ 123.749562] +[ 123.751241] Memory state around the buggy address: +[ 123.756620] ffff8881f3bb4400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 123.764716] ffff8881f3bb4480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 123.772812] >ffff8881f3bb4500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb +[ 123.780904] ^ +[ 123.785697] ffff8881f3bb4580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +[ 123.793793] ffff8881f3bb4600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 123.801883] ================================================================== + +Fixes: cf7221a4f5a5 ("mlxsw: spectrum_router: Add Multicast routing support for Spectrum-2") +Signed-off-by: Ido Schimmel +Acked-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -25,6 +26,7 @@ struct mlxsw_sp_acl { + struct mlxsw_sp_fid *dummy_fid; + struct rhashtable ruleset_ht; + struct list_head rules; ++ struct mutex rules_lock; /* Protects rules list */ + struct { + struct delayed_work dw; + unsigned long interval; /* ms */ +@@ -701,7 +703,9 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_s + goto err_ruleset_block_bind; + } + ++ mutex_lock(&mlxsw_sp->acl->rules_lock); + list_add_tail(&rule->list, &mlxsw_sp->acl->rules); ++ mutex_unlock(&mlxsw_sp->acl->rules_lock); + block->rule_count++; + block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; + return 0; +@@ -723,7 +727,9 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_ + + block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; + ruleset->ht_key.block->rule_count--; ++ mutex_lock(&mlxsw_sp->acl->rules_lock); + list_del(&rule->list); ++ mutex_unlock(&mlxsw_sp->acl->rules_lock); + if (!ruleset->ht_key.chain_index && + mlxsw_sp_acl_ruleset_is_singular(ruleset)) + mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, +@@ -783,19 +789,18 @@ static int mlxsw_sp_acl_rules_activity_u + struct mlxsw_sp_acl_rule *rule; + int err; + +- /* Protect internal structures from changes */ +- rtnl_lock(); ++ mutex_lock(&acl->rules_lock); + list_for_each_entry(rule, &acl->rules, list) { + err = mlxsw_sp_acl_rule_activity_update(acl->mlxsw_sp, + rule); + if (err) + goto err_rule_update; + } +- rtnl_unlock(); ++ mutex_unlock(&acl->rules_lock); + return 0; + + err_rule_update: +- rtnl_unlock(); ++ mutex_unlock(&acl->rules_lock); + return err; + } + +@@ -880,6 +885,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *m + acl->dummy_fid = fid; + + INIT_LIST_HEAD(&acl->rules); ++ mutex_init(&acl->rules_lock); + err = mlxsw_sp_acl_tcam_init(mlxsw_sp, &acl->tcam); + if (err) + goto err_acl_ops_init; +@@ -892,6 +898,7 @@ int mlxsw_sp_acl_init(struct mlxsw_sp *m + return 0; + + err_acl_ops_init: ++ mutex_destroy(&acl->rules_lock); + mlxsw_sp_fid_put(fid); + err_fid_get: + rhashtable_destroy(&acl->ruleset_ht); +@@ -908,6 +915,7 @@ void mlxsw_sp_acl_fini(struct mlxsw_sp * + + cancel_delayed_work_sync(&mlxsw_sp->acl->rule_activity_update.dw); + mlxsw_sp_acl_tcam_fini(mlxsw_sp, &acl->tcam); ++ mutex_destroy(&acl->rules_lock); + WARN_ON(!list_empty(&acl->rules)); + mlxsw_sp_fid_put(acl->dummy_fid); + 
rhashtable_destroy(&acl->ruleset_ht); diff --git a/queue-5.4/net-bcmgenet-use-netif_tx_napi_add-for-tx-napi.patch b/queue-5.4/net-bcmgenet-use-netif_tx_napi_add-for-tx-napi.patch new file mode 100644 index 00000000000..bf411f29efa --- /dev/null +++ b/queue-5.4/net-bcmgenet-use-netif_tx_napi_add-for-tx-napi.patch @@ -0,0 +1,36 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Florian Fainelli +Date: Thu, 23 Jan 2020 09:49:34 -0800 +Subject: net: bcmgenet: Use netif_tx_napi_add() for TX NAPI + +From: Florian Fainelli + +[ Upstream commit 148965df1a990af98b2c84092c2a2274c7489284 ] + +Before commit 7587935cfa11 ("net: bcmgenet: move NAPI initialization to +ring initialization") moved the code, this used to be +netif_tx_napi_add(), but we lost that small semantic change in the +process, restore that. + +Fixes: 7587935cfa11 ("net: bcmgenet: move NAPI initialization to ring initialization") +Signed-off-by: Florian Fainelli +Acked-by: Doug Berger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c +@@ -2164,8 +2164,8 @@ static void bcmgenet_init_tx_ring(struct + DMA_END_ADDR); + + /* Initialize Tx NAPI */ +- netif_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, +- NAPI_POLL_WEIGHT); ++ netif_tx_napi_add(priv->dev, &ring->napi, bcmgenet_tx_poll, ++ NAPI_POLL_WEIGHT); + } + + /* Initialize a RDMA ring */ diff --git a/queue-5.4/net-cxgb3_main-add-cap_net_admin-check-to-chelsio_get_mem.patch b/queue-5.4/net-cxgb3_main-add-cap_net_admin-check-to-chelsio_get_mem.patch new file mode 100644 index 00000000000..410c20049a9 --- /dev/null +++ b/queue-5.4/net-cxgb3_main-add-cap_net_admin-check-to-chelsio_get_mem.patch @@ -0,0 +1,45 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Michael Ellerman +Date: Fri, 24 Jan 2020 20:41:44 +1100 +Subject: net: cxgb3_main: Add CAP_NET_ADMIN check to CHELSIO_GET_MEM + +From: Michael Ellerman + +[ Upstream commit 3546d8f1bbe992488ed91592cf6bf76e7114791a = + +The cxgb3 driver for "Chelsio T3-based gigabit and 10Gb Ethernet +adapters" implements a custom ioctl as SIOCCHIOCTL/SIOCDEVPRIVATE in +cxgb_extension_ioctl(). + +One of the subcommands of the ioctl is CHELSIO_GET_MEM, which appears +to read memory directly out of the adapter and return it to userspace. +It's not entirely clear what the contents of the adapter memory +contains, but the assumption is that it shouldn't be accessible to all +users. + +So add a CAP_NET_ADMIN check to the CHELSIO_GET_MEM case. Put it after +the is_offload() check, which matches two of the other subcommands in +the same function which also check for is_offload() and CAP_NET_ADMIN. + +Found by Ilja by code inspection, not tested as I don't have the +required hardware. + +Reported-by: Ilja Van Sprundel +Signed-off-by: Michael Ellerman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c ++++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +@@ -2448,6 +2448,8 @@ static int cxgb_extension_ioctl(struct n + + if (!is_offload(adapter)) + return -EOPNOTSUPP; ++ if (!capable(CAP_NET_ADMIN)) ++ return -EPERM; + if (!(adapter->flags & FULL_INIT_DONE)) + return -EIO; /* need the memory controllers */ + if (copy_from_user(&t, useraddr, sizeof(t))) diff --git a/queue-5.4/net-fix-packet-reordering-caused-by-gro-and-listified-rx-cooperation.patch b/queue-5.4/net-fix-packet-reordering-caused-by-gro-and-listified-rx-cooperation.patch new file mode 100644 index 00000000000..b6b30ea454d --- /dev/null +++ b/queue-5.4/net-fix-packet-reordering-caused-by-gro-and-listified-rx-cooperation.patch @@ -0,0 +1,221 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Maxim Mikityanskiy +Date: Tue, 21 Jan 2020 15:09:40 +0000 +Subject: net: Fix packet reordering caused by GRO and listified RX cooperation + +From: Maxim Mikityanskiy + +[ Upstream commit c80794323e82ac6ab45052ebba5757ce47b4b588 ] + +Commit 323ebb61e32b ("net: use listified RX for handling GRO_NORMAL +skbs") introduces batching of GRO_NORMAL packets in napi_frags_finish, +and commit 6570bc79c0df ("net: core: use listified Rx for GRO_NORMAL in +napi_gro_receive()") adds the same to napi_skb_finish. However, +dev_gro_receive (that is called just before napi_{frags,skb}_finish) can +also pass skbs to the networking stack: e.g., when the GRO session is +flushed, napi_gro_complete is called, which passes pp directly to +netif_receive_skb_internal, skipping napi->rx_list. It means that the +packet stored in pp will be handled by the stack earlier than the +packets that arrived before, but are still waiting in napi->rx_list. It +leads to TCP reorderings that can be observed in the TCPOFOQueue counter +in netstat. + +This commit fixes the reordering issue by making napi_gro_complete also +use napi->rx_list, so that all packets going through GRO will keep their +order. In order to keep napi_gro_flush working properly, gro_normal_list +calls are moved after the flush to clear napi->rx_list. + +iwlwifi calls napi_gro_flush directly and does the same thing that is +done by gro_normal_list, so the same change is applied there: +napi_gro_flush is moved to be before the flush of napi->rx_list. + +A few other drivers also use napi_gro_flush (brocade/bna/bnad.c, +cortina/gemini.c, hisilicon/hns3/hns3_enet.c). The first two also use +napi_complete_done afterwards, which performs the gro_normal_list flush, +so they are fine. The latter calls napi_gro_receive right after +napi_gro_flush, so it can end up with non-empty napi->rx_list anyway. + +Fixes: 323ebb61e32b ("net: use listified RX for handling GRO_NORMAL skbs") +Signed-off-by: Maxim Mikityanskiy +Cc: Alexander Lobakin +Cc: Edward Cree +Acked-by: Alexander Lobakin +Acked-by: Saeed Mahameed +Acked-by: Edward Cree +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/wireless/intel/iwlwifi/pcie/rx.c | 4 - + net/core/dev.c | 64 +++++++++++++-------------- + 2 files changed, 35 insertions(+), 33 deletions(-) + +--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c ++++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +@@ -1537,13 +1537,13 @@ out: + + napi = &rxq->napi; + if (napi->poll) { ++ napi_gro_flush(napi, false); ++ + if (napi->rx_count) { + netif_receive_skb_list(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; + } +- +- napi_gro_flush(napi, false); + } + + iwl_pcie_rxq_restock(trans, rxq); +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -5270,9 +5270,29 @@ static void flush_all_backlogs(void) + put_online_cpus(); + } + ++/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ ++static void gro_normal_list(struct napi_struct *napi) ++{ ++ if (!napi->rx_count) ++ return; ++ netif_receive_skb_list_internal(&napi->rx_list); ++ INIT_LIST_HEAD(&napi->rx_list); ++ napi->rx_count = 0; ++} ++ ++/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, ++ * pass the whole batch up to the stack. ++ */ ++static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) ++{ ++ list_add_tail(&skb->list, &napi->rx_list); ++ if (++napi->rx_count >= gro_normal_batch) ++ gro_normal_list(napi); ++} ++ + INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int)); + INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int)); +-static int napi_gro_complete(struct sk_buff *skb) ++static int napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb) + { + struct packet_offload *ptype; + __be16 type = skb->protocol; +@@ -5305,7 +5325,8 @@ static int napi_gro_complete(struct sk_b + } + + out: +- return netif_receive_skb_internal(skb); ++ gro_normal_one(napi, skb); ++ return NET_RX_SUCCESS; + } + + static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index, +@@ -5318,7 +5339,7 @@ static void __napi_gro_flush_chain(struc + if (flush_old && NAPI_GRO_CB(skb)->age == jiffies) + return; + skb_list_del_init(skb); +- napi_gro_complete(skb); ++ napi_gro_complete(napi, skb); + napi->gro_hash[index].count--; + } + +@@ -5421,7 +5442,7 @@ static void gro_pull_from_frag0(struct s + } + } + +-static void gro_flush_oldest(struct list_head *head) ++static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) + { + struct sk_buff *oldest; + +@@ -5437,7 +5458,7 @@ static void gro_flush_oldest(struct list + * SKB to the chain. + */ + skb_list_del_init(oldest); +- napi_gro_complete(oldest); ++ napi_gro_complete(napi, oldest); + } + + INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *, +@@ -5513,7 +5534,7 @@ static enum gro_result dev_gro_receive(s + + if (pp) { + skb_list_del_init(pp); +- napi_gro_complete(pp); ++ napi_gro_complete(napi, pp); + napi->gro_hash[hash].count--; + } + +@@ -5524,7 +5545,7 @@ static enum gro_result dev_gro_receive(s + goto normal; + + if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) { +- gro_flush_oldest(gro_head); ++ gro_flush_oldest(napi, gro_head); + } else { + napi->gro_hash[hash].count++; + } +@@ -5672,26 +5693,6 @@ struct sk_buff *napi_get_frags(struct na + } + EXPORT_SYMBOL(napi_get_frags); + +-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. 
*/ +-static void gro_normal_list(struct napi_struct *napi) +-{ +- if (!napi->rx_count) +- return; +- netif_receive_skb_list_internal(&napi->rx_list); +- INIT_LIST_HEAD(&napi->rx_list); +- napi->rx_count = 0; +-} +- +-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, +- * pass the whole batch up to the stack. +- */ +-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +-{ +- list_add_tail(&skb->list, &napi->rx_list); +- if (++napi->rx_count >= gro_normal_batch) +- gro_normal_list(napi); +-} +- + static gro_result_t napi_frags_finish(struct napi_struct *napi, + struct sk_buff *skb, + gro_result_t ret) +@@ -5979,8 +5980,6 @@ bool napi_complete_done(struct napi_stru + NAPIF_STATE_IN_BUSY_POLL))) + return false; + +- gro_normal_list(n); +- + if (n->gro_bitmask) { + unsigned long timeout = 0; + +@@ -5996,6 +5995,9 @@ bool napi_complete_done(struct napi_stru + hrtimer_start(&n->timer, ns_to_ktime(timeout), + HRTIMER_MODE_REL_PINNED); + } ++ ++ gro_normal_list(n); ++ + if (unlikely(!list_empty(&n->poll_list))) { + /* If n->poll_list is not empty, we need to mask irqs */ + local_irq_save(flags); +@@ -6327,8 +6329,6 @@ static int napi_poll(struct napi_struct + goto out_unlock; + } + +- gro_normal_list(n); +- + if (n->gro_bitmask) { + /* flush too old packets + * If HZ < 1000, flush all packets. +@@ -6336,6 +6336,8 @@ static int napi_poll(struct napi_struct + napi_gro_flush(n, HZ >= 1000); + } + ++ gro_normal_list(n); ++ + /* Some drivers may have called napi_schedule + * prior to exhausting their budget. + */ diff --git a/queue-5.4/net-ip6_gre-fix-moving-ip6gre-between-namespaces.patch b/queue-5.4/net-ip6_gre-fix-moving-ip6gre-between-namespaces.patch new file mode 100644 index 00000000000..e6ac8b74ea7 --- /dev/null +++ b/queue-5.4/net-ip6_gre-fix-moving-ip6gre-between-namespaces.patch @@ -0,0 +1,52 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Niko Kortstrom +Date: Thu, 16 Jan 2020 11:43:27 +0200 +Subject: net: ip6_gre: fix moving ip6gre between namespaces + +From: Niko Kortstrom + +[ Upstream commit 690afc165bb314354667f67157c1a1aea7dc797a ] + +Support for moving IPv4 GRE tunnels between namespaces was added in +commit b57708add314 ("gre: add x-netns support"). The respective change +for IPv6 tunnels, commit 22f08069e8b4 ("ip6gre: add x-netns support") +did not drop NETIF_F_NETNS_LOCAL flag so moving them from one netns to +another is still denied in IPv6 case. Drop NETIF_F_NETNS_LOCAL flag from +ip6gre tunnels to allow moving ip6gre tunnel endpoints between network +namespaces. + +Signed-off-by: Niko Kortstrom +Acked-by: Nicolas Dichtel +Acked-by: William Tu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 3 --- + 1 file changed, 3 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -1466,7 +1466,6 @@ static int ip6gre_tunnel_init_common(str + dev->mtu -= 8; + + if (tunnel->parms.collect_md) { +- dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } + ip6gre_tnl_init_features(dev); +@@ -1894,7 +1893,6 @@ static void ip6gre_tap_setup(struct net_ + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; + +- dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); +@@ -2197,7 +2195,6 @@ static void ip6erspan_tap_setup(struct n + dev->needs_free_netdev = true; + dev->priv_destructor = ip6gre_dev_free; + +- dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); diff --git a/queue-5.4/net-ip6_tunnel-fix-namespaces-move.patch b/queue-5.4/net-ip6_tunnel-fix-namespaces-move.patch new file mode 100644 index 00000000000..b09f81124ee --- /dev/null +++ b/queue-5.4/net-ip6_tunnel-fix-namespaces-move.patch @@ -0,0 +1,37 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: William Dauchy +Date: Tue, 21 Jan 2020 21:49:54 +0100 +Subject: net, ip6_tunnel: fix namespaces move + +From: William Dauchy + +[ Upstream commit 5311a69aaca30fa849c3cc46fb25f75727fb72d0 ] + +in the same manner as commit d0f418516022 ("net, ip_tunnel: fix +namespaces move"), fix namespace moving as it was broken since commit +8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnel"), but for +ipv6 this time; there is no reason to keep it for ip6_tunnel. + +Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnel") +Signed-off-by: William Dauchy +Acked-by: Nicolas Dichtel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1877,10 +1877,8 @@ static int ip6_tnl_dev_init(struct net_d + if (err) + return err; + ip6_tnl_link_config(t); +- if (t->parms.collect_md) { +- dev->features |= NETIF_F_NETNS_LOCAL; ++ if (t->parms.collect_md) + netif_keep_dst(dev); +- } + return 0; + } + diff --git a/queue-5.4/net-ip_tunnel-fix-namespaces-move.patch b/queue-5.4/net-ip_tunnel-fix-namespaces-move.patch new file mode 100644 index 00000000000..466272caddd --- /dev/null +++ b/queue-5.4/net-ip_tunnel-fix-namespaces-move.patch @@ -0,0 +1,40 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: William Dauchy +Date: Tue, 21 Jan 2020 15:26:24 +0100 +Subject: net, ip_tunnel: fix namespaces move + +From: William Dauchy + +[ Upstream commit d0f418516022c32ecceaf4275423e5bd3f8743a9 ] + +in the same manner as commit 690afc165bb3 ("net: ip6_gre: fix moving +ip6gre between namespaces"), fix namespace moving as it was broken since +commit 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata."). +Indeed, the ip6_gre commit removed the local flag for collect_md +condition, so there is no reason to keep it for ip_gre/ip_tunnel. + +this patch will fix both ip_tunnel and ip_gre modules. + +Fixes: 2e15ea390e6f ("ip_gre: Add support to collect tunnel metadata.") +Signed-off-by: William Dauchy +Acked-by: Nicolas Dichtel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_tunnel.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/ipv4/ip_tunnel.c ++++ b/net/ipv4/ip_tunnel.c +@@ -1236,10 +1236,8 @@ int ip_tunnel_init(struct net_device *de + iph->version = 4; + iph->ihl = 5; + +- if (tunnel->collect_md) { +- dev->features |= NETIF_F_NETNS_LOCAL; ++ if (tunnel->collect_md) + netif_keep_dst(dev); +- } + return 0; + } + EXPORT_SYMBOL_GPL(ip_tunnel_init); diff --git a/queue-5.4/net-mlx5-dr-enable-counter-on-non-fwd-dest-objects.patch b/queue-5.4/net-mlx5-dr-enable-counter-on-non-fwd-dest-objects.patch new file mode 100644 index 00000000000..97b1abfae68 --- /dev/null +++ b/queue-5.4/net-mlx5-dr-enable-counter-on-non-fwd-dest-objects.patch @@ -0,0 +1,85 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Erez Shitrit +Date: Wed, 8 Jan 2020 14:17:32 +0200 +Subject: net/mlx5: DR, Enable counter on non-fwd-dest objects + +From: Erez Shitrit + +The current code handles only counters that attached to dest, we still +have the cases where we have counter on non-dest, like over drop etc. + +Fixes: 6a48faeeca10 ("net/mlx5: Add direct rule fs_cmd implementation") +Signed-off-by: Hamdan Igbaria +Signed-off-by: Erez Shitrit +Reviewed-by: Alex Vesker +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c | 42 ++++++++++----- + 1 file changed, 29 insertions(+), 13 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c +@@ -352,26 +352,16 @@ static int mlx5_cmd_dr_create_fte(struct + if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { + list_for_each_entry(dst, &fte->node.children, node.list) { + enum mlx5_flow_destination_type type = dst->dest_attr.type; +- u32 id; + + if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { + err = -ENOSPC; + goto free_actions; + } + +- switch (type) { +- case MLX5_FLOW_DESTINATION_TYPE_COUNTER: +- id = dst->dest_attr.counter_id; ++ if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER) ++ continue; + +- tmp_action = +- mlx5dr_action_create_flow_counter(id); +- if (!tmp_action) { +- err = -ENOMEM; +- goto free_actions; +- } +- fs_dr_actions[fs_dr_num_actions++] = tmp_action; +- actions[num_actions++] = tmp_action; +- break; ++ switch (type) { + case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE: + tmp_action = create_ft_action(dev, dst); + if (!tmp_action) { +@@ -397,6 +387,32 @@ static int mlx5_cmd_dr_create_fte(struct + } + } + ++ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { ++ list_for_each_entry(dst, &fte->node.children, node.list) { ++ u32 id; ++ ++ if (dst->dest_attr.type != ++ MLX5_FLOW_DESTINATION_TYPE_COUNTER) ++ continue; ++ ++ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { ++ err = -ENOSPC; ++ goto free_actions; ++ } ++ ++ id = dst->dest_attr.counter_id; ++ tmp_action = ++ mlx5dr_action_create_flow_counter(id); ++ if (!tmp_action) { ++ err = -ENOMEM; ++ goto free_actions; ++ } ++ ++ fs_dr_actions[fs_dr_num_actions++] = tmp_action; ++ actions[num_actions++] = tmp_action; ++ } ++ } ++ + params.match_sz = match_sz; + params.match_buf = (u64 *)fte->val; + diff --git a/queue-5.4/net-mlx5-dr-use-non-preemptible-call-to-get-the-current-cpu-number.patch b/queue-5.4/net-mlx5-dr-use-non-preemptible-call-to-get-the-current-cpu-number.patch new file mode 100644 index 00000000000..f1b49ab3f3d --- /dev/null +++ b/queue-5.4/net-mlx5-dr-use-non-preemptible-call-to-get-the-current-cpu-number.patch 
@@ -0,0 +1,46 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Erez Shitrit +Date: Sun, 12 Jan 2020 08:57:59 +0200 +Subject: net/mlx5: DR, use non preemptible call to get the current cpu number + +From: Erez Shitrit + +Use raw_smp_processor_id instead of smp_processor_id() otherwise we will +get the following trace in debug-kernel: + BUG: using smp_processor_id() in preemptible [00000000] code: devlink + caller is dr_create_cq.constprop.2+0x31d/0x970 [mlx5_core] + Call Trace: + dump_stack+0x9a/0xf0 + debug_smp_processor_id+0x1f3/0x200 + dr_create_cq.constprop.2+0x31d/0x970 + genl_family_rcv_msg+0x5fd/0x1170 + genl_rcv_msg+0xb8/0x160 + netlink_rcv_skb+0x11e/0x340 + +Fixes: 297cccebdc5a ("net/mlx5: DR, Expose an internal API to issue RDMA operations") +Signed-off-by: Erez Shitrit +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c +@@ -1,6 +1,7 @@ + // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + /* Copyright (c) 2019 Mellanox Technologies. */ + ++#include + #include "dr_types.h" + + #define QUEUE_SIZE 128 +@@ -729,7 +730,7 @@ static struct mlx5dr_cq *dr_create_cq(st + if (!in) + goto err_cqwq; + +- vector = smp_processor_id() % mlx5_comp_vectors_count(mdev); ++ vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev); + err = mlx5_vector2eqn(mdev, vector, &eqn, &irqn); + if (err) { + kvfree(in); diff --git a/queue-5.4/net-mlx5-e-switch-prevent-ingress-rate-configuration-of-uplink-rep.patch b/queue-5.4/net-mlx5-e-switch-prevent-ingress-rate-configuration-of-uplink-rep.patch new file mode 100644 index 00000000000..5ea69616d27 --- /dev/null +++ b/queue-5.4/net-mlx5-e-switch-prevent-ingress-rate-configuration-of-uplink-rep.patch @@ -0,0 +1,46 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Eli Cohen +Date: Sun, 12 Jan 2020 13:43:37 +0200 +Subject: net/mlx5: E-Switch, Prevent ingress rate configuration of uplink rep + +From: Eli Cohen + +Since the implementation relies on limiting the VF transmit rate to +simulate ingress rate limiting, and since either uplink representor or +ecpf are not associated with a VF, we limit the rate limit configuration +for those ports. + +Fixes: fcb64c0f5640 ("net/mlx5: E-Switch, add ingress rate support") +Signed-off-by: Eli Cohen +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -3951,6 +3951,13 @@ static int apply_police_params(struct ml + u32 rate_mbps; + int err; + ++ vport_num = rpriv->rep->vport; ++ if (vport_num >= MLX5_VPORT_ECPF) { ++ NL_SET_ERR_MSG_MOD(extack, ++ "Ingress rate limit is supported only for Eswitch ports connected to VFs"); ++ return -EOPNOTSUPP; ++ } ++ + esw = priv->mdev->priv.eswitch; + /* rate is given in bytes/sec. + * First convert to bits/sec and then round to the nearest mbit/secs. +@@ -3959,8 +3966,6 @@ static int apply_police_params(struct ml + * 1 mbit/sec. + */ + rate_mbps = rate ? 
max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; +- vport_num = rpriv->rep->vport; +- + err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + if (err) + NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); diff --git a/queue-5.4/net-mlx5-fix-lowest-fdb-pool-size.patch b/queue-5.4/net-mlx5-fix-lowest-fdb-pool-size.patch new file mode 100644 index 00000000000..e622f9ab67a --- /dev/null +++ b/queue-5.4/net-mlx5-fix-lowest-fdb-pool-size.patch @@ -0,0 +1,35 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Paul Blakey +Date: Tue, 31 Dec 2019 17:04:15 +0200 +Subject: net/mlx5: Fix lowest FDB pool size + +From: Paul Blakey + +The pool sizes represent the pool sizes in the fw. when we request +a pool size from fw, it will return the next possible group. +We track how many pools the fw has left and start requesting groups +from the big to the small. +When we start request 4k group, which doesn't exists in fw, fw +wants to allocate the next possible size, 64k, but will fail since +its exhausted. The correct smallest pool size in fw is 128 and not 4k. + +Fixes: e52c28024008 ("net/mlx5: E-Switch, Add chains and priorities") +Signed-off-by: Paul Blakey +Reviewed-by: Roi Dayan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +@@ -858,7 +858,7 @@ out: + */ + #define ESW_SIZE (16 * 1024 * 1024) + const unsigned int ESW_POOLS[4] = { 4 * 1024 * 1024, 1 * 1024 * 1024, +- 64 * 1024, 4 * 1024 }; ++ 64 * 1024, 128 }; + + static int + get_sz_from_pool(struct mlx5_eswitch *esw) diff --git a/queue-5.4/net-mlx5-update-the-list-of-the-pci-supported-devices.patch b/queue-5.4/net-mlx5-update-the-list-of-the-pci-supported-devices.patch new file mode 100644 index 00000000000..9b99dd78dbc --- /dev/null +++ b/queue-5.4/net-mlx5-update-the-list-of-the-pci-supported-devices.patch @@ -0,0 +1,28 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Meir Lichtinger +Date: Thu, 12 Dec 2019 16:09:33 +0200 +Subject: net/mlx5: Update the list of the PCI supported devices + +From: Meir Lichtinger + +Add the upcoming ConnectX-7 device ID. 
+ +Fixes: 85327a9c4150 ("net/mlx5: Update the list of the PCI supported devices") +Signed-off-by: Meir Lichtinger +Reviewed-by: Eran Ben Elisha +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -1569,6 +1569,7 @@ static const struct pci_device_id mlx5_c + { PCI_VDEVICE(MELLANOX, 0x101d) }, /* ConnectX-6 Dx */ + { PCI_VDEVICE(MELLANOX, 0x101e), MLX5_PCI_DEV_IS_VF}, /* ConnectX Family mlx5Gen Virtual Function */ + { PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */ ++ { PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */ + { PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */ + { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */ + { PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */ diff --git a/queue-5.4/net-mlx5e-ktls-do-not-send-decrypted-marked-skbs-via-non-accel-path.patch b/queue-5.4/net-mlx5e-ktls-do-not-send-decrypted-marked-skbs-via-non-accel-path.patch new file mode 100644 index 00000000000..f466a0cd428 --- /dev/null +++ b/queue-5.4/net-mlx5e-ktls-do-not-send-decrypted-marked-skbs-via-non-accel-path.patch @@ -0,0 +1,53 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Tariq Toukan +Date: Mon, 20 Jan 2020 13:42:00 +0200 +Subject: net/mlx5e: kTLS, Do not send decrypted-marked SKBs via non-accel path + +From: Tariq Toukan + +When TCP out-of-order is identified (unexpected tcp seq mismatch), driver +analyzes the packet and decides what handling should it get: +1. go to accelerated path (to be encrypted in HW), +2. go to regular xmit path (send w/o encryption), +3. drop. + +Packets marked with skb->decrypted by the TLS stack in the TX flow skips +SW encryption, and rely on the HW offload. +Verify that such packets are never sent un-encrypted on the wire. +Add a WARN to catch such bugs, and prefer dropping the packet in these cases. 
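Schematically, the three outcomes listed above map onto the resync result as follows (a simplified sketch; the enum values and labels are placeholders rather than the exact driver symbols):

  switch (resync_status) {
  case SYNC_DONE:                 /* (1) resync ok: send on the accelerated path */
          break;
  case SYNC_SKIP_NO_DATA:
          if (!skb->decrypted)    /* (2) not marked by the TLS stack: plain xmit */
                  goto xmit_plain;
          WARN_ON_ONCE(1);        /* a decrypted-marked skb must never go out in clear */
          /* fall through */
  default:                        /* SYNC_FAIL */
          goto drop;              /* (3) prefer dropping over leaking plaintext */
  }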
+ +Fixes: 46a3ea98074e ("net/mlx5e: kTLS, Enhance TX resync flow") +Signed-off-by: Tariq Toukan +Signed-off-by: Boris Pismenny +Reviewed-by: Boris Pismenny +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 14 +++++++++---- + 1 file changed, 10 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +@@ -458,12 +458,18 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb + enum mlx5e_ktls_sync_retval ret = + mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); + +- if (likely(ret == MLX5E_KTLS_SYNC_DONE)) ++ switch (ret) { ++ case MLX5E_KTLS_SYNC_DONE: + *wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi); +- else if (ret == MLX5E_KTLS_SYNC_FAIL) ++ break; ++ case MLX5E_KTLS_SYNC_SKIP_NO_DATA: ++ if (likely(!skb->decrypted)) ++ goto out; ++ WARN_ON_ONCE(1); ++ /* fall-through */ ++ default: /* MLX5E_KTLS_SYNC_FAIL */ + goto err_out; +- else /* ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA */ +- goto out; ++ } + } + + priv_tx->expected_seq = seq + datalen; diff --git a/queue-5.4/net-mlx5e-ktls-fix-corner-case-checks-in-tx-resync-flow.patch b/queue-5.4/net-mlx5e-ktls-fix-corner-case-checks-in-tx-resync-flow.patch new file mode 100644 index 00000000000..648d22c5c72 --- /dev/null +++ b/queue-5.4/net-mlx5e-ktls-fix-corner-case-checks-in-tx-resync-flow.patch @@ -0,0 +1,111 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Tariq Toukan +Date: Sun, 12 Jan 2020 16:22:14 +0200 +Subject: net/mlx5e: kTLS, Fix corner-case checks in TX resync flow + +From: Tariq Toukan + +There are the following cases: + +1. Packet ends before start marker: bypass offload. +2. Packet starts before start marker and ends after it: drop, + not supported, breaks contract with kernel. +3. packet ends before tls record info starts: drop, + this packet was already acknowledged and its record info + was released. + +Add the above as comment in code. + +Mind possible wraparounds of the TCP seq, replace the simple comparison +with a call to the TCP before() method. + +In addition, remove logic that handles negative sync_len values, +as it became impossible. 
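For reference, before() decides ordering by the signed 32-bit distance between sequence numbers, so it stays correct across the 2^32 wrap; the helper is reproduced here as a sketch with a small worked example:

  static inline bool before(__u32 seq1, __u32 seq2)
  {
          return (__s32)(seq1 - seq2) < 0;        /* signed distance, wrap-safe */
  }

  /* Worked example near the wrap: seq1 = 0xfffffff0, seq2 = 0x00000010.
   * A plain "seq1 < seq2" is false, but (__s32)(seq1 - seq2) == -0x20,
   * so before(seq1, seq2) correctly reports seq1 as the earlier sequence. */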
+ +Fixes: d2ead1f360e8 ("net/mlx5e: Add kTLS TX HW offload support") +Fixes: 46a3ea98074e ("net/mlx5e: kTLS, Enhance TX resync flow") +Signed-off-by: Tariq Toukan +Signed-off-by: Boris Pismenny +Reviewed-by: Boris Pismenny +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 33 +++++++------ + 1 file changed, 19 insertions(+), 14 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +@@ -180,7 +180,7 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx + + struct tx_sync_info { + u64 rcd_sn; +- s32 sync_len; ++ u32 sync_len; + int nr_frags; + skb_frag_t frags[MAX_SKB_FRAGS]; + }; +@@ -193,13 +193,14 @@ enum mlx5e_ktls_sync_retval { + + static enum mlx5e_ktls_sync_retval + tx_sync_info_get(struct mlx5e_ktls_offload_context_tx *priv_tx, +- u32 tcp_seq, struct tx_sync_info *info) ++ u32 tcp_seq, int datalen, struct tx_sync_info *info) + { + struct tls_offload_context_tx *tx_ctx = priv_tx->tx_ctx; + enum mlx5e_ktls_sync_retval ret = MLX5E_KTLS_SYNC_DONE; + struct tls_record_info *record; + int remaining, i = 0; + unsigned long flags; ++ bool ends_before; + + spin_lock_irqsave(&tx_ctx->lock, flags); + record = tls_get_record(tx_ctx, tcp_seq, &info->rcd_sn); +@@ -209,9 +210,21 @@ tx_sync_info_get(struct mlx5e_ktls_offlo + goto out; + } + +- if (unlikely(tcp_seq < tls_record_start_seq(record))) { +- ret = tls_record_is_start_marker(record) ? +- MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; ++ /* There are the following cases: ++ * 1. packet ends before start marker: bypass offload. ++ * 2. packet starts before start marker and ends after it: drop, ++ * not supported, breaks contract with kernel. ++ * 3. packet ends before tls record info starts: drop, ++ * this packet was already acknowledged and its record info ++ * was released. ++ */ ++ ends_before = before(tcp_seq + datalen, tls_record_start_seq(record)); ++ ++ if (unlikely(tls_record_is_start_marker(record))) { ++ ret = ends_before ? MLX5E_KTLS_SYNC_SKIP_NO_DATA : MLX5E_KTLS_SYNC_FAIL; ++ goto out; ++ } else if (ends_before) { ++ ret = MLX5E_KTLS_SYNC_FAIL; + goto out; + } + +@@ -337,7 +350,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_kt + u8 num_wqebbs; + int i = 0; + +- ret = tx_sync_info_get(priv_tx, seq, &info); ++ ret = tx_sync_info_get(priv_tx, seq, datalen, &info); + if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) { + if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) { + stats->tls_skip_no_sync_data++; +@@ -351,14 +364,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_kt + goto err_out; + } + +- if (unlikely(info.sync_len < 0)) { +- if (likely(datalen <= -info.sync_len)) +- return MLX5E_KTLS_SYNC_DONE; +- +- stats->tls_drop_bypass_req++; +- goto err_out; +- } +- + stats->tls_ooo++; + + tx_post_resync_params(sq, priv_tx, info.rcd_sn); diff --git a/queue-5.4/net-mlx5e-ktls-remove-redundant-posts-in-tx-resync-flow.patch b/queue-5.4/net-mlx5e-ktls-remove-redundant-posts-in-tx-resync-flow.patch new file mode 100644 index 00000000000..85cc014bf27 --- /dev/null +++ b/queue-5.4/net-mlx5e-ktls-remove-redundant-posts-in-tx-resync-flow.patch @@ -0,0 +1,31 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Tariq Toukan +Date: Mon, 13 Jan 2020 14:46:09 +0200 +Subject: net/mlx5e: kTLS, Remove redundant posts in TX resync flow + +From: Tariq Toukan + +The call to tx_post_resync_params() is done earlier in the flow, +the post of the control WQEs is unnecessarily repeated. Remove it. 
+ +Fixes: 700ec4974240 ("net/mlx5e: kTLS, Fix missing SQ edge fill") +Signed-off-by: Tariq Toukan +Signed-off-by: Boris Pismenny +Reviewed-by: Boris Pismenny +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +@@ -383,8 +383,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_kt + if (unlikely(contig_wqebbs_room < num_wqebbs)) + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); + +- tx_post_resync_params(sq, priv_tx, info.rcd_sn); +- + for (; i < info.nr_frags; i++) { + unsigned int orig_fsz, frag_offset = 0, n = 0; + skb_frag_t *f = &info.frags[i]; diff --git a/queue-5.4/net-rtnetlink-validate-ifla_mtu-attribute-in-rtnl_create_link.patch b/queue-5.4/net-rtnetlink-validate-ifla_mtu-attribute-in-rtnl_create_link.patch new file mode 100644 index 00000000000..bf346053554 --- /dev/null +++ b/queue-5.4/net-rtnetlink-validate-ifla_mtu-attribute-in-rtnl_create_link.patch @@ -0,0 +1,177 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Eric Dumazet +Date: Tue, 21 Jan 2020 22:47:29 -0800 +Subject: net: rtnetlink: validate IFLA_MTU attribute in rtnl_create_link() + +From: Eric Dumazet + +[ Upstream commit d836f5c69d87473ff65c06a6123e5b2cf5e56f5b ] + +rtnl_create_link() needs to apply dev->min_mtu and dev->max_mtu +checks that we apply in do_setlink() + +Otherwise malicious users can crash the kernel, for example after +an integer overflow : + +BUG: KASAN: use-after-free in memset include/linux/string.h:365 [inline] +BUG: KASAN: use-after-free in __alloc_skb+0x37b/0x5e0 net/core/skbuff.c:238 +Write of size 32 at addr ffff88819f20b9c0 by task swapper/0/0 + +CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.5.0-rc1-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x197/0x210 lib/dump_stack.c:118 + print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 + __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 + kasan_report+0x12/0x20 mm/kasan/common.c:639 + check_memory_region_inline mm/kasan/generic.c:185 [inline] + check_memory_region+0x134/0x1a0 mm/kasan/generic.c:192 + memset+0x24/0x40 mm/kasan/common.c:108 + memset include/linux/string.h:365 [inline] + __alloc_skb+0x37b/0x5e0 net/core/skbuff.c:238 + alloc_skb include/linux/skbuff.h:1049 [inline] + alloc_skb_with_frags+0x93/0x590 net/core/skbuff.c:5664 + sock_alloc_send_pskb+0x7ad/0x920 net/core/sock.c:2242 + sock_alloc_send_skb+0x32/0x40 net/core/sock.c:2259 + mld_newpack+0x1d7/0x7f0 net/ipv6/mcast.c:1609 + add_grhead.isra.0+0x299/0x370 net/ipv6/mcast.c:1713 + add_grec+0x7db/0x10b0 net/ipv6/mcast.c:1844 + mld_send_cr net/ipv6/mcast.c:1970 [inline] + mld_ifc_timer_expire+0x3d3/0x950 net/ipv6/mcast.c:2477 + call_timer_fn+0x1ac/0x780 kernel/time/timer.c:1404 + expire_timers kernel/time/timer.c:1449 [inline] + __run_timers kernel/time/timer.c:1773 [inline] + __run_timers kernel/time/timer.c:1740 [inline] + run_timer_softirq+0x6c3/0x1790 kernel/time/timer.c:1786 + __do_softirq+0x262/0x98c kernel/softirq.c:292 + invoke_softirq kernel/softirq.c:373 [inline] + irq_exit+0x19b/0x1e0 kernel/softirq.c:413 + exiting_irq arch/x86/include/asm/apic.h:536 [inline] + smp_apic_timer_interrupt+0x1a3/0x610 arch/x86/kernel/apic/apic.c:1137 + apic_timer_interrupt+0xf/0x20 
arch/x86/entry/entry_64.S:829 + +RIP: 0010:native_safe_halt+0xe/0x10 arch/x86/include/asm/irqflags.h:61 +Code: 98 6b ea f9 eb 8a cc cc cc cc cc cc e9 07 00 00 00 0f 00 2d 44 1c 60 00 f4 c3 66 90 e9 07 00 00 00 0f 00 2d 34 1c 60 00 fb f4 cc 55 48 89 e5 41 57 41 56 41 55 41 54 53 e8 4e 5d 9a f9 e8 79 +RSP: 0018:ffffffff89807ce8 EFLAGS: 00000286 ORIG_RAX: ffffffffffffff13 +RAX: 1ffffffff13266ae RBX: ffffffff8987a1c0 RCX: 0000000000000000 +RDX: dffffc0000000000 RSI: 0000000000000006 RDI: ffffffff8987aa54 +RBP: ffffffff89807d18 R08: ffffffff8987a1c0 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000000 R12: dffffc0000000000 +R13: ffffffff8a799980 R14: 0000000000000000 R15: 0000000000000000 + arch_cpu_idle+0xa/0x10 arch/x86/kernel/process.c:690 + default_idle_call+0x84/0xb0 kernel/sched/idle.c:94 + cpuidle_idle_call kernel/sched/idle.c:154 [inline] + do_idle+0x3c8/0x6e0 kernel/sched/idle.c:269 + cpu_startup_entry+0x1b/0x20 kernel/sched/idle.c:361 + rest_init+0x23b/0x371 init/main.c:451 + arch_call_rest_init+0xe/0x1b + start_kernel+0x904/0x943 init/main.c:784 + x86_64_start_reservations+0x29/0x2b arch/x86/kernel/head64.c:490 + x86_64_start_kernel+0x77/0x7b arch/x86/kernel/head64.c:471 + secondary_startup_64+0xa4/0xb0 arch/x86/kernel/head_64.S:242 + +The buggy address belongs to the page: +page:ffffea00067c82c0 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 +raw: 057ffe0000000000 ffffea00067c82c8 ffffea00067c82c8 0000000000000000 +raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff88819f20b880: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff88819f20b900: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +>ffff88819f20b980: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ^ + ffff88819f20ba00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + ffff88819f20ba80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff + +Fixes: 61e84623ace3 ("net: centralize net_device min/max MTU checking") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/netdevice.h | 2 ++ + net/core/dev.c | 29 +++++++++++++++++++---------- + net/core/rtnetlink.c | 13 +++++++++++-- + 3 files changed, 32 insertions(+), 12 deletions(-) + +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3666,6 +3666,8 @@ int dev_set_alias(struct net_device *, c + int dev_get_alias(const struct net_device *, char *, size_t); + int dev_change_net_namespace(struct net_device *, struct net *, const char *); + int __dev_set_mtu(struct net_device *, int); ++int dev_validate_mtu(struct net_device *dev, int mtu, ++ struct netlink_ext_ack *extack); + int dev_set_mtu_ext(struct net_device *dev, int mtu, + struct netlink_ext_ack *extack); + int dev_set_mtu(struct net_device *, int); +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -7973,6 +7973,22 @@ int __dev_set_mtu(struct net_device *dev + } + EXPORT_SYMBOL(__dev_set_mtu); + ++int dev_validate_mtu(struct net_device *dev, int new_mtu, ++ struct netlink_ext_ack *extack) ++{ ++ /* MTU must be positive, and in range */ ++ if (new_mtu < 0 || new_mtu < dev->min_mtu) { ++ NL_SET_ERR_MSG(extack, "mtu less than device minimum"); ++ return -EINVAL; ++ } ++ ++ if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { ++ NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); ++ return -EINVAL; ++ } ++ return 0; ++} ++ + /** + * dev_set_mtu_ext - Change maximum transfer unit + * @dev: device +@@ -7989,16 +8005,9 @@ int dev_set_mtu_ext(struct net_device *d + if (new_mtu == dev->mtu) + return 0; + +- /* MTU must be positive, and in range */ +- if (new_mtu < 0 || new_mtu < dev->min_mtu) { +- NL_SET_ERR_MSG(extack, "mtu less than device minimum"); +- return -EINVAL; +- } +- +- if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) { +- NL_SET_ERR_MSG(extack, "mtu greater than device maximum"); +- return -EINVAL; +- } ++ err = dev_validate_mtu(dev, new_mtu, extack); ++ if (err) ++ return err; + + if (!netif_device_present(dev)) + return -ENODEV; +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -2959,8 +2959,17 @@ struct net_device *rtnl_create_link(stru + dev->rtnl_link_ops = ops; + dev->rtnl_link_state = RTNL_LINK_INITIALIZING; + +- if (tb[IFLA_MTU]) +- dev->mtu = nla_get_u32(tb[IFLA_MTU]); ++ if (tb[IFLA_MTU]) { ++ u32 mtu = nla_get_u32(tb[IFLA_MTU]); ++ int err; ++ ++ err = dev_validate_mtu(dev, mtu, extack); ++ if (err) { ++ free_netdev(dev); ++ return ERR_PTR(err); ++ } ++ dev->mtu = mtu; ++ } + if (tb[IFLA_ADDRESS]) { + memcpy(dev->dev_addr, nla_data(tb[IFLA_ADDRESS]), + nla_len(tb[IFLA_ADDRESS])); diff --git a/queue-5.4/net-sysfs-fix-reference-count-leak.patch b/queue-5.4/net-sysfs-fix-reference-count-leak.patch new file mode 100644 index 00000000000..2bcf5254600 --- /dev/null +++ b/queue-5.4/net-sysfs-fix-reference-count-leak.patch @@ -0,0 +1,99 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Jouni Hogander +Date: Mon, 20 Jan 2020 09:51:03 +0200 +Subject: net-sysfs: Fix reference count leak + +From: Jouni Hogander + +[ Upstream commit cb626bf566eb4433318d35681286c494f04fedcc ] + +Netdev_register_kobject is calling device_initialize. In case of error +reference taken by device_initialize is not given up. + +Drivers are supposed to call free_netdev in case of error. In non-error +case the last reference is given up there and device release sequence +is triggered. In error case this reference is kept and the release +sequence is never started. + +Fix this by setting reg_state as NETREG_UNREGISTERED if registering +fails. 
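The driver-side pattern this relies on looks roughly like the following (a generic sketch, not any particular driver); the error branch is where the reference used to be left dangling:

  #include <linux/etherdevice.h>
  #include <linux/netdevice.h>

  static int demo_probe(void)             /* placeholder probe, illustration only */
  {
          struct net_device *dev;
          int err;

          dev = alloc_etherdev(0);
          if (!dev)
                  return -ENOMEM;

          err = register_netdev(dev);     /* can fail after device_initialize() */
          if (err) {
                  /* With reg_state forced to NETREG_UNREGISTERED on failure,
                   * free_netdev() now runs the normal release sequence and the
                   * kobject reference is dropped instead of leaking. */
                  free_netdev(dev);
                  return err;
          }
          return 0;
  }

Without the fix, such a failed registration leaves that last reference behind.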
+ +This is the rootcause for couple of memory leaks reported by Syzkaller: + +BUG: memory leak unreferenced object 0xffff8880675ca008 (size 256): + comm "netdev_register", pid 281, jiffies 4294696663 (age 6.808s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + backtrace: + [<0000000058ca4711>] kmem_cache_alloc_trace+0x167/0x280 + [<000000002340019b>] device_add+0x882/0x1750 + [<000000001d588c3a>] netdev_register_kobject+0x128/0x380 + [<0000000011ef5535>] register_netdevice+0xa1b/0xf00 + [<000000007fcf1c99>] __tun_chr_ioctl+0x20d5/0x3dd0 + [<000000006a5b7b2b>] tun_chr_ioctl+0x2f/0x40 + [<00000000f30f834a>] do_vfs_ioctl+0x1c7/0x1510 + [<00000000fba062ea>] ksys_ioctl+0x99/0xb0 + [<00000000b1c1b8d2>] __x64_sys_ioctl+0x78/0xb0 + [<00000000984cabb9>] do_syscall_64+0x16f/0x580 + [<000000000bde033d>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [<00000000e6ca2d9f>] 0xffffffffffffffff + +BUG: memory leak +unreferenced object 0xffff8880668ba588 (size 8): + comm "kobject_set_nam", pid 286, jiffies 4294725297 (age 9.871s) + hex dump (first 8 bytes): + 6e 72 30 00 cc be df 2b nr0....+ + backtrace: + [<00000000a322332a>] __kmalloc_track_caller+0x16e/0x290 + [<00000000236fd26b>] kstrdup+0x3e/0x70 + [<00000000dd4a2815>] kstrdup_const+0x3e/0x50 + [<0000000049a377fc>] kvasprintf_const+0x10e/0x160 + [<00000000627fc711>] kobject_set_name_vargs+0x5b/0x140 + [<0000000019eeab06>] dev_set_name+0xc0/0xf0 + [<0000000069cb12bc>] netdev_register_kobject+0xc8/0x320 + [<00000000f2e83732>] register_netdevice+0xa1b/0xf00 + [<000000009e1f57cc>] __tun_chr_ioctl+0x20d5/0x3dd0 + [<000000009c560784>] tun_chr_ioctl+0x2f/0x40 + [<000000000d759e02>] do_vfs_ioctl+0x1c7/0x1510 + [<00000000351d7c31>] ksys_ioctl+0x99/0xb0 + [<000000008390040a>] __x64_sys_ioctl+0x78/0xb0 + [<0000000052d196b7>] do_syscall_64+0x16f/0x580 + [<0000000019af9236>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + [<00000000bc384531>] 0xffffffffffffffff + +v3 -> v4: + Set reg_state to NETREG_UNREGISTERED if registering fails + +v2 -> v3: +* Replaced BUG_ON with WARN_ON in free_netdev and netdev_release + +v1 -> v2: +* Relying on driver calling free_netdev rather than calling + put_device directly in error path + +Reported-by: syzbot+ad8ca40ecd77896d51e2@syzkaller.appspotmail.com +Cc: David Miller +Cc: Greg Kroah-Hartman +Cc: Lukas Bulwahn +Signed-off-by: Jouni Hogander +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -9082,8 +9082,10 @@ int register_netdevice(struct net_device + goto err_uninit; + + ret = netdev_register_kobject(dev); +- if (ret) ++ if (ret) { ++ dev->reg_state = NETREG_UNREGISTERED; + goto err_uninit; ++ } + dev->reg_state = NETREG_REGISTERED; + + __netdev_update_features(dev); diff --git a/queue-5.4/net-usb-lan78xx-add-.ndo_features_check.patch b/queue-5.4/net-usb-lan78xx-add-.ndo_features_check.patch new file mode 100644 index 00000000000..5f74b43a05a --- /dev/null +++ b/queue-5.4/net-usb-lan78xx-add-.ndo_features_check.patch @@ -0,0 +1,62 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: James Hughes +Date: Mon, 20 Jan 2020 11:12:40 +0000 +Subject: net: usb: lan78xx: Add .ndo_features_check + +From: James Hughes + +[ Upstream commit ce896476c65d72b4b99fa09c2f33436b4198f034 ] + +As reported by Eric Dumazet, there are still some outstanding +cases where the driver does not handle TSO correctly when skb's +are over a certain size. Most cases have been fixed, this patch +should ensure that forwarded SKB's that are greater than +MAX_SINGLE_PACKET_SIZE - TX_OVERHEAD are software segmented +and handled correctly. + +Signed-off-by: James Hughes +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/lan78xx.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -3668,6 +3669,19 @@ static void lan78xx_tx_timeout(struct ne + tasklet_schedule(&dev->bh); + } + ++static netdev_features_t lan78xx_features_check(struct sk_buff *skb, ++ struct net_device *netdev, ++ netdev_features_t features) ++{ ++ if (skb->len + TX_OVERHEAD > MAX_SINGLE_PACKET_SIZE) ++ features &= ~NETIF_F_GSO_MASK; ++ ++ features = vlan_features_check(skb, features); ++ features = vxlan_features_check(skb, features); ++ ++ return features; ++} ++ + static const struct net_device_ops lan78xx_netdev_ops = { + .ndo_open = lan78xx_open, + .ndo_stop = lan78xx_stop, +@@ -3681,6 +3695,7 @@ static const struct net_device_ops lan78 + .ndo_set_features = lan78xx_set_features, + .ndo_vlan_rx_add_vid = lan78xx_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = lan78xx_vlan_rx_kill_vid, ++ .ndo_features_check = lan78xx_features_check, + }; + + static void lan78xx_stat_monitor(struct timer_list *t) diff --git a/queue-5.4/net_sched-fix-datalen-for-ematch.patch b/queue-5.4/net_sched-fix-datalen-for-ematch.patch new file mode 100644 index 00000000000..5c61c044e81 --- /dev/null +++ b/queue-5.4/net_sched-fix-datalen-for-ematch.patch @@ -0,0 +1,47 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Cong Wang +Date: Wed, 22 Jan 2020 15:42:02 -0800 +Subject: net_sched: fix datalen for ematch + +From: Cong Wang + +[ Upstream commit 61678d28d4a45ef376f5d02a839cc37509ae9281 ] + +syzbot reported an out-of-bound access in em_nbyte. As initially +analyzed by Eric, this is because em_nbyte sets its own em->datalen +in em_nbyte_change() other than the one specified by user, but this +value gets overwritten later by its caller tcf_em_validate(). +We should leave em->datalen untouched to respect their choices. + +I audit all the in-tree ematch users, all of those implement +->change() set em->datalen, so we can just avoid setting it twice +in this case. 
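In outline, the validate path after the fix looks like this (a simplified sketch with the TCF_EM_SIMPLE sub-case trimmed; the point is only who owns em->datalen):

  if (em_ops->change) {
          /* the match's own ->change() hook may set em->datalen itself */
          err = em_ops->change(net, data, data_len, em);
          if (err < 0)
                  goto errout;
  } else if (data_len > 0) {
          void *v = kmemdup(data, data_len, GFP_KERNEL);

          if (!v) {
                  err = -ENOBUFS;
                  goto errout;
          }
          em->data = (unsigned long) v;
          em->datalen = data_len;   /* set only here, never on top of ->change() */
  }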
+ +Reported-and-tested-by: syzbot+5af9a90dad568aa9f611@syzkaller.appspotmail.com +Reported-by: syzbot+2f07903a5b05e7f36410@syzkaller.appspotmail.com +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Cc: Eric Dumazet +Signed-off-by: Cong Wang +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/ematch.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/ematch.c ++++ b/net/sched/ematch.c +@@ -263,12 +263,12 @@ static int tcf_em_validate(struct tcf_pr + } + em->data = (unsigned long) v; + } ++ em->datalen = data_len; + } + } + + em->matchid = em_hdr->matchid; + em->flags = em_hdr->flags; +- em->datalen = data_len; + em->net = net; + + err = 0; diff --git a/queue-5.4/net_sched-use-validated-tca_kind-attribute-in-tc_new_tfilter.patch b/queue-5.4/net_sched-use-validated-tca_kind-attribute-in-tc_new_tfilter.patch new file mode 100644 index 00000000000..65d051fe3cb --- /dev/null +++ b/queue-5.4/net_sched-use-validated-tca_kind-attribute-in-tc_new_tfilter.patch @@ -0,0 +1,103 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Eric Dumazet +Date: Tue, 21 Jan 2020 11:02:20 -0800 +Subject: net_sched: use validated TCA_KIND attribute in tc_new_tfilter() + +From: Eric Dumazet + +[ Upstream commit 36d79af7fb59d6d9106feb9c1855eb93d6d53fe6 ] + +sysbot found another issue in tc_new_tfilter(). +We probably should use @name which contains the sanitized +version of TCA_KIND. + +BUG: KMSAN: uninit-value in string_nocheck lib/vsprintf.c:608 [inline] +BUG: KMSAN: uninit-value in string+0x522/0x690 lib/vsprintf.c:689 +CPU: 1 PID: 10753 Comm: syz-executor.1 Not tainted 5.5.0-rc5-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c9/0x220 lib/dump_stack.c:118 + kmsan_report+0xf7/0x1e0 mm/kmsan/kmsan_report.c:118 + __msan_warning+0x58/0xa0 mm/kmsan/kmsan_instr.c:215 + string_nocheck lib/vsprintf.c:608 [inline] + string+0x522/0x690 lib/vsprintf.c:689 + vsnprintf+0x207d/0x31b0 lib/vsprintf.c:2574 + __request_module+0x2ad/0x11c0 kernel/kmod.c:143 + tcf_proto_lookup_ops+0x241/0x720 net/sched/cls_api.c:139 + tcf_proto_create net/sched/cls_api.c:262 [inline] + tc_new_tfilter+0x2a4e/0x5010 net/sched/cls_api.c:2058 + rtnetlink_rcv_msg+0xcb7/0x1570 net/core/rtnetlink.c:5415 + netlink_rcv_skb+0x451/0x650 net/netlink/af_netlink.c:2477 + rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:5442 + netlink_unicast_kernel net/netlink/af_netlink.c:1302 [inline] + netlink_unicast+0xf9e/0x1100 net/netlink/af_netlink.c:1328 + netlink_sendmsg+0x1248/0x14d0 net/netlink/af_netlink.c:1917 + sock_sendmsg_nosec net/socket.c:639 [inline] + sock_sendmsg net/socket.c:659 [inline] + ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 + ___sys_sendmsg net/socket.c:2384 [inline] + __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 + __do_sys_sendmsg net/socket.c:2426 [inline] + __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 + __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 + do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +RIP: 0033:0x45b349 +Code: ad b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b6 fb ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007f88b3948c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007f88b39496d4 RCX: 000000000045b349 +RDX: 0000000000000000 RSI: 00000000200001c0 
RDI: 0000000000000003 +RBP: 000000000075bfc8 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 000000000000099f R14: 00000000004cb163 R15: 000000000075bfd4 + +Uninit was created at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:144 [inline] + kmsan_internal_poison_shadow+0x66/0xd0 mm/kmsan/kmsan.c:127 + kmsan_slab_alloc+0x8a/0xe0 mm/kmsan/kmsan_hooks.c:82 + slab_alloc_node mm/slub.c:2774 [inline] + __kmalloc_node_track_caller+0xb40/0x1200 mm/slub.c:4382 + __kmalloc_reserve net/core/skbuff.c:141 [inline] + __alloc_skb+0x2fd/0xac0 net/core/skbuff.c:209 + alloc_skb include/linux/skbuff.h:1049 [inline] + netlink_alloc_large_skb net/netlink/af_netlink.c:1174 [inline] + netlink_sendmsg+0x7d3/0x14d0 net/netlink/af_netlink.c:1892 + sock_sendmsg_nosec net/socket.c:639 [inline] + sock_sendmsg net/socket.c:659 [inline] + ____sys_sendmsg+0x12b6/0x1350 net/socket.c:2330 + ___sys_sendmsg net/socket.c:2384 [inline] + __sys_sendmsg+0x451/0x5f0 net/socket.c:2417 + __do_sys_sendmsg net/socket.c:2426 [inline] + __se_sys_sendmsg+0x97/0xb0 net/socket.c:2424 + __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2424 + do_syscall_64+0xb8/0x160 arch/x86/entry/common.c:296 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +Fixes: 6f96c3c6904c ("net_sched: fix backward compatibility for TCA_KIND") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Cong Wang +Cc: Marcelo Ricardo Leitner +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_api.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -2055,9 +2055,8 @@ replay: + &chain_info)); + + mutex_unlock(&chain->filter_chain_lock); +- tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]), +- protocol, prio, chain, rtnl_held, +- extack); ++ tp_new = tcf_proto_create(name, protocol, prio, chain, ++ rtnl_held, extack); + if (IS_ERR(tp_new)) { + err = PTR_ERR(tp_new); + goto errout_tp; diff --git a/queue-5.4/revert-udp-do-rmem-bulk-free-even-if-the-rx-sk-queue-is-empty.patch b/queue-5.4/revert-udp-do-rmem-bulk-free-even-if-the-rx-sk-queue-is-empty.patch new file mode 100644 index 00000000000..0c9fceaf84a --- /dev/null +++ b/queue-5.4/revert-udp-do-rmem-bulk-free-even-if-the-rx-sk-queue-is-empty.patch @@ -0,0 +1,41 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Paolo Abeni +Date: Tue, 21 Jan 2020 16:50:49 +0100 +Subject: Revert "udp: do rmem bulk free even if the rx sk queue is empty" + +From: Paolo Abeni + +[ Upstream commit d39ca2590d10712f412add7a88e1dd467a7246f4 ] + +This reverts commit 0d4a6608f68c7532dcbfec2ea1150c9761767d03. + +Willem reported that after commit 0d4a6608f68c ("udp: do rmem bulk +free even if the rx sk queue is empty") the memory allocated by +an almost idle system with many UDP sockets can grow a lot. + +For stable kernel keep the solution as simple as possible and revert +the offending commit. + +Reported-by: Willem de Bruijn +Diagnosed-by: Eric Dumazet +Fixes: 0d4a6608f68c ("udp: do rmem bulk free even if the rx sk queue is empty") +Signed-off-by: Paolo Abeni +Acked-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -1368,7 +1368,8 @@ static void udp_rmem_release(struct sock + if (likely(partial)) { + up->forward_deficit += size; + size = up->forward_deficit; +- if (size < (sk->sk_rcvbuf >> 2)) ++ if (size < (sk->sk_rcvbuf >> 2) && ++ !skb_queue_empty(&up->reader_queue)) + return; + } else { + size += up->forward_deficit; diff --git a/queue-5.4/series b/queue-5.4/series new file mode 100644 index 00000000000..4363ab719c2 --- /dev/null +++ b/queue-5.4/series @@ -0,0 +1,32 @@ +can-slip-protect-tty-disc_data-in-write_wakeup-and-close-with-rcu.patch +firestream-fix-memory-leaks.patch +gtp-make-sure-only-sock_dgram-udp-sockets-are-accepted.patch +ipv6-sr-remove-skb_gso_ipxip6-on-end.d-actions.patch +net-bcmgenet-use-netif_tx_napi_add-for-tx-napi.patch +net-cxgb3_main-add-cap_net_admin-check-to-chelsio_get_mem.patch +net-ip6_gre-fix-moving-ip6gre-between-namespaces.patch +net-ip6_tunnel-fix-namespaces-move.patch +net-ip_tunnel-fix-namespaces-move.patch +net-rtnetlink-validate-ifla_mtu-attribute-in-rtnl_create_link.patch +net_sched-fix-datalen-for-ematch.patch +net_sched-use-validated-tca_kind-attribute-in-tc_new_tfilter.patch +net-sysfs-fix-reference-count-leak.patch +net-usb-lan78xx-add-.ndo_features_check.patch +revert-udp-do-rmem-bulk-free-even-if-the-rx-sk-queue-is-empty.patch +tcp_bbr-improve-arithmetic-division-in-bbr_update_bw.patch +tcp-do-not-leave-dangling-pointers-in-tp-highest_sack.patch +tun-add-mutex_unlock-call-and-napi.skb-clearing-in-tun_get_user.patch +airo-fix-possible-info-leak-in-airooldioctl-siocdevprivate.patch +airo-add-missing-cap_net_admin-check-in-airooldioctl-siocdevprivate.patch +mlxsw-spectrum_acl-fix-use-after-free-during-reload.patch +fou-fix-ipv6-netlink-policy.patch +net-fix-packet-reordering-caused-by-gro-and-listified-rx-cooperation.patch +net-mlx5-fix-lowest-fdb-pool-size.patch +net-mlx5-update-the-list-of-the-pci-supported-devices.patch +net-mlx5-dr-enable-counter-on-non-fwd-dest-objects.patch +net-mlx5-e-switch-prevent-ingress-rate-configuration-of-uplink-rep.patch +net-mlx5-dr-use-non-preemptible-call-to-get-the-current-cpu-number.patch +net-mlx5e-ktls-fix-corner-case-checks-in-tx-resync-flow.patch +net-mlx5e-ktls-remove-redundant-posts-in-tx-resync-flow.patch +net-mlx5e-ktls-do-not-send-decrypted-marked-skbs-via-non-accel-path.patch +ipv4-detect-rollover-in-specific-fib-table-dump.patch diff --git a/queue-5.4/tcp-do-not-leave-dangling-pointers-in-tp-highest_sack.patch b/queue-5.4/tcp-do-not-leave-dangling-pointers-in-tp-highest_sack.patch new file mode 100644 index 00000000000..82b936da010 --- /dev/null +++ b/queue-5.4/tcp-do-not-leave-dangling-pointers-in-tp-highest_sack.patch @@ -0,0 +1,189 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Eric Dumazet +Date: Wed, 22 Jan 2020 21:03:00 -0800 +Subject: tcp: do not leave dangling pointers in tp->highest_sack + +From: Eric Dumazet + +[ Upstream commit 2bec445f9bf35e52e395b971df48d3e1e5dc704a ] + +Latest commit 853697504de0 ("tcp: Fix highest_sack and highest_sack_seq") +apparently allowed syzbot to trigger various crashes in TCP stack [1] + +I believe this commit only made things easier for syzbot to find +its way into triggering use-after-frees. But really the bugs +could lead to bad TCP behavior or even plain crashes even for +non malicious peers. 
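The underlying defect is the familiar stale-cache-pointer pattern: a cached shortcut into the retransmit queue must be re-pointed before the node it references is unlinked and freed. In generic form (placeholder names, not the TCP code):

  /* 'hint' caches a node inside the rb-tree to avoid re-walking it.
   * Every unlink path has to refresh the hint first, or it dangles. */
  if (cache->hint == node)
          cache->hint = next;             /* successor, or NULL if none */
  rb_erase(&node->rbnode, &queue->root);
  kfree(node);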
+ +I have audited all calls to tcp_rtx_queue_unlink() and +tcp_rtx_queue_unlink_and_free() and made sure tp->highest_sack would be updated +if we are removing from rtx queue the skb that tp->highest_sack points to. + +These updates were missing in three locations : + +1) tcp_clean_rtx_queue() [This one seems quite serious, + I have no idea why this was not caught earlier] + +2) tcp_rtx_queue_purge() [Probably not a big deal for normal operations] + +3) tcp_send_synack() [Probably not a big deal for normal operations] + +[1] +BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1864 [inline] +BUG: KASAN: use-after-free in tcp_highest_sack_seq include/net/tcp.h:1856 [inline] +BUG: KASAN: use-after-free in tcp_check_sack_reordering+0x33c/0x3a0 net/ipv4/tcp_input.c:891 +Read of size 4 at addr ffff8880a488d068 by task ksoftirqd/1/16 + +CPU: 1 PID: 16 Comm: ksoftirqd/1 Not tainted 5.5.0-rc5-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x197/0x210 lib/dump_stack.c:118 + print_address_description.constprop.0.cold+0xd4/0x30b mm/kasan/report.c:374 + __kasan_report.cold+0x1b/0x41 mm/kasan/report.c:506 + kasan_report+0x12/0x20 mm/kasan/common.c:639 + __asan_report_load4_noabort+0x14/0x20 mm/kasan/generic_report.c:134 + tcp_highest_sack_seq include/net/tcp.h:1864 [inline] + tcp_highest_sack_seq include/net/tcp.h:1856 [inline] + tcp_check_sack_reordering+0x33c/0x3a0 net/ipv4/tcp_input.c:891 + tcp_try_undo_partial net/ipv4/tcp_input.c:2730 [inline] + tcp_fastretrans_alert+0xf74/0x23f0 net/ipv4/tcp_input.c:2847 + tcp_ack+0x2577/0x5bf0 net/ipv4/tcp_input.c:3710 + tcp_rcv_established+0x6dd/0x1e90 net/ipv4/tcp_input.c:5706 + tcp_v4_do_rcv+0x619/0x8d0 net/ipv4/tcp_ipv4.c:1619 + tcp_v4_rcv+0x307f/0x3b40 net/ipv4/tcp_ipv4.c:2001 + ip_protocol_deliver_rcu+0x5a/0x880 net/ipv4/ip_input.c:204 + ip_local_deliver_finish+0x23b/0x380 net/ipv4/ip_input.c:231 + NF_HOOK include/linux/netfilter.h:307 [inline] + NF_HOOK include/linux/netfilter.h:301 [inline] + ip_local_deliver+0x1e9/0x520 net/ipv4/ip_input.c:252 + dst_input include/net/dst.h:442 [inline] + ip_rcv_finish+0x1db/0x2f0 net/ipv4/ip_input.c:428 + NF_HOOK include/linux/netfilter.h:307 [inline] + NF_HOOK include/linux/netfilter.h:301 [inline] + ip_rcv+0xe8/0x3f0 net/ipv4/ip_input.c:538 + __netif_receive_skb_one_core+0x113/0x1a0 net/core/dev.c:5148 + __netif_receive_skb+0x2c/0x1d0 net/core/dev.c:5262 + process_backlog+0x206/0x750 net/core/dev.c:6093 + napi_poll net/core/dev.c:6530 [inline] + net_rx_action+0x508/0x1120 net/core/dev.c:6598 + __do_softirq+0x262/0x98c kernel/softirq.c:292 + run_ksoftirqd kernel/softirq.c:603 [inline] + run_ksoftirqd+0x8e/0x110 kernel/softirq.c:595 + smpboot_thread_fn+0x6a3/0xa40 kernel/smpboot.c:165 + kthread+0x361/0x430 kernel/kthread.c:255 + ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352 + +Allocated by task 10091: + save_stack+0x23/0x90 mm/kasan/common.c:72 + set_track mm/kasan/common.c:80 [inline] + __kasan_kmalloc mm/kasan/common.c:513 [inline] + __kasan_kmalloc.constprop.0+0xcf/0xe0 mm/kasan/common.c:486 + kasan_slab_alloc+0xf/0x20 mm/kasan/common.c:521 + slab_post_alloc_hook mm/slab.h:584 [inline] + slab_alloc_node mm/slab.c:3263 [inline] + kmem_cache_alloc_node+0x138/0x740 mm/slab.c:3575 + __alloc_skb+0xd5/0x5e0 net/core/skbuff.c:198 + alloc_skb_fclone include/linux/skbuff.h:1099 [inline] + sk_stream_alloc_skb net/ipv4/tcp.c:875 [inline] + sk_stream_alloc_skb+0x113/0xc90 
net/ipv4/tcp.c:852 + tcp_sendmsg_locked+0xcf9/0x3470 net/ipv4/tcp.c:1282 + tcp_sendmsg+0x30/0x50 net/ipv4/tcp.c:1432 + inet_sendmsg+0x9e/0xe0 net/ipv4/af_inet.c:807 + sock_sendmsg_nosec net/socket.c:652 [inline] + sock_sendmsg+0xd7/0x130 net/socket.c:672 + __sys_sendto+0x262/0x380 net/socket.c:1998 + __do_sys_sendto net/socket.c:2010 [inline] + __se_sys_sendto net/socket.c:2006 [inline] + __x64_sys_sendto+0xe1/0x1a0 net/socket.c:2006 + do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Freed by task 10095: + save_stack+0x23/0x90 mm/kasan/common.c:72 + set_track mm/kasan/common.c:80 [inline] + kasan_set_free_info mm/kasan/common.c:335 [inline] + __kasan_slab_free+0x102/0x150 mm/kasan/common.c:474 + kasan_slab_free+0xe/0x10 mm/kasan/common.c:483 + __cache_free mm/slab.c:3426 [inline] + kmem_cache_free+0x86/0x320 mm/slab.c:3694 + kfree_skbmem+0x178/0x1c0 net/core/skbuff.c:645 + __kfree_skb+0x1e/0x30 net/core/skbuff.c:681 + sk_eat_skb include/net/sock.h:2453 [inline] + tcp_recvmsg+0x1252/0x2930 net/ipv4/tcp.c:2166 + inet_recvmsg+0x136/0x610 net/ipv4/af_inet.c:838 + sock_recvmsg_nosec net/socket.c:886 [inline] + sock_recvmsg net/socket.c:904 [inline] + sock_recvmsg+0xce/0x110 net/socket.c:900 + __sys_recvfrom+0x1ff/0x350 net/socket.c:2055 + __do_sys_recvfrom net/socket.c:2073 [inline] + __se_sys_recvfrom net/socket.c:2069 [inline] + __x64_sys_recvfrom+0xe1/0x1a0 net/socket.c:2069 + do_syscall_64+0xfa/0x790 arch/x86/entry/common.c:294 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +The buggy address belongs to the object at ffff8880a488d040 + which belongs to the cache skbuff_fclone_cache of size 456 +The buggy address is located 40 bytes inside of + 456-byte region [ffff8880a488d040, ffff8880a488d208) +The buggy address belongs to the page: +page:ffffea0002922340 refcount:1 mapcount:0 mapping:ffff88821b057000 index:0x0 +raw: 00fffe0000000200 ffffea00022a5788 ffffea0002624a48 ffff88821b057000 +raw: 0000000000000000 ffff8880a488d040 0000000100000006 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8880a488cf00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc + ffff8880a488cf80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc +>ffff8880a488d000: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb + ^ + ffff8880a488d080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8880a488d100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + +Fixes: 853697504de0 ("tcp: Fix highest_sack and highest_sack_seq") +Fixes: 50895b9de1d3 ("tcp: highest_sack fix") +Fixes: 737ff314563c ("tcp: use sequence distance to detect reordering") +Signed-off-by: Eric Dumazet +Cc: Cambda Zhu +Cc: Yuchung Cheng +Cc: Neal Cardwell +Acked-by: Neal Cardwell +Acked-by: Yuchung Cheng +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 1 + + net/ipv4/tcp_input.c | 1 + + net/ipv4/tcp_output.c | 1 + + 3 files changed, 3 insertions(+) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2520,6 +2520,7 @@ static void tcp_rtx_queue_purge(struct s + { + struct rb_node *p = rb_first(&sk->tcp_rtx_queue); + ++ tcp_sk(sk)->highest_sack = NULL; + while (p) { + struct sk_buff *skb = rb_to_skb(p); + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -3164,6 +3164,7 @@ static int tcp_clean_rtx_queue(struct so + tp->retransmit_skb_hint = NULL; + if (unlikely(skb == tp->lost_skb_hint)) + tp->lost_skb_hint = NULL; ++ tcp_highest_sack_replace(sk, skb, next); + tcp_rtx_queue_unlink_and_free(skb, sk); + } + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -3231,6 +3231,7 @@ int tcp_send_synack(struct sock *sk) + if (!nskb) + return -ENOMEM; + INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor); ++ tcp_highest_sack_replace(sk, skb, nskb); + tcp_rtx_queue_unlink_and_free(skb, sk); + __skb_header_release(nskb); + tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb); diff --git a/queue-5.4/tcp_bbr-improve-arithmetic-division-in-bbr_update_bw.patch b/queue-5.4/tcp_bbr-improve-arithmetic-division-in-bbr_update_bw.patch new file mode 100644 index 00000000000..b816e2aaaf6 --- /dev/null +++ b/queue-5.4/tcp_bbr-improve-arithmetic-division-in-bbr_update_bw.patch @@ -0,0 +1,39 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Wen Yang +Date: Mon, 20 Jan 2020 18:04:56 +0800 +Subject: tcp_bbr: improve arithmetic division in bbr_update_bw() + +From: Wen Yang + +[ Upstream commit 5b2f1f3070b6447b76174ea8bfb7390dc6253ebd ] + +do_div() does a 64-by-32 division. Use div64_long() instead of it +if the divisor is long, to avoid truncation to 32-bit. +And as a nice side effect also cleans up the function a bit. + +Signed-off-by: Wen Yang +Cc: Eric Dumazet +Cc: "David S. Miller" +Cc: Alexey Kuznetsov +Cc: Hideaki YOSHIFUJI +Cc: netdev@vger.kernel.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_bbr.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv4/tcp_bbr.c ++++ b/net/ipv4/tcp_bbr.c +@@ -779,8 +779,7 @@ static void bbr_update_bw(struct sock *s + * bandwidth sample. Delivered is in packets and interval_us in uS and + * ratio will be <<1 for most connections. So delivered is first scaled. 
+ */ +- bw = (u64)rs->delivered * BW_UNIT; +- do_div(bw, rs->interval_us); ++ bw = div64_long((u64)rs->delivered * BW_UNIT, rs->interval_us); + + /* If this sample is application-limited, it is likely to have a very + * low delivered count that represents application behavior rather than diff --git a/queue-5.4/tun-add-mutex_unlock-call-and-napi.skb-clearing-in-tun_get_user.patch b/queue-5.4/tun-add-mutex_unlock-call-and-napi.skb-clearing-in-tun_get_user.patch new file mode 100644 index 00000000000..acc1924de1e --- /dev/null +++ b/queue-5.4/tun-add-mutex_unlock-call-and-napi.skb-clearing-in-tun_get_user.patch @@ -0,0 +1,44 @@ +From foo@baz Mon 27 Jan 2020 02:32:20 PM CET +From: Eric Dumazet +Date: Wed, 22 Jan 2020 09:07:35 -0800 +Subject: tun: add mutex_unlock() call and napi.skb clearing in tun_get_user() + +From: Eric Dumazet + +[ Upstream commit 1efba987c48629c0c64703bb4ea76ca1a3771d17 ] + +If both IFF_NAPI_FRAGS mode and XDP are enabled, and the XDP program +consumes the skb, we need to clear the napi.skb (or risk +a use-after-free) and release the mutex (or risk a deadlock) + +WARNING: lock held when returning to user space! +5.5.0-rc6-syzkaller #0 Not tainted +------------------------------------------------ +syz-executor.0/455 is leaving the kernel with locks still held! +1 lock held by syz-executor.0/455: + #0: ffff888098f6e748 (&tfile->napi_mutex){+.+.}, at: tun_get_user+0x1604/0x3fc0 drivers/net/tun.c:1835 + +Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Petar Penkov +Cc: Willem de Bruijn +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1936,6 +1936,10 @@ drop: + if (ret != XDP_PASS) { + rcu_read_unlock(); + local_bh_enable(); ++ if (frags) { ++ tfile->napi.skb = NULL; ++ mutex_unlock(&tfile->napi_mutex); ++ } + return total_len; + } + }
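The tun fix above is an instance of a general rule: any early return taken while per-queue state is held must undo that state. A common way to keep such exits from being missed is to funnel them through a single cleanup label (generic sketch with placeholder names and helpers, not the tun code):

  static int demo_get_user(struct demo_queue *q, bool frags)
  {
          int err;

          if (frags)
                  mutex_lock(&q->napi_mutex);

          err = demo_build_skb(q);        /* placeholder helper */
          if (err)
                  goto out;               /* early exit still runs the cleanup */

          err = demo_run_xdp(q);          /* may consume the skb being built */

  out:
          if (frags) {
                  q->cached_skb = NULL;   /* never keep a pointer to a consumed skb */
                  mutex_unlock(&q->napi_mutex);
          }
          return err;
  }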