From: Greg Kroah-Hartman Date: Sun, 24 Feb 2019 07:39:33 +0000 (+0100) Subject: 4.20-stable patches X-Git-Tag: v4.9.161~35 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=39f8988c708b92944fb49203b0496076e2da7739;p=thirdparty%2Fkernel%2Fstable-queue.git 4.20-stable patches added patches: batman-adv-fix-uninit-value-in-batadv_interface_tx.patch geneve-should-not-call-rt6_lookup-when-ipv6-was-disabled.patch inet_diag-fix-reporting-cgroup-classid-and-fallback-to-priority.patch ipv6-propagate-genlmsg_reply-return-code.patch net-dsa-fix-npd-checking-for-br_vlan_enabled.patch net-ena-fix-race-between-link-up-and-device-initalization.patch net-mlx4_en-force-checksum_none-for-short-ethernet-frames.patch net-mlx5e-don-t-overwrite-pedit-action-when-multiple-pedit-used.patch net-mlx5e-fpga-fix-innova-ipsec-tx-offload-data-path-performance.patch net-mlx5e-xdp-fix-redirect-resources-availability-check.patch net-packet-fix-4gb-buffer-limit-due-to-overflow-check.patch net-sfp-do-not-probe-sfp-module-before-we-re-attached.patch net-socket-fix-siocgifname-in-compat.patch net-socket-make-bond-ioctls-go-through-compat_ifreq_ioctl.patch net_sched-fix-a-memory-leak-in-cls_tcindex.patch net_sched-fix-a-race-condition-in-tcindex_destroy.patch net_sched-fix-two-more-memory-leaks-in-cls_tcindex.patch revert-kill-dev_ifsioc.patch revert-socket-fix-struct-ifreq-size-in-compat-ioctl.patch sctp-call-gso_reset_checksum-when-computing-checksum-in-sctp_gso_segment.patch sctp-set-stream-ext-to-null-after-freeing-it-in-sctp_stream_outq_migrate.patch sit-check-if-ipv6-enabled-before-calling-ip6_err_gen_icmpv6_unreach.patch team-avoid-complex-list-operations-in-team_nl_cmd_options_set.patch --- diff --git a/queue-4.20/batman-adv-fix-uninit-value-in-batadv_interface_tx.patch b/queue-4.20/batman-adv-fix-uninit-value-in-batadv_interface_tx.patch new file mode 100644 index 00000000000..897cc09b995 --- /dev/null +++ b/queue-4.20/batman-adv-fix-uninit-value-in-batadv_interface_tx.patch @@ -0,0 +1,99 
@@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Eric Dumazet +Date: Mon, 11 Feb 2019 14:41:22 -0800 +Subject: batman-adv: fix uninit-value in batadv_interface_tx() + +From: Eric Dumazet + +[ Upstream commit 4ffcbfac60642f63ae3d80891f573ba7e94a265c ] + +KMSAN reported batadv_interface_tx() was possibly using a +garbage value [1] + +batadv_get_vid() does have a pskb_may_pull() call +but batadv_interface_tx() does not actually make sure +this did not fail. + +[1] +BUG: KMSAN: uninit-value in batadv_interface_tx+0x908/0x1e40 net/batman-adv/soft-interface.c:231 +CPU: 0 PID: 10006 Comm: syz-executor469 Not tainted 4.20.0-rc7+ #5 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x173/0x1d0 lib/dump_stack.c:113 + kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:613 + __msan_warning+0x82/0xf0 mm/kmsan/kmsan_instr.c:313 + batadv_interface_tx+0x908/0x1e40 net/batman-adv/soft-interface.c:231 + __netdev_start_xmit include/linux/netdevice.h:4356 [inline] + netdev_start_xmit include/linux/netdevice.h:4365 [inline] + xmit_one net/core/dev.c:3257 [inline] + dev_hard_start_xmit+0x607/0xc40 net/core/dev.c:3273 + __dev_queue_xmit+0x2e42/0x3bc0 net/core/dev.c:3843 + dev_queue_xmit+0x4b/0x60 net/core/dev.c:3876 + packet_snd net/packet/af_packet.c:2928 [inline] + packet_sendmsg+0x8306/0x8f30 net/packet/af_packet.c:2953 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg net/socket.c:631 [inline] + __sys_sendto+0x8c4/0xac0 net/socket.c:1788 + __do_sys_sendto net/socket.c:1800 [inline] + __se_sys_sendto+0x107/0x130 net/socket.c:1796 + __x64_sys_sendto+0x6e/0x90 net/socket.c:1796 + do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 + entry_SYSCALL_64_after_hwframe+0x63/0xe7 +RIP: 0033:0x441889 +Code: 18 89 d0 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 bb 10 fc ff c3 66 2e 
0f 1f 84 00 00 00 00 +RSP: 002b:00007ffdda6fd468 EFLAGS: 00000216 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 0000000000441889 +RDX: 000000000000000e RSI: 00000000200000c0 RDI: 0000000000000003 +RBP: 0000000000000003 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000216 R12: 00007ffdda6fd4c0 +R13: 00007ffdda6fd4b0 R14: 0000000000000000 R15: 0000000000000000 + +Uninit was created at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:204 [inline] + kmsan_internal_poison_shadow+0x92/0x150 mm/kmsan/kmsan.c:158 + kmsan_kmalloc+0xa6/0x130 mm/kmsan/kmsan_hooks.c:176 + kmsan_slab_alloc+0xe/0x10 mm/kmsan/kmsan_hooks.c:185 + slab_post_alloc_hook mm/slab.h:446 [inline] + slab_alloc_node mm/slub.c:2759 [inline] + __kmalloc_node_track_caller+0xe18/0x1030 mm/slub.c:4383 + __kmalloc_reserve net/core/skbuff.c:137 [inline] + __alloc_skb+0x309/0xa20 net/core/skbuff.c:205 + alloc_skb include/linux/skbuff.h:998 [inline] + alloc_skb_with_frags+0x1c7/0xac0 net/core/skbuff.c:5220 + sock_alloc_send_pskb+0xafd/0x10e0 net/core/sock.c:2083 + packet_alloc_skb net/packet/af_packet.c:2781 [inline] + packet_snd net/packet/af_packet.c:2872 [inline] + packet_sendmsg+0x661a/0x8f30 net/packet/af_packet.c:2953 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg net/socket.c:631 [inline] + __sys_sendto+0x8c4/0xac0 net/socket.c:1788 + __do_sys_sendto net/socket.c:1800 [inline] + __se_sys_sendto+0x107/0x130 net/socket.c:1796 + __x64_sys_sendto+0x6e/0x90 net/socket.c:1796 + do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 + entry_SYSCALL_64_after_hwframe+0x63/0xe7 + +Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Marek Lindner +Cc: Simon Wunderlich +Cc: Antonio Quartulli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/batman-adv/soft-interface.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/batman-adv/soft-interface.c ++++ b/net/batman-adv/soft-interface.c +@@ -227,6 +227,8 @@ static netdev_tx_t batadv_interface_tx(s + + switch (ntohs(ethhdr->h_proto)) { + case ETH_P_8021Q: ++ if (!pskb_may_pull(skb, sizeof(*vhdr))) ++ goto dropped; + vhdr = vlan_eth_hdr(skb); + + /* drop batman-in-batman packets to prevent loops */ diff --git a/queue-4.20/geneve-should-not-call-rt6_lookup-when-ipv6-was-disabled.patch b/queue-4.20/geneve-should-not-call-rt6_lookup-when-ipv6-was-disabled.patch new file mode 100644 index 00000000000..842d9550b97 --- /dev/null +++ b/queue-4.20/geneve-should-not-call-rt6_lookup-when-ipv6-was-disabled.patch @@ -0,0 +1,50 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Hangbin Liu +Date: Thu, 7 Feb 2019 18:36:10 +0800 +Subject: geneve: should not call rt6_lookup() when ipv6 was disabled + +From: Hangbin Liu + +[ Upstream commit c0a47e44c0980b3b23ee31fa7936d70ea5dce491 ] + +When we add a new GENEVE device with IPv6 remote, checking only for +IS_ENABLED(CONFIG_IPV6) is not enough as we may disable IPv6 in the +kernel command line (ipv6.disable=1), and calling rt6_lookup() would +cause a NULL pointer dereference. + +v2: +- don't mix declarations and code (reported by Stefano Brivio, Eric Dumazet) +- there's no need to use in6_dev_get() as we only need to check that + idev exists (reported by David Ahern). This is under RTNL, so we can + simply use __in6_dev_get() instead (Stefano, Eric). + +Reported-by: Jianlin Shi +Fixes: c40e89fd358e9 ("geneve: configure MTU based on a lower device") +Cc: Alexey Kodanev +Signed-off-by: Hangbin Liu +Reviewed-by: Stefano Brivio +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/geneve.c | 10 +++++++--- + 1 file changed, 7 insertions(+), 3 deletions(-) + +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -1426,9 +1426,13 @@ static void geneve_link_config(struct ne + } + #if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: { +- struct rt6_info *rt = rt6_lookup(geneve->net, +- &info->key.u.ipv6.dst, NULL, 0, +- NULL, 0); ++ struct rt6_info *rt; ++ ++ if (!__in6_dev_get(dev)) ++ break; ++ ++ rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0, ++ NULL, 0); + + if (rt && rt->dst.dev) + ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN; diff --git a/queue-4.20/inet_diag-fix-reporting-cgroup-classid-and-fallback-to-priority.patch b/queue-4.20/inet_diag-fix-reporting-cgroup-classid-and-fallback-to-priority.patch new file mode 100644 index 00000000000..d70c97e5575 --- /dev/null +++ b/queue-4.20/inet_diag-fix-reporting-cgroup-classid-and-fallback-to-priority.patch @@ -0,0 +1,111 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Konstantin Khlebnikov +Date: Sat, 9 Feb 2019 13:35:52 +0300 +Subject: inet_diag: fix reporting cgroup classid and fallback to priority + +From: Konstantin Khlebnikov + +[ Upstream commit 1ec17dbd90f8b638f41ee650558609c1af63dfa0 ] + +Field idiag_ext in struct inet_diag_req_v2 used as bitmap of requested +extensions has only 8 bits. Thus extensions starting from DCTCPINFO +cannot be requested directly. Some of them included into response +unconditionally or hook into some of lower 8 bits. + +Extension INET_DIAG_CLASS_ID has no way to be requested from the beginning. + +This patch bundles it with INET_DIAG_TCLASS (ipv6 tos), fixes space +reservation, and documents behavior for other extensions. + +Also this patch adds fallback to reporting socket priority. This field +is more widely used for traffic classification because ipv4 sockets +automatically map TOS to priority and default qdisc pfifo_fast knows +about that. 
But priority could be changed via setsockopt SO_PRIORITY so +INET_DIAG_TOS isn't enough for predicting class. + +Also cgroup2 obsoletes net_cls classid (it always zero), but we cannot +reuse this field for reporting cgroup2 id because it is 64-bit (ino+gen). + +So, after this patch INET_DIAG_CLASS_ID will report socket priority +for most common setup when net_cls isn't set and/or cgroup2 in use. + +Fixes: 0888e372c37f ("net: inet: diag: expose sockets cgroup classid") +Signed-off-by: Konstantin Khlebnikov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/uapi/linux/inet_diag.h | 16 +++++++++++----- + net/ipv4/inet_diag.c | 10 +++++++++- + net/sctp/diag.c | 1 + + 3 files changed, 21 insertions(+), 6 deletions(-) + +--- a/include/uapi/linux/inet_diag.h ++++ b/include/uapi/linux/inet_diag.h +@@ -137,15 +137,21 @@ enum { + INET_DIAG_TCLASS, + INET_DIAG_SKMEMINFO, + INET_DIAG_SHUTDOWN, +- INET_DIAG_DCTCPINFO, +- INET_DIAG_PROTOCOL, /* response attribute only */ ++ ++ /* ++ * Next extenstions cannot be requested in struct inet_diag_req_v2: ++ * its field idiag_ext has only 8 bits. 
++ */ ++ ++ INET_DIAG_DCTCPINFO, /* request as INET_DIAG_VEGASINFO */ ++ INET_DIAG_PROTOCOL, /* response attribute only */ + INET_DIAG_SKV6ONLY, + INET_DIAG_LOCALS, + INET_DIAG_PEERS, + INET_DIAG_PAD, +- INET_DIAG_MARK, +- INET_DIAG_BBRINFO, +- INET_DIAG_CLASS_ID, ++ INET_DIAG_MARK, /* only with CAP_NET_ADMIN */ ++ INET_DIAG_BBRINFO, /* request as INET_DIAG_VEGASINFO */ ++ INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */ + INET_DIAG_MD5SIG, + __INET_DIAG_MAX, + }; +--- a/net/ipv4/inet_diag.c ++++ b/net/ipv4/inet_diag.c +@@ -108,6 +108,7 @@ static size_t inet_sk_attr_size(struct s + + nla_total_size(1) /* INET_DIAG_TOS */ + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(4) /* INET_DIAG_MARK */ ++ + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) +@@ -287,12 +288,19 @@ int inet_sk_diag_fill(struct sock *sk, s + goto errout; + } + +- if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) { ++ if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) || ++ ext & (1 << (INET_DIAG_TCLASS - 1))) { + u32 classid = 0; + + #ifdef CONFIG_SOCK_CGROUP_DATA + classid = sock_cgroup_classid(&sk->sk_cgrp_data); + #endif ++ /* Fallback to socket priority if class id isn't set. ++ * Classful qdiscs use it as direct reference to class. ++ * For cgroup2 classid is always zero. 
++ */ ++ if (!classid) ++ classid = sk->sk_priority; + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; +--- a/net/sctp/diag.c ++++ b/net/sctp/diag.c +@@ -256,6 +256,7 @@ static size_t inet_assoc_attr_size(struc + + nla_total_size(1) /* INET_DIAG_TOS */ + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(4) /* INET_DIAG_MARK */ ++ + nla_total_size(4) /* INET_DIAG_CLASS_ID */ + + nla_total_size(addrlen * asoc->peer.transport_count) + + nla_total_size(addrlen * addrcnt) + + nla_total_size(sizeof(struct inet_diag_meminfo)) diff --git a/queue-4.20/ipv6-propagate-genlmsg_reply-return-code.patch b/queue-4.20/ipv6-propagate-genlmsg_reply-return-code.patch new file mode 100644 index 00000000000..06189f57959 --- /dev/null +++ b/queue-4.20/ipv6-propagate-genlmsg_reply-return-code.patch @@ -0,0 +1,32 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Li RongQing +Date: Mon, 11 Feb 2019 19:32:20 +0800 +Subject: ipv6: propagate genlmsg_reply return code + +From: Li RongQing + +[ Upstream commit d1f20798a119be71746949ba9b2e2ff330fdc038 ] + +genlmsg_reply can fail, so propagate its return code + +Fixes: 915d7e5e593 ("ipv6: sr: add code base for control plane support of SR-IPv6") +Signed-off-by: Li RongQing +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/seg6.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/ipv6/seg6.c ++++ b/net/ipv6/seg6.c +@@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct s + rcu_read_unlock(); + + genlmsg_end(msg, hdr); +- genlmsg_reply(msg, info); +- +- return 0; ++ return genlmsg_reply(msg, info); + + nla_put_failure: + rcu_read_unlock(); diff --git a/queue-4.20/net-dsa-fix-npd-checking-for-br_vlan_enabled.patch b/queue-4.20/net-dsa-fix-npd-checking-for-br_vlan_enabled.patch new file mode 100644 index 00000000000..789618029a1 --- /dev/null +++ b/queue-4.20/net-dsa-fix-npd-checking-for-br_vlan_enabled.patch @@ -0,0 +1,48 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Florian Fainelli +Date: Mon, 18 Feb 2019 14:30:11 -0800 +Subject: net: dsa: Fix NPD checking for br_vlan_enabled() + +From: Florian Fainelli + +[ Upstream not applicable ] + +It is possible for the DSA slave network device not to be part of a +bridge, yet have an upper device like a VLAN device be part of a bridge. +When that VLAN device is enslaved, since it does not define any +switchdev_ops, we will recurse down to the lower/physical port device, +call switchdev_port_obj_add() with a VLAN, and here we will check +br_vlan_enabled() on a NULL dp->bridge_dev, thus causing a NULL pointer +de-reference. + +This is no longer a problem upstream after commit d17d9f5e5143 +("switchdev: Replace port obj add/del SDO with a notification"). 
+ +Fixes: 2ea7a679ca2a ("net: dsa: Don't add vlans when vlan filtering is disabled") +Reported-by: Frank Wunderlich +Signed-off-by: Florian Fainelli +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/port.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/dsa/port.c ++++ b/net/dsa/port.c +@@ -255,7 +255,7 @@ int dsa_port_vlan_add(struct dsa_port *d + if (netif_is_bridge_master(vlan->obj.orig_dev)) + return -EOPNOTSUPP; + +- if (br_vlan_enabled(dp->bridge_dev)) ++ if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev)) + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); + + return 0; +@@ -273,7 +273,7 @@ int dsa_port_vlan_del(struct dsa_port *d + if (netif_is_bridge_master(vlan->obj.orig_dev)) + return -EOPNOTSUPP; + +- if (br_vlan_enabled(dp->bridge_dev)) ++ if (dp->bridge_dev && br_vlan_enabled(dp->bridge_dev)) + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); + + return 0; diff --git a/queue-4.20/net-ena-fix-race-between-link-up-and-device-initalization.patch b/queue-4.20/net-ena-fix-race-between-link-up-and-device-initalization.patch new file mode 100644 index 00000000000..304f7491390 --- /dev/null +++ b/queue-4.20/net-ena-fix-race-between-link-up-and-device-initalization.patch @@ -0,0 +1,55 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Arthur Kiyanovski +Date: Mon, 11 Feb 2019 19:17:43 +0200 +Subject: net: ena: fix race between link up and device initalization + +From: Arthur Kiyanovski + +[ Upstream commit e1f1bd9bfbedcfce428ee7e1b82a6ec12d4c3863 ] + +Fix race condition between ena_update_on_link_change() and +ena_restore_device(). + +This race can occur if link notification arrives while the driver +is performing a reset sequence. In this case link can be set up, +enabling the device, before it is fully restored. If packets are +sent at this time, the driver might access uninitialized data +structures, causing kernel crash. 
+ +Move the clearing of ENA_FLAG_ONGOING_RESET and netif_carrier_on() +after ena_up() to ensure the device is ready when link is set up. + +Fixes: d18e4f683445 ("net: ena: fix race condition between device reset and link up setup") +Signed-off-by: Arthur Kiyanovski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/amazon/ena/ena_netdev.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c ++++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c +@@ -2663,11 +2663,6 @@ static int ena_restore_device(struct ena + goto err_device_destroy; + } + +- clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); +- /* Make sure we don't have a race with AENQ Links state handler */ +- if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) +- netif_carrier_on(adapter->netdev); +- + rc = ena_enable_msix_and_set_admin_interrupts(adapter, + adapter->num_queues); + if (rc) { +@@ -2684,6 +2679,11 @@ static int ena_restore_device(struct ena + } + + set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); ++ ++ clear_bit(ENA_FLAG_ONGOING_RESET, &adapter->flags); ++ if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) ++ netif_carrier_on(adapter->netdev); ++ + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); + dev_err(&pdev->dev, + "Device reset completed successfully, Driver info: %s\n", diff --git a/queue-4.20/net-mlx4_en-force-checksum_none-for-short-ethernet-frames.patch b/queue-4.20/net-mlx4_en-force-checksum_none-for-short-ethernet-frames.patch new file mode 100644 index 00000000000..6998fac4583 --- /dev/null +++ b/queue-4.20/net-mlx4_en-force-checksum_none-for-short-ethernet-frames.patch @@ -0,0 +1,82 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Saeed Mahameed +Date: Mon, 11 Feb 2019 18:04:17 +0200 +Subject: net/mlx4_en: Force CHECKSUM_NONE for short ethernet frames + +From: Saeed Mahameed + +[ Upstream commit 29dded89e80e3fff61efb34f07a8a3fba3ea146d ] + +When an ethernet 
frame is padded to meet the minimum ethernet frame +size, the padding octets are not covered by the hardware checksum. +Fortunately the padding octets are usually zero's, which don't affect +checksum. However, it is not guaranteed. For example, switches might +choose to make other use of these octets. +This repeatedly causes kernel hardware checksum fault. + +Prior to the cited commit below, skb checksum was forced to be +CHECKSUM_NONE when padding is detected. After it, we need to keep +skb->csum updated. However, fixing up CHECKSUM_COMPLETE requires to +verify and parse IP headers, it does not worth the effort as the packets +are so small that CHECKSUM_COMPLETE has no significant advantage. + +Future work: when reporting checksum complete is not an option for +IP non-TCP/UDP packets, we can actually fallback to report checksum +unnecessary, by looking at cqe IPOK bit. + +Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") +Cc: Eric Dumazet +Signed-off-by: Saeed Mahameed +Signed-off-by: Tariq Toukan +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_rx.c | 20 +++++++++++++++++++- + 1 file changed, 19 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c +@@ -620,6 +620,8 @@ static int get_fixed_ipv6_csum(__wsum hw + } + #endif + ++#define short_frame(size) ((size) <= ETH_ZLEN + ETH_FCS_LEN) ++ + /* We reach this function only after checking that any of + * the (IPv4 | IPv6) bits are set in cqe->status. + */ +@@ -627,9 +629,20 @@ static int check_csum(struct mlx4_cqe *c + netdev_features_t dev_features) + { + __wsum hw_checksum = 0; ++ void *hdr; + +- void *hdr = (u8 *)va + sizeof(struct ethhdr); ++ /* CQE csum doesn't cover padding octets in short ethernet ++ * frames. And the pad field is appended prior to calculating ++ * and appending the FCS field. 
++ * ++ * Detecting these padded frames requires to verify and parse ++ * IP headers, so we simply force all those small frames to skip ++ * checksum complete. ++ */ ++ if (short_frame(skb->len)) ++ return -EINVAL; + ++ hdr = (u8 *)va + sizeof(struct ethhdr); + hw_checksum = csum_unfold((__force __sum16)cqe->checksum); + + if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) && +@@ -822,6 +835,11 @@ xdp_drop_no_cnt: + skb_record_rx_queue(skb, cq_ring); + + if (likely(dev->features & NETIF_F_RXCSUM)) { ++ /* TODO: For IP non TCP/UDP packets when csum complete is ++ * not an option (not supported or any other reason) we can ++ * actually check cqe IPOK status bit and report ++ * CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE ++ */ + if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | + MLX4_CQE_STATUS_UDP)) && + (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) && diff --git a/queue-4.20/net-mlx5e-don-t-overwrite-pedit-action-when-multiple-pedit-used.patch b/queue-4.20/net-mlx5e-don-t-overwrite-pedit-action-when-multiple-pedit-used.patch new file mode 100644 index 00000000000..0eb2023b475 --- /dev/null +++ b/queue-4.20/net-mlx5e-don-t-overwrite-pedit-action-when-multiple-pedit-used.patch @@ -0,0 +1,99 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Tonghao Zhang +Date: Mon, 28 Jan 2019 15:28:06 -0800 +Subject: net/mlx5e: Don't overwrite pedit action when multiple pedit used + +From: Tonghao Zhang + +[ Upstream commit 218d05ce326f9e1b40a56085431fa1068b43d5d9 ] + +In some case, we may use multiple pedit actions to modify packets. +The command shown as below: the last pedit action is effective. 
+ +$ tc filter add dev netdev_rep parent ffff: protocol ip prio 1 \ + flower skip_sw ip_proto icmp dst_ip 3.3.3.3 \ + action pedit ex munge ip dst set 192.168.1.100 pipe \ + action pedit ex munge eth src set 00:00:00:00:00:01 pipe \ + action pedit ex munge eth dst set 00:00:00:00:00:02 pipe \ + action csum ip pipe \ + action tunnel_key set src_ip 1.1.1.100 dst_ip 1.1.1.200 dst_port 4789 id 100 \ + action mirred egress redirect dev vxlan0 + +To fix it, we add max_mod_hdr_actions to struct mlx5e_tc_flow_parse_attr. +max_mod_hdr_actions will store the max pedit action number we support and +num_mod_hdr_actions indicates how many pedit actions we used, and we store all +pedit actions in mod_hdr_actions. + +Fixes: d79b6df6b10a ("net/mlx5e: Add parsing of TC pedit actions to HW format") +Cc: Or Gerlitz +Signed-off-by: Tonghao Zhang +Reviewed-by: Or Gerlitz +Acked-by: Saeed Mahameed +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 25 ++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -98,6 +98,7 @@ struct mlx5e_tc_flow_parse_attr { + struct ip_tunnel_info tun_info; + struct mlx5_flow_spec spec; + int num_mod_hdr_actions; ++ int max_mod_hdr_actions; + void *mod_hdr_actions; + int mirred_ifindex; + }; +@@ -1888,9 +1889,9 @@ static struct mlx5_fields fields[] = { + OFFLOAD(UDP_DPORT, 2, udp.dest, 0), + }; + +-/* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at +- * max from the SW pedit action. On success, it says how many HW actions were +- * actually parsed. ++/* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at ++ * max from the SW pedit action. On success, attr->num_mod_hdr_actions ++ * says how many HW actions were actually parsed. 
+ */ + static int offload_pedit_fields(struct pedit_headers *masks, + struct pedit_headers *vals, +@@ -1914,9 +1915,11 @@ static int offload_pedit_fields(struct p + add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; + + action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); +- action = parse_attr->mod_hdr_actions; +- max_actions = parse_attr->num_mod_hdr_actions; +- nactions = 0; ++ action = parse_attr->mod_hdr_actions + ++ parse_attr->num_mod_hdr_actions * action_size; ++ ++ max_actions = parse_attr->max_mod_hdr_actions; ++ nactions = parse_attr->num_mod_hdr_actions; + + for (i = 0; i < ARRAY_SIZE(fields); i++) { + f = &fields[i]; +@@ -2027,7 +2030,7 @@ static int alloc_mod_hdr_actions(struct + if (!parse_attr->mod_hdr_actions) + return -ENOMEM; + +- parse_attr->num_mod_hdr_actions = max_actions; ++ parse_attr->max_mod_hdr_actions = max_actions; + return 0; + } + +@@ -2073,9 +2076,11 @@ static int parse_tc_pedit_action(struct + goto out_err; + } + +- err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); +- if (err) +- goto out_err; ++ if (!parse_attr->mod_hdr_actions) { ++ err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); ++ if (err) ++ goto out_err; ++ } + + err = offload_pedit_fields(masks, vals, parse_attr, extack); + if (err < 0) diff --git a/queue-4.20/net-mlx5e-fpga-fix-innova-ipsec-tx-offload-data-path-performance.patch b/queue-4.20/net-mlx5e-fpga-fix-innova-ipsec-tx-offload-data-path-performance.patch new file mode 100644 index 00000000000..568b830cb9a --- /dev/null +++ b/queue-4.20/net-mlx5e-fpga-fix-innova-ipsec-tx-offload-data-path-performance.patch @@ -0,0 +1,47 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Raed Salem +Date: Mon, 17 Dec 2018 11:40:06 +0200 +Subject: net/mlx5e: FPGA, fix Innova IPsec TX offload data path performance + +From: Raed Salem + +[ Upstream commit 82eaa1fa0448da1852d7b80832e67e80a08dcc27 ] + +At Innova IPsec TX offload data path a special software parser metadata +is used to pass some packet 
attributes to the hardware, this metadata +is passed using the Ethernet control segment of a WQE (a HW descriptor) +header. + +The cited commit might nullify this header, hence the metadata is lost, +this caused a significant performance drop during hw offloading +operation. + +Fix by restoring the metadata at the Ethernet control segment in case +it was nullified. + +Fixes: 37fdffb217a4 ("net/mlx5: WQ, fixes for fragmented WQ buffers API") +Signed-off-by: Raed Salem +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +@@ -387,8 +387,14 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_t + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); + contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); + if (unlikely(contig_wqebbs_room < num_wqebbs)) { ++#ifdef CONFIG_MLX5_EN_IPSEC ++ struct mlx5_wqe_eth_seg cur_eth = wqe->eth; ++#endif + mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); + mlx5e_sq_fetch_wqe(sq, &wqe, &pi); ++#ifdef CONFIG_MLX5_EN_IPSEC ++ wqe->eth = cur_eth; ++#endif + } + + /* fill wqe */ diff --git a/queue-4.20/net-mlx5e-xdp-fix-redirect-resources-availability-check.patch b/queue-4.20/net-mlx5e-xdp-fix-redirect-resources-availability-check.patch new file mode 100644 index 00000000000..01d4e58be16 --- /dev/null +++ b/queue-4.20/net-mlx5e-xdp-fix-redirect-resources-availability-check.patch @@ -0,0 +1,114 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Saeed Mahameed +Date: Mon, 11 Feb 2019 16:27:02 -0800 +Subject: net/mlx5e: XDP, fix redirect resources availability check + +From: Saeed Mahameed + +[ Upstream commit 407e17b1a69a51ba9a512a04342da56c1f931df4 ] + +Currently mlx5 driver creates xdp redirect hw queues unconditionally on +netdevice open, This is great until someone starts 
redirecting XDP traffic +via ndo_xdp_xmit on mlx5 device and changes the device configuration at +the same time, this might cause crashes, since the other device's napi +is not aware of the mlx5 state change (resources un-availability). + +To fix this we must synchronize with other devices napi's on the system. +Added a new flag under mlx5e_priv to determine whether XDP TX resources are +available, set/clear it up when necessary and use synchronize_rcu() +when the flag is turned off, so other napi's are in-sync with it, before +we actually cleanup the hw resources. + +The flag is tested prior to committing to transmit on mlx5e_xdp_xmit, and +it is sufficient to determine if it is safe to transmit or not. The other +two internal flags (MLX5E_STATE_OPENED and MLX5E_SQ_STATE_ENABLED) become +unnecessary. Thus, they are removed from data path. + +Fixes: 58b99ee3e3eb ("net/mlx5e: Add support for XDP_REDIRECT in device-out side") +Reported-by: Toke Høiland-Jørgensen +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + + drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 6 ++---- + drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 17 +++++++++++++++++ + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ + 4 files changed, 22 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h +@@ -636,6 +636,7 @@ enum { + MLX5E_STATE_ASYNC_EVENTS_ENABLED, + MLX5E_STATE_OPENED, + MLX5E_STATE_DESTROYING, ++ MLX5E_STATE_XDP_TX_ENABLED, + }; + + struct mlx5e_rqt { +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +@@ -262,7 +262,8 @@ int mlx5e_xdp_xmit(struct net_device *de + int sq_num; + int i; + +- if (unlikely(!test_bit(MLX5E_STATE_OPENED, &priv->state))) ++ /* this flag is sufficient, no need to test internal sq state */ ++ if 
(unlikely(!mlx5e_xdp_tx_is_enabled(priv))) + return -ENETDOWN; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) +@@ -275,9 +276,6 @@ int mlx5e_xdp_xmit(struct net_device *de + + sq = &priv->channels.c[sq_num]->xdpsq; + +- if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) +- return -ENETDOWN; +- + for (i = 0; i < n; i++) { + struct xdp_frame *xdpf = frames[i]; + struct mlx5e_xdp_info xdpi; +--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +@@ -49,6 +49,23 @@ bool mlx5e_xmit_xdp_frame(struct mlx5e_x + int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); + ++static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv) ++{ ++ set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); ++} ++ ++static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) ++{ ++ clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); ++ /* let other device's napi(s) see our new state */ ++ synchronize_rcu(); ++} ++ ++static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) ++{ ++ return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); ++} ++ + static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) + { + struct mlx5_wq_cyc *wq = &sq->wq; +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -2903,6 +2903,7 @@ void mlx5e_activate_priv_channels(struct + + mlx5e_build_tx2sq_maps(priv); + mlx5e_activate_channels(&priv->channels); ++ mlx5e_xdp_tx_enable(priv); + netif_tx_start_all_queues(priv->netdev); + + if (MLX5_ESWITCH_MANAGER(priv->mdev)) +@@ -2924,6 +2925,7 @@ void mlx5e_deactivate_priv_channels(stru + */ + netif_tx_stop_all_queues(priv->netdev); + netif_tx_disable(priv->netdev); ++ mlx5e_xdp_tx_disable(priv); + mlx5e_deactivate_channels(&priv->channels); + } + diff --git a/queue-4.20/net-packet-fix-4gb-buffer-limit-due-to-overflow-check.patch 
b/queue-4.20/net-packet-fix-4gb-buffer-limit-due-to-overflow-check.patch new file mode 100644 index 00000000000..d1ba4b14d6b --- /dev/null +++ b/queue-4.20/net-packet-fix-4gb-buffer-limit-due-to-overflow-check.patch @@ -0,0 +1,34 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Kal Conley +Date: Sun, 10 Feb 2019 09:57:11 +0100 +Subject: net/packet: fix 4gb buffer limit due to overflow check + +From: Kal Conley + +[ Upstream commit fc62814d690cf62189854464f4bd07457d5e9e50 ] + +When calculating rb->frames_per_block * req->tp_block_nr the result +can overflow. Check it for overflow without limiting the total buffer +size to UINT_MAX. + +This change fixes support for packet ring buffers >= UINT_MAX. + +Fixes: 8f8d28e4d6d8 ("net/packet: fix overflow in check for tp_frame_nr") +Signed-off-by: Kal Conley +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -4292,7 +4292,7 @@ static int packet_set_ring(struct sock * + rb->frames_per_block = req->tp_block_size / req->tp_frame_size; + if (unlikely(rb->frames_per_block == 0)) + goto out; +- if (unlikely(req->tp_block_size > UINT_MAX / req->tp_block_nr)) ++ if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) + goto out; + if (unlikely((rb->frames_per_block * req->tp_block_nr) != + req->tp_frame_nr)) diff --git a/queue-4.20/net-sfp-do-not-probe-sfp-module-before-we-re-attached.patch b/queue-4.20/net-sfp-do-not-probe-sfp-module-before-we-re-attached.patch new file mode 100644 index 00000000000..4226490b77d --- /dev/null +++ b/queue-4.20/net-sfp-do-not-probe-sfp-module-before-we-re-attached.patch @@ -0,0 +1,138 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Russell King +Date: Wed, 6 Feb 2019 10:52:30 +0000 +Subject: net: sfp: do not probe SFP module before we're attached + +From: Russell King + +[ Upstream commit 
b5bfc21af5cb3d53f9cee0ef82eaa43762a90f81 ] + +When we probe a SFP module, we expect to be able to call the upstream +device's module_insert() function so that the upstream link can be +configured. However, when the upstream device is delayed, we currently +may end up probing the module before the upstream device is available, +and lose the module_insert() call. + +Avoid this by holding off probing the module until the SFP bus is +properly connected to both the SFP socket driver and the upstream +driver. + +Signed-off-by: Russell King +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/sfp-bus.c | 2 ++ + drivers/net/phy/sfp.c | 30 +++++++++++++++++++++--------- + drivers/net/phy/sfp.h | 2 ++ + 3 files changed, 25 insertions(+), 9 deletions(-) + +--- a/drivers/net/phy/sfp-bus.c ++++ b/drivers/net/phy/sfp-bus.c +@@ -347,6 +347,7 @@ static int sfp_register_bus(struct sfp_b + return ret; + } + } ++ bus->socket_ops->attach(bus->sfp); + if (bus->started) + bus->socket_ops->start(bus->sfp); + bus->netdev->sfp_bus = bus; +@@ -362,6 +363,7 @@ static void sfp_unregister_bus(struct sf + if (bus->registered) { + if (bus->started) + bus->socket_ops->stop(bus->sfp); ++ bus->socket_ops->detach(bus->sfp); + if (bus->phydev && ops && ops->disconnect_phy) + ops->disconnect_phy(bus->upstream); + } +--- a/drivers/net/phy/sfp.c ++++ b/drivers/net/phy/sfp.c +@@ -184,6 +184,7 @@ struct sfp { + + struct gpio_desc *gpio[GPIO_MAX]; + ++ bool attached; + unsigned int state; + struct delayed_work poll; + struct delayed_work timeout; +@@ -1475,7 +1476,7 @@ static void sfp_sm_event(struct sfp *sfp + */ + switch (sfp->sm_mod_state) { + default: +- if (event == SFP_E_INSERT) { ++ if (event == SFP_E_INSERT && sfp->attached) { + sfp_module_tx_disable(sfp); + sfp_sm_ins_next(sfp, SFP_MOD_PROBE, T_PROBE_INIT); + } +@@ -1607,6 +1608,19 @@ static void sfp_sm_event(struct sfp *sfp + mutex_unlock(&sfp->sm_mutex); + } + ++static void sfp_attach(struct sfp *sfp) ++{ ++ 
sfp->attached = true; ++ if (sfp->state & SFP_F_PRESENT) ++ sfp_sm_event(sfp, SFP_E_INSERT); ++} ++ ++static void sfp_detach(struct sfp *sfp) ++{ ++ sfp->attached = false; ++ sfp_sm_event(sfp, SFP_E_REMOVE); ++} ++ + static void sfp_start(struct sfp *sfp) + { + sfp_sm_event(sfp, SFP_E_DEV_UP); +@@ -1667,6 +1681,8 @@ static int sfp_module_eeprom(struct sfp + } + + static const struct sfp_socket_ops sfp_module_ops = { ++ .attach = sfp_attach, ++ .detach = sfp_detach, + .start = sfp_start, + .stop = sfp_stop, + .module_info = sfp_module_info, +@@ -1834,10 +1850,6 @@ static int sfp_probe(struct platform_dev + dev_info(sfp->dev, "Host maximum power %u.%uW\n", + sfp->max_power_mW / 1000, (sfp->max_power_mW / 100) % 10); + +- sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); +- if (!sfp->sfp_bus) +- return -ENOMEM; +- + /* Get the initial state, and always signal TX disable, + * since the network interface will not be up. + */ +@@ -1848,10 +1860,6 @@ static int sfp_probe(struct platform_dev + sfp->state |= SFP_F_RATE_SELECT; + sfp_set_state(sfp, sfp->state); + sfp_module_tx_disable(sfp); +- rtnl_lock(); +- if (sfp->state & SFP_F_PRESENT) +- sfp_sm_event(sfp, SFP_E_INSERT); +- rtnl_unlock(); + + for (i = 0; i < GPIO_MAX; i++) { + if (gpio_flags[i] != GPIOD_IN || !sfp->gpio[i]) +@@ -1884,6 +1892,10 @@ static int sfp_probe(struct platform_dev + dev_warn(sfp->dev, + "No tx_disable pin: SFP modules will always be emitting.\n"); + ++ sfp->sfp_bus = sfp_register_socket(sfp->dev, sfp, &sfp_module_ops); ++ if (!sfp->sfp_bus) ++ return -ENOMEM; ++ + return 0; + } + +--- a/drivers/net/phy/sfp.h ++++ b/drivers/net/phy/sfp.h +@@ -7,6 +7,8 @@ + struct sfp; + + struct sfp_socket_ops { ++ void (*attach)(struct sfp *sfp); ++ void (*detach)(struct sfp *sfp); + void (*start)(struct sfp *sfp); + void (*stop)(struct sfp *sfp); + int (*module_info)(struct sfp *sfp, struct ethtool_modinfo *modinfo); diff --git a/queue-4.20/net-socket-fix-siocgifname-in-compat.patch 
b/queue-4.20/net-socket-fix-siocgifname-in-compat.patch new file mode 100644 index 00000000000..71f893617b3 --- /dev/null +++ b/queue-4.20/net-socket-fix-siocgifname-in-compat.patch @@ -0,0 +1,58 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Johannes Berg +Date: Fri, 25 Jan 2019 22:43:19 +0100 +Subject: net: socket: fix SIOCGIFNAME in compat + +From: Johannes Berg + +[ Upstream commit c6c9fee35dc27362b7bac34b2fc9f5b8ace2e22c ] + +As reported by Robert O'Callahan in +https://bugzilla.kernel.org/show_bug.cgi?id=202273 +reverting the previous changes in this area broke +the SIOCGIFNAME ioctl in compat again (I'd previously +fixed it after his previous report of breakage in +https://bugzilla.kernel.org/show_bug.cgi?id=199469). + +This is obviously because I fixed SIOCGIFNAME more or +less by accident. + +Fix it explicitly now by making it pass through the +restored compat translation code. + +Cc: stable@vger.kernel.org +Fixes: 4cf808e7ac32 ("kill dev_ifname32()") +Reported-by: Robert O'Callahan +Signed-off-by: Johannes Berg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -2999,6 +2999,7 @@ static int compat_ifreq_ioctl(struct net + case SIOCGIFTXQLEN: + case SIOCGMIIPHY: + case SIOCGMIIREG: ++ case SIOCGIFNAME: + if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) + err = -EFAULT; + break; +@@ -3222,6 +3223,7 @@ static int compat_sock_ioctl_trans(struc + case SIOCSIFTXQLEN: + case SIOCBRADDIF: + case SIOCBRDELIF: ++ case SIOCGIFNAME: + case SIOCSIFNAME: + case SIOCGMIIPHY: + case SIOCGMIIREG: +@@ -3236,7 +3238,6 @@ static int compat_sock_ioctl_trans(struc + case SIOCBONDRELEASE: + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: +- case SIOCGIFNAME: + return sock_do_ioctl(net, sock, cmd, arg); + } + diff --git a/queue-4.20/net-socket-make-bond-ioctls-go-through-compat_ifreq_ioctl.patch b/queue-4.20/net-socket-make-bond-ioctls-go-through-compat_ifreq_ioctl.patch new file mode 100644 index 00000000000..3dd9fb9ff24 --- /dev/null +++ b/queue-4.20/net-socket-make-bond-ioctls-go-through-compat_ifreq_ioctl.patch @@ -0,0 +1,44 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Johannes Berg +Date: Fri, 25 Jan 2019 22:43:20 +0100 +Subject: net: socket: make bond ioctls go through compat_ifreq_ioctl() + +From: Johannes Berg + +[ Upstream commit 98406133dd9cb9f195676eab540c270dceca879a ] + +Same story as before, these use struct ifreq and thus need +to be read with the shorter version to not cause faults. + +Cc: stable@vger.kernel.org +Fixes: f92d4fc95341 ("kill bond_ioctl()") +Signed-off-by: Johannes Berg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -3228,16 +3228,16 @@ static int compat_sock_ioctl_trans(struc + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: ++ case SIOCBONDENSLAVE: ++ case SIOCBONDRELEASE: ++ case SIOCBONDSETHWADDR: ++ case SIOCBONDCHANGEACTIVE: + return compat_ifreq_ioctl(net, sock, cmd, argp); + + case SIOCSARP: + case SIOCGARP: + case SIOCDARP: + case SIOCATMARK: +- case SIOCBONDENSLAVE: +- case SIOCBONDRELEASE: +- case SIOCBONDSETHWADDR: +- case SIOCBONDCHANGEACTIVE: + return sock_do_ioctl(net, sock, cmd, arg); + } + diff --git a/queue-4.20/net_sched-fix-a-memory-leak-in-cls_tcindex.patch b/queue-4.20/net_sched-fix-a-memory-leak-in-cls_tcindex.patch new file mode 100644 index 00000000000..c18a852d079 --- /dev/null +++ b/queue-4.20/net_sched-fix-a-memory-leak-in-cls_tcindex.patch @@ -0,0 +1,126 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Cong Wang +Date: Mon, 11 Feb 2019 13:06:15 -0800 +Subject: net_sched: fix a memory leak in cls_tcindex + +From: Cong Wang + +[ Upstream commit 033b228e7f26b29ae37f8bfa1bc6b209a5365e9f ] + +When tcindex_destroy() destroys all the filter results in +the perfect hash table, it invokes the walker to delete +each of them. However, results with class==0 are skipped +in either tcindex_walk() or tcindex_delete(), which causes +a memory leak reported by kmemleak. + +This patch fixes it by skipping the walker and directly +deleting these filter results so we don't miss any filter +result. + +As a result of this change, we have to initialize exts->net +properly in tcindex_alloc_perfect_hash(). For net-next, we +need to consider whether we should initialize ->net in +tcf_exts_init() instead, before that just directly test +CONFIG_NET_CLS_ACT=y. + +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_tcindex.c | 46 ++++++++++++++++++++++++++++++---------------- + 1 file changed, 30 insertions(+), 16 deletions(-) + +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -221,14 +221,6 @@ found: + return 0; + } + +-static int tcindex_destroy_element(struct tcf_proto *tp, +- void *arg, struct tcf_walker *walker) +-{ +- bool last; +- +- return tcindex_delete(tp, arg, &last, NULL); +-} +- + static void tcindex_destroy_work(struct work_struct *work) + { + struct tcindex_data *p = container_of(to_rcu_work(work), +@@ -279,7 +271,7 @@ static void tcindex_free_perfect_hash(st + kfree(cp->perfect); + } + +-static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) ++static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp) + { + int i, err = 0; + +@@ -293,6 +285,9 @@ static int tcindex_alloc_perfect_hash(st + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + if (err < 0) + goto errout; ++#ifdef CONFIG_NET_CLS_ACT ++ cp->perfect[i].exts.net = net; ++#endif + } + + return 0; +@@ -341,7 +336,7 @@ tcindex_set_parms(struct net *net, struc + if (p->perfect) { + int i; + +- if (tcindex_alloc_perfect_hash(cp) < 0) ++ if (tcindex_alloc_perfect_hash(net, cp) < 0) + goto errout; + for (i = 0; i < cp->hash; i++) + cp->perfect[i].res = p->perfect[i].res; +@@ -410,7 +405,7 @@ tcindex_set_parms(struct net *net, struc + err = -ENOMEM; + if (!cp->perfect && !cp->h) { + if (valid_perfect_hash(cp)) { +- if (tcindex_alloc_perfect_hash(cp) < 0) ++ if (tcindex_alloc_perfect_hash(net, cp) < 0) + goto errout_alloc; + balloc = 1; + } else { +@@ -566,13 +561,32 @@ static void tcindex_destroy(struct tcf_p + struct netlink_ext_ack *extack) + { + struct tcindex_data *p = rtnl_dereference(tp->root); +- struct tcf_walker walker; ++ int i; + + pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); +- walker.count = 0; +- walker.skip = 0; +- walker.fn = tcindex_destroy_element; +- tcindex_walk(tp, &walker); ++ ++ if 
(p->perfect) { ++ for (i = 0; i < p->hash; i++) { ++ struct tcindex_filter_result *r = p->perfect + i; ++ ++ tcf_unbind_filter(tp, &r->res); ++ if (tcf_exts_get_net(&r->exts)) ++ tcf_queue_work(&r->rwork, ++ tcindex_destroy_rexts_work); ++ else ++ __tcindex_destroy_rexts(r); ++ } ++ } ++ ++ for (i = 0; p->h && i < p->hash; i++) { ++ struct tcindex_filter *f, *next; ++ bool last; ++ ++ for (f = rtnl_dereference(p->h[i]); f; f = next) { ++ next = rtnl_dereference(f->next); ++ tcindex_delete(tp, &f->result, &last, NULL); ++ } ++ } + + tcf_queue_work(&p->rwork, tcindex_destroy_work); + } diff --git a/queue-4.20/net_sched-fix-a-race-condition-in-tcindex_destroy.patch b/queue-4.20/net_sched-fix-a-race-condition-in-tcindex_destroy.patch new file mode 100644 index 00000000000..24c0bd30af5 --- /dev/null +++ b/queue-4.20/net_sched-fix-a-race-condition-in-tcindex_destroy.patch @@ -0,0 +1,92 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Cong Wang +Date: Mon, 11 Feb 2019 13:06:14 -0800 +Subject: net_sched: fix a race condition in tcindex_destroy() + +From: Cong Wang + +[ Upstream commit 8015d93ebd27484418d4952284fd02172fa4b0b2 ] + +tcindex_destroy() invokes tcindex_destroy_element() via +a walker to delete each filter result in its perfect hash +table, and tcindex_destroy_element() calls tcindex_delete() +which schedules tcf RCU works to do the final deletion work. +Unfortunately this races with the RCU callback +__tcindex_destroy(), which could lead to use-after-free as +reported by Adrian. + +Fix this by migrating this RCU callback to tcf RCU work too, +as that workqueue is ordered, we will not have use-after-free. + +Note, we don't need to hold netns refcnt because we don't call +tcf_exts_destroy() here. + +Fixes: 27ce4f05e2ab ("net_sched: use tcf_queue_work() in tcindex filter") +Reported-by: Adrian +Cc: Ben Hutchings +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_tcindex.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -48,7 +48,7 @@ struct tcindex_data { + u32 hash; /* hash table size; 0 if undefined */ + u32 alloc_hash; /* allocated size */ + u32 fall_through; /* 0: only classify if explicit match */ +- struct rcu_head rcu; ++ struct rcu_work rwork; + }; + + static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) +@@ -229,9 +229,11 @@ static int tcindex_destroy_element(struc + return tcindex_delete(tp, arg, &last, NULL); + } + +-static void __tcindex_destroy(struct rcu_head *head) ++static void tcindex_destroy_work(struct work_struct *work) + { +- struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); ++ struct tcindex_data *p = container_of(to_rcu_work(work), ++ struct tcindex_data, ++ rwork); + + kfree(p->perfect); + kfree(p->h); +@@ -258,9 +260,11 @@ static int tcindex_filter_result_init(st + return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + } + +-static void __tcindex_partial_destroy(struct rcu_head *head) ++static void tcindex_partial_destroy_work(struct work_struct *work) + { +- struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); ++ struct tcindex_data *p = container_of(to_rcu_work(work), ++ struct tcindex_data, ++ rwork); + + kfree(p->perfect); + kfree(p); +@@ -478,7 +482,7 @@ tcindex_set_parms(struct net *net, struc + } + + if (oldp) +- call_rcu(&oldp->rcu, __tcindex_partial_destroy); ++ tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work); + return 0; + + errout_alloc: +@@ -570,7 +574,7 @@ static void tcindex_destroy(struct tcf_p + walker.fn = tcindex_destroy_element; + tcindex_walk(tp, &walker); + +- call_rcu(&p->rcu, __tcindex_destroy); ++ tcf_queue_work(&p->rwork, tcindex_destroy_work); + } + + diff --git a/queue-4.20/net_sched-fix-two-more-memory-leaks-in-cls_tcindex.patch 
b/queue-4.20/net_sched-fix-two-more-memory-leaks-in-cls_tcindex.patch new file mode 100644 index 00000000000..e0c8000c218 --- /dev/null +++ b/queue-4.20/net_sched-fix-two-more-memory-leaks-in-cls_tcindex.patch @@ -0,0 +1,91 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Cong Wang +Date: Mon, 11 Feb 2019 13:06:16 -0800 +Subject: net_sched: fix two more memory leaks in cls_tcindex + +From: Cong Wang + +[ Upstream commit 1db817e75f5b9387b8db11e37d5f0624eb9223e0 ] + +struct tcindex_filter_result contains two parts: +struct tcf_exts and struct tcf_result. + +For the local variable 'cr', its exts part is never used but +initialized without being released properly on success path. So +just completely remove the exts part to fix this leak. + +For the local variable 'new_filter_result', it is never properly +released if not used by 'r' on success path. + +Cc: Jamal Hadi Salim +Cc: Jiri Pirko +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_tcindex.c | 16 +++++++--------- + 1 file changed, 7 insertions(+), 9 deletions(-) + +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -304,9 +304,9 @@ tcindex_set_parms(struct net *net, struc + struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) + { + struct tcindex_filter_result new_filter_result, *old_r = r; +- struct tcindex_filter_result cr; + struct tcindex_data *cp = NULL, *oldp; + struct tcindex_filter *f = NULL; /* make gcc behave */ ++ struct tcf_result cr = {}; + int err, balloc = 0; + struct tcf_exts e; + +@@ -347,11 +347,8 @@ tcindex_set_parms(struct net *net, struc + err = tcindex_filter_result_init(&new_filter_result); + if (err < 0) + goto errout1; +- err = tcindex_filter_result_init(&cr); +- if (err < 0) +- goto errout1; + if (old_r) +- cr.res = r->res; ++ cr = r->res; + + if (tb[TCA_TCINDEX_HASH]) + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); +@@ -442,8 +439,8 @@ tcindex_set_parms(struct net *net, struc + } + + if 
(tb[TCA_TCINDEX_CLASSID]) { +- cr.res.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); +- tcf_bind_filter(tp, &cr.res, base); ++ cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]); ++ tcf_bind_filter(tp, &cr, base); + } + + if (old_r && old_r != r) { +@@ -455,7 +452,7 @@ tcindex_set_parms(struct net *net, struc + } + + oldp = p; +- r->res = cr.res; ++ r->res = cr; + tcf_exts_change(&r->exts, &e); + + rcu_assign_pointer(tp->root, cp); +@@ -474,6 +471,8 @@ tcindex_set_parms(struct net *net, struc + ; /* nothing */ + + rcu_assign_pointer(*fp, f); ++ } else { ++ tcf_exts_destroy(&new_filter_result.exts); + } + + if (oldp) +@@ -486,7 +485,6 @@ errout_alloc: + else if (balloc == 2) + kfree(cp->h); + errout1: +- tcf_exts_destroy(&cr.exts); + tcf_exts_destroy(&new_filter_result.exts); + errout: + kfree(cp); diff --git a/queue-4.20/revert-kill-dev_ifsioc.patch b/queue-4.20/revert-kill-dev_ifsioc.patch new file mode 100644 index 00000000000..3ad984e7658 --- /dev/null +++ b/queue-4.20/revert-kill-dev_ifsioc.patch @@ -0,0 +1,96 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Johannes Berg +Date: Fri, 25 Jan 2019 22:43:18 +0100 +Subject: Revert "kill dev_ifsioc()" + +From: Johannes Berg + +[ Upstream commit 37ac39bdddc528c998a9f36db36937de923fdf2a ] + +This reverts commit bf4405737f9f ("kill dev_ifsioc()"). + +This wasn't really unused as implied by the original commit, +it still handles the copy to/from user differently, and the +commit thus caused issues such as + https://bugzilla.kernel.org/show_bug.cgi?id=199469 +and + https://bugzilla.kernel.org/show_bug.cgi?id=202273 + +However, deviating from a strict revert, rename dev_ifsioc() +to compat_ifreq_ioctl() to be clearer as to its purpose and +add a comment. + +Cc: stable@vger.kernel.org +Fixes: bf4405737f9f ("kill dev_ifsioc()") +Signed-off-by: Johannes Berg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 49 insertions(+) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -2960,6 +2960,53 @@ static int compat_ifr_data_ioctl(struct + return dev_ioctl(net, cmd, &ifreq, NULL); + } + ++static int compat_ifreq_ioctl(struct net *net, struct socket *sock, ++ unsigned int cmd, ++ struct compat_ifreq __user *uifr32) ++{ ++ struct ifreq __user *uifr; ++ int err; ++ ++ /* Handle the fact that while struct ifreq has the same *layout* on ++ * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, ++ * which are handled elsewhere, it still has different *size* due to ++ * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, ++ * resulting in struct ifreq being 32 and 40 bytes respectively). ++ * As a result, if the struct happens to be at the end of a page and ++ * the next page isn't readable/writable, we get a fault. To prevent ++ * that, copy back and forth to the full size. 
++ */ ++ ++ uifr = compat_alloc_user_space(sizeof(*uifr)); ++ if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) ++ return -EFAULT; ++ ++ err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); ++ ++ if (!err) { ++ switch (cmd) { ++ case SIOCGIFFLAGS: ++ case SIOCGIFMETRIC: ++ case SIOCGIFMTU: ++ case SIOCGIFMEM: ++ case SIOCGIFHWADDR: ++ case SIOCGIFINDEX: ++ case SIOCGIFADDR: ++ case SIOCGIFBRDADDR: ++ case SIOCGIFDSTADDR: ++ case SIOCGIFNETMASK: ++ case SIOCGIFPFLAGS: ++ case SIOCGIFTXQLEN: ++ case SIOCGMIIPHY: ++ case SIOCGMIIREG: ++ if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) ++ err = -EFAULT; ++ break; ++ } ++ } ++ return err; ++} ++ + static int compat_sioc_ifmap(struct net *net, unsigned int cmd, + struct compat_ifreq __user *uifr32) + { +@@ -3179,6 +3226,8 @@ static int compat_sock_ioctl_trans(struc + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: ++ return compat_ifreq_ioctl(net, sock, cmd, argp); ++ + case SIOCSARP: + case SIOCGARP: + case SIOCDARP: diff --git a/queue-4.20/revert-socket-fix-struct-ifreq-size-in-compat-ioctl.patch b/queue-4.20/revert-socket-fix-struct-ifreq-size-in-compat-ioctl.patch new file mode 100644 index 00000000000..232d45255b5 --- /dev/null +++ b/queue-4.20/revert-socket-fix-struct-ifreq-size-in-compat-ioctl.patch @@ -0,0 +1,101 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Johannes Berg +Date: Fri, 25 Jan 2019 22:43:17 +0100 +Subject: Revert "socket: fix struct ifreq size in compat ioctl" + +From: Johannes Berg + +[ Upstream commit 63ff03ab786ab1bc6cca01d48eacd22c95b9b3eb ] + +This reverts commit 1cebf8f143c2 ("socket: fix struct ifreq +size in compat ioctl"), it's a bugfix for another commit that +I'll revert next. + +This is not a 'perfect' revert, I'm keeping some coding style +intact rather than revert to the state with indentation errors. + +Cc: stable@vger.kernel.org +Fixes: 1cebf8f143c2 ("socket: fix struct ifreq size in compat ioctl") +Signed-off-by: Johannes Berg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 22 ++++++++-------------- + 1 file changed, 8 insertions(+), 14 deletions(-) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -941,8 +941,7 @@ void dlci_ioctl_set(int (*hook) (unsigne + EXPORT_SYMBOL(dlci_ioctl_set); + + static long sock_do_ioctl(struct net *net, struct socket *sock, +- unsigned int cmd, unsigned long arg, +- unsigned int ifreq_size) ++ unsigned int cmd, unsigned long arg) + { + int err; + void __user *argp = (void __user *)arg; +@@ -968,11 +967,11 @@ static long sock_do_ioctl(struct net *ne + } else { + struct ifreq ifr; + bool need_copyout; +- if (copy_from_user(&ifr, argp, ifreq_size)) ++ if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) + return -EFAULT; + err = dev_ioctl(net, cmd, &ifr, &need_copyout); + if (!err && need_copyout) +- if (copy_to_user(argp, &ifr, ifreq_size)) ++ if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) + return -EFAULT; + } + return err; +@@ -1071,8 +1070,7 @@ static long sock_ioctl(struct file *file + err = open_related_ns(&net->ns, get_net_ns); + break; + default: +- err = sock_do_ioctl(net, sock, cmd, arg, +- sizeof(struct ifreq)); ++ err = sock_do_ioctl(net, sock, cmd, arg); + break; + } + return err; +@@ -2750,8 +2748,7 @@ static int do_siocgstamp(struct net *net + int err; + + set_fs(KERNEL_DS); +- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, +- sizeof(struct compat_ifreq)); ++ err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); + set_fs(old_fs); + if (!err) + err = compat_put_timeval(&ktv, up); +@@ -2767,8 +2764,7 @@ static int do_siocgstampns(struct net *n + int err; + + set_fs(KERNEL_DS); +- err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, +- sizeof(struct compat_ifreq)); ++ err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); + set_fs(old_fs); + if (!err) + err = compat_put_timespec(&kts, up); +@@ -3079,8 +3075,7 @@ static int routing_ioctl(struct net *net + } + + set_fs(KERNEL_DS); +- ret = sock_do_ioctl(net, 
sock, cmd, (unsigned long) r, +- sizeof(struct compat_ifreq)); ++ ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); + set_fs(old_fs); + + out: +@@ -3193,8 +3188,7 @@ static int compat_sock_ioctl_trans(struc + case SIOCBONDSETHWADDR: + case SIOCBONDCHANGEACTIVE: + case SIOCGIFNAME: +- return sock_do_ioctl(net, sock, cmd, arg, +- sizeof(struct compat_ifreq)); ++ return sock_do_ioctl(net, sock, cmd, arg); + } + + return -ENOIOCTLCMD; diff --git a/queue-4.20/sctp-call-gso_reset_checksum-when-computing-checksum-in-sctp_gso_segment.patch b/queue-4.20/sctp-call-gso_reset_checksum-when-computing-checksum-in-sctp_gso_segment.patch new file mode 100644 index 00000000000..f7b92d32b3b --- /dev/null +++ b/queue-4.20/sctp-call-gso_reset_checksum-when-computing-checksum-in-sctp_gso_segment.patch @@ -0,0 +1,69 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Xin Long +Date: Tue, 12 Feb 2019 18:47:30 +0800 +Subject: sctp: call gso_reset_checksum when computing checksum in sctp_gso_segment + +From: Xin Long + +[ Upstream commit fc228abc2347e106a44c0e9b29ab70b712c4ca51 ] + +Jianlin reported a panic when running sctp gso over gre over vlan device: + + [ 84.772930] RIP: 0010:do_csum+0x6d/0x170 + [ 84.790605] Call Trace: + [ 84.791054] csum_partial+0xd/0x20 + [ 84.791657] gre_gso_segment+0x2c3/0x390 + [ 84.792364] inet_gso_segment+0x161/0x3e0 + [ 84.793071] skb_mac_gso_segment+0xb8/0x120 + [ 84.793846] __skb_gso_segment+0x7e/0x180 + [ 84.794581] validate_xmit_skb+0x141/0x2e0 + [ 84.795297] __dev_queue_xmit+0x258/0x8f0 + [ 84.795949] ? eth_header+0x26/0xc0 + [ 84.796581] ip_finish_output2+0x196/0x430 + [ 84.797295] ? skb_gso_validate_network_len+0x11/0x80 + [ 84.798183] ? ip_finish_output+0x169/0x270 + [ 84.798875] ip_output+0x6c/0xe0 + [ 84.799413] ? 
ip_append_data.part.50+0xc0/0xc0 + [ 84.800145] iptunnel_xmit+0x144/0x1c0 + [ 84.800814] ip_tunnel_xmit+0x62d/0x930 [ip_tunnel] + [ 84.801699] gre_tap_xmit+0xac/0xf0 [ip_gre] + [ 84.802395] dev_hard_start_xmit+0xa5/0x210 + [ 84.803086] sch_direct_xmit+0x14f/0x340 + [ 84.803733] __dev_queue_xmit+0x799/0x8f0 + [ 84.804472] ip_finish_output2+0x2e0/0x430 + [ 84.805255] ? skb_gso_validate_network_len+0x11/0x80 + [ 84.806154] ip_output+0x6c/0xe0 + [ 84.806721] ? ip_append_data.part.50+0xc0/0xc0 + [ 84.807516] sctp_packet_transmit+0x716/0xa10 [sctp] + [ 84.808337] sctp_outq_flush+0xd7/0x880 [sctp] + +It was caused by SKB_GSO_CB(skb)->csum_start not set in sctp_gso_segment. +sctp_gso_segment() calls skb_segment() with 'feature | NETIF_F_HW_CSUM', +which causes SKB_GSO_CB(skb)->csum_start not to be set in skb_segment(). + +For TCP/UDP, when feature supports HW_CSUM, CHECKSUM_PARTIAL will be set +and gso_reset_checksum will be called to set SKB_GSO_CB(skb)->csum_start. + +So SCTP should do the same as TCP/UDP, to call gso_reset_checksum() when +computing checksum in sctp_gso_segment. + +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Acked-by: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/offload.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/sctp/offload.c ++++ b/net/sctp/offload.c +@@ -36,6 +36,7 @@ static __le32 sctp_gso_make_checksum(str + { + skb->ip_summed = CHECKSUM_NONE; + skb->csum_not_inet = 0; ++ gso_reset_checksum(skb, ~0); + return sctp_compute_cksum(skb, skb_transport_offset(skb)); + } + diff --git a/queue-4.20/sctp-set-stream-ext-to-null-after-freeing-it-in-sctp_stream_outq_migrate.patch b/queue-4.20/sctp-set-stream-ext-to-null-after-freeing-it-in-sctp_stream_outq_migrate.patch new file mode 100644 index 00000000000..3b3eb0d0014 --- /dev/null +++ b/queue-4.20/sctp-set-stream-ext-to-null-after-freeing-it-in-sctp_stream_outq_migrate.patch @@ -0,0 +1,46 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Xin Long +Date: Tue, 12 Feb 2019 18:51:01 +0800 +Subject: sctp: set stream ext to NULL after freeing it in sctp_stream_outq_migrate + +From: Xin Long + +[ Upstream commit af98c5a78517c04adb5fd68bb64b1ad6fe3d473f ] + +In sctp_stream_init(), after sctp_stream_outq_migrate() freed the +surplus streams' ext, but sctp_stream_alloc_out() returns -ENOMEM, +stream->outcnt will not be set to 'outcnt'. + +With the bigger value on stream->outcnt, when closing the assoc and +freeing its streams, the ext of those surplus streams will be freed +again since those stream exts were not set to NULL after freeing in +sctp_stream_outq_migrate(). Then the invalid-free issue reported by +syzbot would be triggered. + +We fix it by simply setting them to NULL after freeing. + +Fixes: 5bbbbe32a431 ("sctp: introduce stream scheduler foundations") +Reported-by: syzbot+58e480e7b28f2d890bfd@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Acked-by: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/stream.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -144,8 +144,10 @@ static void sctp_stream_outq_migrate(str + } + } + +- for (i = outcnt; i < stream->outcnt; i++) ++ for (i = outcnt; i < stream->outcnt; i++) { + kfree(SCTP_SO(stream, i)->ext); ++ SCTP_SO(stream, i)->ext = NULL; ++ } + } + + static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, diff --git a/queue-4.20/series b/queue-4.20/series index e8e163783e5..97767f9fe99 100644 --- a/queue-4.20/series +++ b/queue-4.20/series @@ -119,3 +119,26 @@ selftests-forwarding-add-a-test-case-for-externally-.patch bpf-pull-in-pkt_sched.h-header-for-tooling-to-fix-bp.patch net-mlx5e-fix-wrong-zero-tx-drop-counter-indication-.patch isdn-avm-fix-string-plus-integer-warning-from-clang.patch +batman-adv-fix-uninit-value-in-batadv_interface_tx.patch +inet_diag-fix-reporting-cgroup-classid-and-fallback-to-priority.patch +ipv6-propagate-genlmsg_reply-return-code.patch +net-ena-fix-race-between-link-up-and-device-initalization.patch +net-mlx4_en-force-checksum_none-for-short-ethernet-frames.patch +net-mlx5e-don-t-overwrite-pedit-action-when-multiple-pedit-used.patch +net-packet-fix-4gb-buffer-limit-due-to-overflow-check.patch +net-sfp-do-not-probe-sfp-module-before-we-re-attached.patch +sctp-call-gso_reset_checksum-when-computing-checksum-in-sctp_gso_segment.patch +sctp-set-stream-ext-to-null-after-freeing-it-in-sctp_stream_outq_migrate.patch +team-avoid-complex-list-operations-in-team_nl_cmd_options_set.patch +net-dsa-fix-npd-checking-for-br_vlan_enabled.patch +revert-socket-fix-struct-ifreq-size-in-compat-ioctl.patch +revert-kill-dev_ifsioc.patch +net-socket-fix-siocgifname-in-compat.patch +net-socket-make-bond-ioctls-go-through-compat_ifreq_ioctl.patch +geneve-should-not-call-rt6_lookup-when-ipv6-was-disabled.patch 
+sit-check-if-ipv6-enabled-before-calling-ip6_err_gen_icmpv6_unreach.patch +net_sched-fix-a-race-condition-in-tcindex_destroy.patch +net_sched-fix-a-memory-leak-in-cls_tcindex.patch +net_sched-fix-two-more-memory-leaks-in-cls_tcindex.patch +net-mlx5e-fpga-fix-innova-ipsec-tx-offload-data-path-performance.patch +net-mlx5e-xdp-fix-redirect-resources-availability-check.patch diff --git a/queue-4.20/sit-check-if-ipv6-enabled-before-calling-ip6_err_gen_icmpv6_unreach.patch b/queue-4.20/sit-check-if-ipv6-enabled-before-calling-ip6_err_gen_icmpv6_unreach.patch new file mode 100644 index 00000000000..6e8257b0ed5 --- /dev/null +++ b/queue-4.20/sit-check-if-ipv6-enabled-before-calling-ip6_err_gen_icmpv6_unreach.patch @@ -0,0 +1,46 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Hangbin Liu +Date: Thu, 7 Feb 2019 18:36:11 +0800 +Subject: sit: check if IPv6 enabled before calling ip6_err_gen_icmpv6_unreach() + +From: Hangbin Liu + +[ Upstream commit 173656accaf583698bac3f9e269884ba60d51ef4 ] + +If we disabled IPv6 from the kernel command line (ipv6.disable=1), we should +not call ip6_err_gen_icmpv6_unreach(). This: + + ip link add sit1 type sit local 192.0.2.1 remote 192.0.2.2 ttl 1 + ip link set sit1 up + ip addr add 198.51.100.1/24 dev sit1 + ping 198.51.100.2 + +if IPv6 is disabled at boot time, will crash the kernel. + +v2: there's no need to use in6_dev_get(), use __in6_dev_get() instead, + as we only need to check that idev exists and we are under + rcu_read_lock() (from netif_receive_skb_internal()). + +Reported-by: Jianlin Shi +Fixes: ca15a078bd90 ("sit: generate icmpv6 error when receiving icmpv4 error") +Cc: Oussama Ghorbel +Signed-off-by: Hangbin Liu +Reviewed-by: Stefano Brivio +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/sit.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv6/sit.c ++++ b/net/ipv6/sit.c +@@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb + } + + err = 0; +- if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) ++ if (__in6_dev_get(skb->dev) && ++ !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len)) + goto out; + + if (t->parms.iph.daddr == 0) diff --git a/queue-4.20/team-avoid-complex-list-operations-in-team_nl_cmd_options_set.patch b/queue-4.20/team-avoid-complex-list-operations-in-team_nl_cmd_options_set.patch new file mode 100644 index 00000000000..27f636a1849 --- /dev/null +++ b/queue-4.20/team-avoid-complex-list-operations-in-team_nl_cmd_options_set.patch @@ -0,0 +1,113 @@ +From foo@baz Sun Feb 24 08:38:45 CET 2019 +From: Cong Wang +Date: Mon, 11 Feb 2019 21:59:51 -0800 +Subject: team: avoid complex list operations in team_nl_cmd_options_set() + +From: Cong Wang + +[ Upstream commit 2fdeee2549231b1f989f011bb18191f5660d3745 ] + +The current opt_inst_list operations inside team_nl_cmd_options_set() +is too complex to track: + + LIST_HEAD(opt_inst_list); + nla_for_each_nested(...) { + list_for_each_entry(opt_inst, &team->option_inst_list, list) { + if (__team_option_inst_tmp_find(&opt_inst_list, opt_inst)) + continue; + list_add(&opt_inst->tmp_list, &opt_inst_list); + } + } + team_nl_send_event_options_get(team, &opt_inst_list); + +as while we retrieve 'opt_inst' from team->option_inst_list, it could +be added to the local 'opt_inst_list' for multiple times. The +__team_option_inst_tmp_find() doesn't work, as the setter +team_mode_option_set() still calls team->ops.exit() which uses +->tmp_list too in __team_options_change_check(). 
+ +Simplify the list operations by moving the 'opt_inst_list' and +team_nl_send_event_options_get() into the nla_for_each_nested() loop so +that it can be guaranteed that we won't insert the same list entry +multiple times. Therefore, __team_option_inst_tmp_find() can be removed +too. + +Fixes: 4fb0534fb7bb ("team: avoid adding twice the same option to the event list") +Fixes: 2fcdb2c9e659 ("team: allow to send multiple set events in one message") +Reported-by: syzbot+4d4af685432dc0e56c91@syzkaller.appspotmail.com +Reported-by: syzbot+68ee510075cf64260cc4@syzkaller.appspotmail.com +Cc: Jiri Pirko +Cc: Paolo Abeni +Signed-off-by: Cong Wang +Acked-by: Jiri Pirko +Reviewed-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/team/team.c | 27 +++++---------------------- + 1 file changed, 5 insertions(+), 22 deletions(-) + +--- a/drivers/net/team/team.c ++++ b/drivers/net/team/team.c +@@ -256,17 +256,6 @@ static void __team_option_inst_mark_remo + } + } + +-static bool __team_option_inst_tmp_find(const struct list_head *opts, +- const struct team_option_inst *needle) +-{ +- struct team_option_inst *opt_inst; +- +- list_for_each_entry(opt_inst, opts, tmp_list) +- if (opt_inst == needle) +- return true; +- return false; +-} +- + static int __team_options_register(struct team *team, + const struct team_option *option, + size_t option_count) +@@ -2460,7 +2449,6 @@ static int team_nl_cmd_options_set(struc + int err = 0; + int i; + struct nlattr *nl_option; +- LIST_HEAD(opt_inst_list); + + rtnl_lock(); + +@@ -2480,6 +2468,7 @@ static int team_nl_cmd_options_set(struc + struct nlattr *opt_attrs[TEAM_ATTR_OPTION_MAX + 1]; + struct nlattr *attr; + struct nlattr *attr_data; ++ LIST_HEAD(opt_inst_list); + enum team_option_type opt_type; + int opt_port_ifindex = 0; /* != 0 for per-port options */ + u32 opt_array_index = 0; +@@ -2584,23 +2573,17 @@ static int team_nl_cmd_options_set(struc + if (err) + goto team_put; +
opt_inst->changed = true; +- +- /* dumb/evil user-space can send us duplicate opt, +- * keep only the last one +- */ +- if (__team_option_inst_tmp_find(&opt_inst_list, +- opt_inst)) +- continue; +- + list_add(&opt_inst->tmp_list, &opt_inst_list); + } + if (!opt_found) { + err = -ENOENT; + goto team_put; + } +- } + +- err = team_nl_send_event_options_get(team, &opt_inst_list); ++ err = team_nl_send_event_options_get(team, &opt_inst_list); ++ if (err) ++ break; ++ } + + team_put: + team_nl_team_put(team);