From: Greg Kroah-Hartman Date: Fri, 2 Nov 2018 05:13:17 +0000 (+0100) Subject: 4.18-stable patches X-Git-Tag: v4.19.1~24 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=b81430772b1283234970052ca7b3c56aad57a403;p=thirdparty%2Fkernel%2Fstable-queue.git 4.18-stable patches added patches: bonding-fix-length-of-actor-system.patch bridge-do-not-add-port-to-router-list-when-receives-query-with-source-0.0.0.0.patch ethtool-fix-a-privilege-escalation-bug.patch ip6_tunnel-fix-encapsulation-layout.patch ipv6-mcast-fix-a-use-after-free-in-inet6_mc_check.patch ipv6-ndisc-preserve-ipv6-control-buffer-if-protocol-error-handlers-are-called.patch ipv6-rate-limit-probes-for-neighbourless-routes.patch llc-set-sock_rcu_free-in-llc_sap_add_socket.patch mlxsw-core-fix-devlink-unregister-flow.patch mlxsw-spectrum_switchdev-don-t-ignore-deletions-of-learned-macs.patch net-bcmgenet-poll-internal-phy-for-genetv5.patch net-bpfilter-use-get_pid_task-instead-of-pid_task.patch net-drop-skb-on-failure-in-ip_check_defrag.patch net-fec-don-t-dump-rx-fifo-register-when-not-available.patch net-fix-pskb_trim_rcsum_slow-with-odd-trim-offset.patch net-ipmr-fix-unresolved-entry-dumps.patch net-ipv6-allow-onlink-routes-to-have-a-device-mismatch-if-it-is-the-default-route.patch net-ipv6-fix-index-counter-for-unicast-addresses-in-in6_dump_addrs.patch net-mlx5-fix-memory-leak-when-setting-fpga-ipsec-caps.patch net-mlx5-take-only-bit-24-26-of-wqe.pftype_wq-for-page-fault-type.patch net-mlx5-wq-fixes-for-fragmented-wq-buffers-api.patch net-mlx5e-fix-csum-adjustments-caused-by-rxfcs.patch net-sched-cls_api-add-missing-validation-of-netlink-attributes.patch net-sched-fix-for-duplicate-class-dump.patch net-sched-gred-pass-the-right-attribute-to-gred_change_table_def.patch net-smc-fix-smc_buf_unuse-to-use-the-lgr-pointer.patch net-socket-fix-a-missing-check-bug.patch net-stmmac-fix-stmmac_mdio_reset-when-building-stmmac-as-modules.patch net-udp-fix-handling-of-checksum_complete-packets.patch 
openvswitch-fix-push-pop-ethernet-validation.patch r8169-fix-napi-handling-under-high-load.patch rtnetlink-disallow-fdb-configuration-for-non-ethernet-device.patch sctp-fix-race-on-sctp_id2asoc.patch sctp-fix-the-data-size-calculation-in-sctp_data_size.patch sctp-not-free-the-new-asoc-when-sctp_wait_for_connect-returns-err.patch tipc-fix-unsafe-rcu-locking-when-accessing-publication-list.patch udp6-fix-encap-return-code-for-resubmitting.patch vhost-fix-spectre-v1-vulnerability.patch virtio_net-avoid-using-netif_tx_disable-for-serializing-tx-routine.patch --- diff --git a/queue-4.18/bonding-fix-length-of-actor-system.patch b/queue-4.18/bonding-fix-length-of-actor-system.patch new file mode 100644 index 00000000000..23bdd515ebb --- /dev/null +++ b/queue-4.18/bonding-fix-length-of-actor-system.patch @@ -0,0 +1,34 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Tobias Jungel +Date: Sun, 28 Oct 2018 12:54:10 +0100 +Subject: bonding: fix length of actor system + +From: Tobias Jungel + +[ Upstream commit 414dd6fb9a1a1b59983aea7bf0f79f0085ecc5b8 ] + +The attribute IFLA_BOND_AD_ACTOR_SYSTEM is sent to user space having the +length of sizeof(bond->params.ad_actor_system) which is 8 byte. This +patch aligns the length to ETH_ALEN to have the same MAC address exposed +as using sysfs. + +Fixes: f87fda00b6ed2 ("bonding: prevent out of bound accesses") +Signed-off-by: Tobias Jungel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_netlink.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/drivers/net/bonding/bond_netlink.c ++++ b/drivers/net/bonding/bond_netlink.c +@@ -638,8 +638,7 @@ static int bond_fill_info(struct sk_buff + goto nla_put_failure; + + if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM, +- sizeof(bond->params.ad_actor_system), +- &bond->params.ad_actor_system)) ++ ETH_ALEN, &bond->params.ad_actor_system)) + goto nla_put_failure; + } + if (!bond_3ad_get_active_agg_info(bond, &info)) { diff --git a/queue-4.18/bridge-do-not-add-port-to-router-list-when-receives-query-with-source-0.0.0.0.patch b/queue-4.18/bridge-do-not-add-port-to-router-list-when-receives-query-with-source-0.0.0.0.patch new file mode 100644 index 00000000000..7c52a4370e2 --- /dev/null +++ b/queue-4.18/bridge-do-not-add-port-to-router-list-when-receives-query-with-source-0.0.0.0.patch @@ -0,0 +1,56 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Hangbin Liu +Date: Fri, 26 Oct 2018 10:28:43 +0800 +Subject: bridge: do not add port to router list when receives query with source 0.0.0.0 + +From: Hangbin Liu + +[ Upstream commit 5a2de63fd1a59c30c02526d427bc014b98adf508 ] + +Based on RFC 4541, 2.1.1. IGMP Forwarding Rules + + The switch supporting IGMP snooping must maintain a list of + multicast routers and the ports on which they are attached. This + list can be constructed in any combination of the following ways: + + a) This list should be built by the snooping switch sending + Multicast Router Solicitation messages as described in IGMP + Multicast Router Discovery [MRDISC]. It may also snoop + Multicast Router Advertisement messages sent by and to other + nodes. + + b) The arrival port for IGMP Queries (sent by multicast routers) + where the source address is not 0.0.0.0. + +We should not add the port to router list when receives query with source +0.0.0.0. 
+ +Reported-by: Ying Xu +Signed-off-by: Hangbin Liu +Acked-by: Nikolay Aleksandrov +Acked-by: Roopa Prabhu +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_multicast.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +--- a/net/bridge/br_multicast.c ++++ b/net/bridge/br_multicast.c +@@ -1420,7 +1420,15 @@ static void br_multicast_query_received( + return; + + br_multicast_update_query_timer(br, query, max_delay); +- br_multicast_mark_router(br, port); ++ ++ /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules, ++ * the arrival port for IGMP Queries where the source address ++ * is 0.0.0.0 should not be added to router port list. ++ */ ++ if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) || ++ (saddr->proto == htons(ETH_P_IPV6) && ++ !ipv6_addr_any(&saddr->u.ip6))) ++ br_multicast_mark_router(br, port); + } + + static int br_ip4_multicast_query(struct net_bridge *br, diff --git a/queue-4.18/ethtool-fix-a-privilege-escalation-bug.patch b/queue-4.18/ethtool-fix-a-privilege-escalation-bug.patch new file mode 100644 index 00000000000..1e962199529 --- /dev/null +++ b/queue-4.18/ethtool-fix-a-privilege-escalation-bug.patch @@ -0,0 +1,73 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Wenwen Wang +Date: Mon, 8 Oct 2018 10:49:35 -0500 +Subject: ethtool: fix a privilege escalation bug + +From: Wenwen Wang + +[ Upstream commit 58f5bbe331c566f49c9559568f982202a278aa78 ] + +In dev_ethtool(), the eth command 'ethcmd' is firstly copied from the +user-space buffer 'useraddr' and checked to see whether it is +ETHTOOL_PERQUEUE. If yes, the sub-command 'sub_cmd' is further copied from +the user space. Otherwise, 'sub_cmd' is the same as 'ethcmd'. Next, +according to 'sub_cmd', a permission check is enforced through the function +ns_capable(). 
For example, the permission check is required if 'sub_cmd' is +ETHTOOL_SCOALESCE, but it is not necessary if 'sub_cmd' is +ETHTOOL_GCOALESCE, as suggested in the comment "Allow some commands to be +done by anyone". The following execution invokes different handlers +according to 'ethcmd'. Specifically, if 'ethcmd' is ETHTOOL_PERQUEUE, +ethtool_set_per_queue() is called. In ethtool_set_per_queue(), the kernel +object 'per_queue_opt' is copied again from the user-space buffer +'useraddr' and 'per_queue_opt.sub_command' is used to determine which +operation should be performed. Given that the buffer 'useraddr' is in the +user space, a malicious user can race to change the sub-command between the +two copies. In particular, the attacker can supply ETHTOOL_PERQUEUE and +ETHTOOL_GCOALESCE to bypass the permission check in dev_ethtool(). Then +before ethtool_set_per_queue() is called, the attacker changes +ETHTOOL_GCOALESCE to ETHTOOL_SCOALESCE. In this way, the attacker can +bypass the permission check and execute ETHTOOL_SCOALESCE. + +This patch enforces a check in ethtool_set_per_queue() after the second +copy from 'useraddr'. If the sub-command is different from the one obtained +in the first copy in dev_ethtool(), an error code EINVAL will be returned. + +Fixes: f38d138a7da6 ("net/ethtool: support set coalesce per queue") +Signed-off-by: Wenwen Wang +Reviewed-by: Michal Kubecek +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/ethtool.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/core/ethtool.c ++++ b/net/core/ethtool.c +@@ -2461,13 +2461,17 @@ roll_back: + return ret; + } + +-static int ethtool_set_per_queue(struct net_device *dev, void __user *useraddr) ++static int ethtool_set_per_queue(struct net_device *dev, ++ void __user *useraddr, u32 sub_cmd) + { + struct ethtool_per_queue_op per_queue_opt; + + if (copy_from_user(&per_queue_opt, useraddr, sizeof(per_queue_opt))) + return -EFAULT; + ++ if (per_queue_opt.sub_command != sub_cmd) ++ return -EINVAL; ++ + switch (per_queue_opt.sub_command) { + case ETHTOOL_GCOALESCE: + return ethtool_get_per_queue_coalesce(dev, useraddr, &per_queue_opt); +@@ -2838,7 +2842,7 @@ int dev_ethtool(struct net *net, struct + rc = ethtool_get_phy_stats(dev, useraddr); + break; + case ETHTOOL_PERQUEUE: +- rc = ethtool_set_per_queue(dev, useraddr); ++ rc = ethtool_set_per_queue(dev, useraddr, sub_cmd); + break; + case ETHTOOL_GLINKSETTINGS: + rc = ethtool_get_link_ksettings(dev, useraddr); diff --git a/queue-4.18/ip6_tunnel-fix-encapsulation-layout.patch b/queue-4.18/ip6_tunnel-fix-encapsulation-layout.patch new file mode 100644 index 00000000000..16817bf0b0c --- /dev/null +++ b/queue-4.18/ip6_tunnel-fix-encapsulation-layout.patch @@ -0,0 +1,69 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Stefano Brivio +Date: Thu, 18 Oct 2018 21:25:07 +0200 +Subject: ip6_tunnel: Fix encapsulation layout + +From: Stefano Brivio + +[ Upstream commit d4d576f5ab7edcb757bb33e6a5600666a0b1232d ] + +Commit 058214a4d1df ("ip6_tun: Add infrastructure for doing +encapsulation") added the ip6_tnl_encap() call in ip6_tnl_xmit(), before +the call to ipv6_push_frag_opts() to append the IPv6 Tunnel Encapsulation +Limit option (option 4, RFC 2473, par. 5.1) to the outer IPv6 header. + +As long as the option didn't actually end up in generated packets, this +wasn't an issue. 
Then commit 89a23c8b528b ("ip6_tunnel: Fix missing tunnel +encapsulation limit option") fixed sending of this option, and the +resulting layout, e.g. for FoU, is: + +.-------------------.------------.----------.-------------------.----- - - +| Outer IPv6 Header | UDP header | Option 4 | Inner IPv6 Header | Payload +'-------------------'------------'----------'-------------------'----- - - + +Needless to say, FoU and GUE (at least) won't work over IPv6. The option +is appended by default, and I couldn't find a way to disable it with the +current iproute2. + +Turn this into a more reasonable: + +.-------------------.----------.------------.-------------------.----- - - +| Outer IPv6 Header | Option 4 | UDP header | Inner IPv6 Header | Payload +'-------------------'----------'------------'-------------------'----- - - + +With this, and with 84dad55951b0 ("udp6: fix encap return code for +resubmitting"), FoU and GUE work again over IPv6. + +Fixes: 058214a4d1df ("ip6_tun: Add infrastructure for doing encapsulation") +Signed-off-by: Stefano Brivio +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_tunnel.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/net/ipv6/ip6_tunnel.c ++++ b/net/ipv6/ip6_tunnel.c +@@ -1184,10 +1184,6 @@ route_lookup: + } + skb_dst_set(skb, dst); + +- if (encap_limit >= 0) { +- init_tel_txopt(&opt, encap_limit); +- ipv6_push_frag_opts(skb, &opt.ops, &proto); +- } + hop_limit = hop_limit ? 
: ip6_dst_hoplimit(dst); + + /* Calculate max headroom for all the headers and adjust +@@ -1202,6 +1198,11 @@ route_lookup: + if (err) + return err; + ++ if (encap_limit >= 0) { ++ init_tel_txopt(&opt, encap_limit); ++ ipv6_push_frag_opts(skb, &opt.ops, &proto); ++ } ++ + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + ipv6h = ipv6_hdr(skb); diff --git a/queue-4.18/ipv6-mcast-fix-a-use-after-free-in-inet6_mc_check.patch b/queue-4.18/ipv6-mcast-fix-a-use-after-free-in-inet6_mc_check.patch new file mode 100644 index 00000000000..5dcc560926e --- /dev/null +++ b/queue-4.18/ipv6-mcast-fix-a-use-after-free-in-inet6_mc_check.patch @@ -0,0 +1,173 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Eric Dumazet +Date: Fri, 12 Oct 2018 18:58:53 -0700 +Subject: ipv6: mcast: fix a use-after-free in inet6_mc_check + +From: Eric Dumazet + +[ Upstream commit dc012f3628eaecfb5ba68404a5c30ef501daf63d ] + +syzbot found a use-after-free in inet6_mc_check [1] + +The problem here is that inet6_mc_check() uses rcu +and read_lock(&iml->sflock) + +So the fact that ip6_mc_leave_src() is called under RTNL +and the socket lock does not help us, we need to acquire +iml->sflock in write mode. + +In the future, we should convert all this stuff to RCU. 
+ +[1] +BUG: KASAN: use-after-free in ipv6_addr_equal include/net/ipv6.h:521 [inline] +BUG: KASAN: use-after-free in inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 +Read of size 8 at addr ffff8801ce7f2510 by task syz-executor0/22432 + +CPU: 1 PID: 22432 Comm: syz-executor0 Not tainted 4.19.0-rc7+ #280 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1c4/0x2b4 lib/dump_stack.c:113 + print_address_description.cold.8+0x9/0x1ff mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:354 [inline] + kasan_report.cold.9+0x242/0x309 mm/kasan/report.c:412 + __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433 + ipv6_addr_equal include/net/ipv6.h:521 [inline] + inet6_mc_check+0xae7/0xb40 net/ipv6/mcast.c:649 + __raw_v6_lookup+0x320/0x3f0 net/ipv6/raw.c:98 + ipv6_raw_deliver net/ipv6/raw.c:183 [inline] + raw6_local_deliver+0x3d3/0xcb0 net/ipv6/raw.c:240 + ip6_input_finish+0x467/0x1aa0 net/ipv6/ip6_input.c:345 + NF_HOOK include/linux/netfilter.h:289 [inline] + ip6_input+0xe9/0x600 net/ipv6/ip6_input.c:426 + ip6_mc_input+0x48a/0xd20 net/ipv6/ip6_input.c:503 + dst_input include/net/dst.h:450 [inline] + ip6_rcv_finish+0x17a/0x330 net/ipv6/ip6_input.c:76 + NF_HOOK include/linux/netfilter.h:289 [inline] + ipv6_rcv+0x120/0x640 net/ipv6/ip6_input.c:271 + __netif_receive_skb_one_core+0x14d/0x200 net/core/dev.c:4913 + __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:5023 + netif_receive_skb_internal+0x12c/0x620 net/core/dev.c:5126 + napi_frags_finish net/core/dev.c:5664 [inline] + napi_gro_frags+0x75a/0xc90 net/core/dev.c:5737 + tun_get_user+0x3189/0x4250 drivers/net/tun.c:1923 + tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1968 + call_write_iter include/linux/fs.h:1808 [inline] + do_iter_readv_writev+0x8b0/0xa80 fs/read_write.c:680 + do_iter_write+0x185/0x5f0 fs/read_write.c:959 + vfs_writev+0x1f1/0x360 fs/read_write.c:1004 + do_writev+0x11a/0x310 
fs/read_write.c:1039 + __do_sys_writev fs/read_write.c:1112 [inline] + __se_sys_writev fs/read_write.c:1109 [inline] + __x64_sys_writev+0x75/0xb0 fs/read_write.c:1109 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x457421 +Code: 75 14 b8 14 00 00 00 0f 05 48 3d 01 f0 ff ff 0f 83 34 b5 fb ff c3 48 83 ec 08 e8 1a 2d 00 00 48 89 04 24 b8 14 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 63 2d 00 00 48 89 d0 48 83 c4 08 48 3d 01 +RSP: 002b:00007f2d30ecaba0 EFLAGS: 00000293 ORIG_RAX: 0000000000000014 +RAX: ffffffffffffffda RBX: 000000000000003e RCX: 0000000000457421 +RDX: 0000000000000001 RSI: 00007f2d30ecabf0 RDI: 00000000000000f0 +RBP: 0000000020000500 R08: 00000000000000f0 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000293 R12: 00007f2d30ecb6d4 +R13: 00000000004c4890 R14: 00000000004d7b90 R15: 00000000ffffffff + +Allocated by task 22437: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 + __do_kmalloc mm/slab.c:3718 [inline] + __kmalloc+0x14e/0x760 mm/slab.c:3727 + kmalloc include/linux/slab.h:518 [inline] + sock_kmalloc+0x15a/0x1f0 net/core/sock.c:1983 + ip6_mc_source+0x14dd/0x1960 net/ipv6/mcast.c:427 + do_ipv6_setsockopt.isra.9+0x3afb/0x45d0 net/ipv6/ipv6_sockglue.c:743 + ipv6_setsockopt+0xbd/0x170 net/ipv6/ipv6_sockglue.c:933 + rawv6_setsockopt+0x59/0x140 net/ipv6/raw.c:1069 + sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3038 + __sys_setsockopt+0x1ba/0x3c0 net/socket.c:1902 + __do_sys_setsockopt net/socket.c:1913 [inline] + __se_sys_setsockopt net/socket.c:1910 [inline] + __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1910 + do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Freed by task 22430: + save_stack+0x43/0xd0 mm/kasan/kasan.c:448 + set_track mm/kasan/kasan.c:460 [inline] + __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 + kasan_slab_free+0xe/0x10 
mm/kasan/kasan.c:528 + __cache_free mm/slab.c:3498 [inline] + kfree+0xcf/0x230 mm/slab.c:3813 + __sock_kfree_s net/core/sock.c:2004 [inline] + sock_kfree_s+0x29/0x60 net/core/sock.c:2010 + ip6_mc_leave_src+0x11a/0x1d0 net/ipv6/mcast.c:2448 + __ipv6_sock_mc_close+0x20b/0x4e0 net/ipv6/mcast.c:310 + ipv6_sock_mc_close+0x158/0x1d0 net/ipv6/mcast.c:328 + inet6_release+0x40/0x70 net/ipv6/af_inet6.c:452 + __sock_release+0xd7/0x250 net/socket.c:579 + sock_close+0x19/0x20 net/socket.c:1141 + __fput+0x385/0xa30 fs/file_table.c:278 + ____fput+0x15/0x20 fs/file_table.c:309 + task_work_run+0x1e8/0x2a0 kernel/task_work.c:113 + tracehook_notify_resume include/linux/tracehook.h:193 [inline] + exit_to_usermode_loop+0x318/0x380 arch/x86/entry/common.c:166 + prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] + syscall_return_slowpath arch/x86/entry/common.c:268 [inline] + do_syscall_64+0x6be/0x820 arch/x86/entry/common.c:293 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +The buggy address belongs to the object at ffff8801ce7f2500 + which belongs to the cache kmalloc-192 of size 192 +The buggy address is located 16 bytes inside of + 192-byte region [ffff8801ce7f2500, ffff8801ce7f25c0) +The buggy address belongs to the page: +page:ffffea000739fc80 count:1 mapcount:0 mapping:ffff8801da800040 index:0x0 +flags: 0x2fffc0000000100(slab) +raw: 02fffc0000000100 ffffea0006f6e548 ffffea000737b948 ffff8801da800040 +raw: 0000000000000000 ffff8801ce7f2000 0000000100000010 0000000000000000 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8801ce7f2400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ffff8801ce7f2480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc +>ffff8801ce7f2500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb + ^ + ffff8801ce7f2580: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc + ffff8801ce7f2600: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David 
S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/mcast.c | 16 ++++++++-------- + 1 file changed, 8 insertions(+), 8 deletions(-) + +--- a/net/ipv6/mcast.c ++++ b/net/ipv6/mcast.c +@@ -2436,17 +2436,17 @@ static int ip6_mc_leave_src(struct sock + { + int err; + +- /* callers have the socket lock and rtnl lock +- * so no other readers or writers of iml or its sflist +- */ ++ write_lock_bh(&iml->sflock); + if (!iml->sflist) { + /* any-source empty exclude case */ +- return ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); ++ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, 0, NULL, 0); ++ } else { ++ err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, ++ iml->sflist->sl_count, iml->sflist->sl_addr, 0); ++ sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); ++ iml->sflist = NULL; + } +- err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode, +- iml->sflist->sl_count, iml->sflist->sl_addr, 0); +- sock_kfree_s(sk, iml->sflist, IP6_SFLSIZE(iml->sflist->sl_max)); +- iml->sflist = NULL; ++ write_unlock_bh(&iml->sflock); + return err; + } + diff --git a/queue-4.18/ipv6-ndisc-preserve-ipv6-control-buffer-if-protocol-error-handlers-are-called.patch b/queue-4.18/ipv6-ndisc-preserve-ipv6-control-buffer-if-protocol-error-handlers-are-called.patch new file mode 100644 index 00000000000..906927dee61 --- /dev/null +++ b/queue-4.18/ipv6-ndisc-preserve-ipv6-control-buffer-if-protocol-error-handlers-are-called.patch @@ -0,0 +1,53 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Stefano Brivio +Date: Wed, 24 Oct 2018 14:37:21 +0200 +Subject: ipv6/ndisc: Preserve IPv6 control buffer if protocol error handlers are called + +From: Stefano Brivio + +[ Upstream commit ee1abcf689353f36d9322231b4320926096bdee0 ] + +Commit a61bbcf28a8c ("[NET]: Store skb->timestamp as offset to a base +timestamp") introduces a neighbour control buffer and zeroes it out in +ndisc_rcv(), as ndisc_recv_ns() uses it. 
+ +Commit f2776ff04722 ("[IPV6]: Fix address/interface handling in UDP and +DCCP, according to the scoping architecture.") introduces the usage of the +IPv6 control buffer in protocol error handlers (e.g. inet6_iif() in +present-day __udp6_lib_err()). + +Now, with commit b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate +redirect, instead of rt6_redirect()."), we call protocol error handlers +from ndisc_redirect_rcv(), after the control buffer is already stolen and +some parts are already zeroed out. This implies that inet6_iif() on this +path will always return zero. + +This gives unexpected results on UDP socket lookup in __udp6_lib_err(), as +we might actually need to match sockets for a given interface. + +Instead of always claiming the control buffer in ndisc_rcv(), do that only +when needed. + +Fixes: b94f1c0904da ("ipv6: Use icmpv6_notify() to propagate redirect, instead of rt6_redirect().") +Signed-off-by: Stefano Brivio +Reviewed-by: Sabrina Dubroca +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ndisc.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/ipv6/ndisc.c ++++ b/net/ipv6/ndisc.c +@@ -1732,10 +1732,9 @@ int ndisc_rcv(struct sk_buff *skb) + return 0; + } + +- memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); +- + switch (msg->icmph.icmp6_type) { + case NDISC_NEIGHBOUR_SOLICITATION: ++ memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); + ndisc_recv_ns(skb); + break; + diff --git a/queue-4.18/ipv6-rate-limit-probes-for-neighbourless-routes.patch b/queue-4.18/ipv6-rate-limit-probes-for-neighbourless-routes.patch new file mode 100644 index 00000000000..2e6ad46763e --- /dev/null +++ b/queue-4.18/ipv6-rate-limit-probes-for-neighbourless-routes.patch @@ -0,0 +1,91 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Sabrina Dubroca +Date: Fri, 12 Oct 2018 16:22:47 +0200 +Subject: ipv6: rate-limit probes for neighbourless routes + +From: Sabrina Dubroca + +[ Upstream commit f547fac624be53ad8b07e9ebca7654a7827ba61b ] + +When commit 270972554c91 ("[IPV6]: ROUTE: Add Router Reachability +Probing (RFC4191).") introduced router probing, the rt6_probe() function +required that a neighbour entry existed. This neighbour entry is used to +record the timestamp of the last probe via the ->updated field. + +Later, commit 2152caea7196 ("ipv6: Do not depend on rt->n in rt6_probe().") +removed the requirement for a neighbour entry. Neighbourless routes skip +the interval check and are not rate-limited. + +This patch adds rate-limiting for neighbourless routes, by recording the +timestamp of the last probe in the fib6_info itself. + +Fixes: 2152caea7196 ("ipv6: Do not depend on rt->n in rt6_probe().") +Signed-off-by: Sabrina Dubroca +Reviewed-by: Stefano Brivio +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ip6_fib.h | 4 ++++ + net/ipv6/route.c | 12 ++++++------ + 2 files changed, 10 insertions(+), 6 deletions(-) + +--- a/include/net/ip6_fib.h ++++ b/include/net/ip6_fib.h +@@ -159,6 +159,10 @@ struct fib6_info { + struct rt6_info * __percpu *rt6i_pcpu; + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; + ++#ifdef CONFIG_IPV6_ROUTER_PREF ++ unsigned long last_probe; ++#endif ++ + u32 fib6_metric; + u8 fib6_protocol; + u8 fib6_type; +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -517,10 +517,11 @@ static void rt6_probe_deferred(struct wo + + static void rt6_probe(struct fib6_info *rt) + { +- struct __rt6_probe_work *work; ++ struct __rt6_probe_work *work = NULL; + const struct in6_addr *nh_gw; + struct neighbour *neigh; + struct net_device *dev; ++ struct inet6_dev *idev; + + /* + * Okay, this does not seem to be appropriate +@@ -536,15 +537,12 @@ static void rt6_probe(struct fib6_info * + nh_gw = &rt->fib6_nh.nh_gw; + dev = rt->fib6_nh.nh_dev; + rcu_read_lock_bh(); ++ idev = __in6_dev_get(dev); + neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); + if (neigh) { +- struct inet6_dev *idev; +- + if (neigh->nud_state & NUD_VALID) + goto out; + +- idev = __in6_dev_get(dev); +- work = NULL; + write_lock(&neigh->lock); + if (!(neigh->nud_state & NUD_VALID) && + time_after(jiffies, +@@ -554,11 +552,13 @@ static void rt6_probe(struct fib6_info * + __neigh_set_probe_once(neigh); + } + write_unlock(&neigh->lock); +- } else { ++ } else if (time_after(jiffies, rt->last_probe + ++ idev->cnf.rtr_probe_interval)) { + work = kmalloc(sizeof(*work), GFP_ATOMIC); + } + + if (work) { ++ rt->last_probe = jiffies; + INIT_WORK(&work->work, rt6_probe_deferred); + work->target = *nh_gw; + dev_hold(dev); diff --git a/queue-4.18/llc-set-sock_rcu_free-in-llc_sap_add_socket.patch b/queue-4.18/llc-set-sock_rcu_free-in-llc_sap_add_socket.patch new file mode 100644 index 00000000000..22c17281c06 --- /dev/null +++ 
b/queue-4.18/llc-set-sock_rcu_free-in-llc_sap_add_socket.patch @@ -0,0 +1,37 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Cong Wang +Date: Thu, 11 Oct 2018 11:15:13 -0700 +Subject: llc: set SOCK_RCU_FREE in llc_sap_add_socket() + +From: Cong Wang + +[ Upstream commit 5a8e7aea953bdb6d4da13aff6f1e7f9c62023499 ] + +When an llc sock is added into the sk_laddr_hash of an llc_sap, +it is not marked with SOCK_RCU_FREE. + +This causes that the sock could be freed while it is still being +read by __llc_lookup_established() with RCU read lock. sock is +refcounted, but with RCU read lock, nothing prevents the readers +getting a zero refcnt. + +Fix it by setting SOCK_RCU_FREE in llc_sap_add_socket(). + +Reported-by: syzbot+11e05f04c15e03be5254@syzkaller.appspotmail.com +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/llc/llc_conn.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/llc/llc_conn.c ++++ b/net/llc/llc_conn.c +@@ -734,6 +734,7 @@ void llc_sap_add_socket(struct llc_sap * + llc_sk(sk)->sap = sap; + + spin_lock_bh(&sap->sk_lock); ++ sock_set_flag(sk, SOCK_RCU_FREE); + sap->sk_count++; + sk_nulls_add_node_rcu(sk, laddr_hb); + hlist_add_head(&llc->dev_hash_node, dev_hb); diff --git a/queue-4.18/mlxsw-core-fix-devlink-unregister-flow.patch b/queue-4.18/mlxsw-core-fix-devlink-unregister-flow.patch new file mode 100644 index 00000000000..140b51ead01 --- /dev/null +++ b/queue-4.18/mlxsw-core-fix-devlink-unregister-flow.patch @@ -0,0 +1,75 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Shalom Toledo +Date: Mon, 29 Oct 2018 14:26:16 +0000 +Subject: mlxsw: core: Fix devlink unregister flow + +From: Shalom Toledo + +[ Upstream commit a22712a962912faf257e857ab6857f56a93cfb34 ] + +After a failed reload, the driver is still registered to devlink, its +devlink instance is still allocated and the 'reload_fail' flag is set. 
+Then, in the next reload try, the driver's allocated devlink instance will +be freed without unregistering from devlink and its components (e.g, +resources). This scenario can cause a use-after-free if the user tries to +execute command via devlink user-space tool. + +Fix by not freeing the devlink instance during reload (failed or not). + +Fixes: 24cc68ad6c46 ("mlxsw: core: Add support for reload") +Signed-off-by: Shalom Toledo +Reviewed-by: Jiri Pirko +Signed-off-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core.c | 24 +++++++++++++++++------- + 1 file changed, 17 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c +@@ -985,8 +985,8 @@ static int mlxsw_devlink_core_bus_device + mlxsw_core->bus, + mlxsw_core->bus_priv, true, + devlink); +- if (err) +- mlxsw_core->reload_fail = true; ++ mlxsw_core->reload_fail = !!err; ++ + return err; + } + +@@ -1126,8 +1126,15 @@ void mlxsw_core_bus_device_unregister(st + const char *device_kind = mlxsw_core->bus_info->device_kind; + struct devlink *devlink = priv_to_devlink(mlxsw_core); + +- if (mlxsw_core->reload_fail) +- goto reload_fail; ++ if (mlxsw_core->reload_fail) { ++ if (!reload) ++ /* Only the parts that were not de-initialized in the ++ * failed reload attempt need to be de-initialized. 
++ */ ++ goto reload_fail_deinit; ++ else ++ return; ++ } + + if (mlxsw_core->driver->fini) + mlxsw_core->driver->fini(mlxsw_core); +@@ -1140,9 +1147,12 @@ void mlxsw_core_bus_device_unregister(st + if (!reload) + devlink_resources_unregister(devlink, NULL); + mlxsw_core->bus->fini(mlxsw_core->bus_priv); +- if (reload) +- return; +-reload_fail: ++ ++ return; ++ ++reload_fail_deinit: ++ devlink_unregister(devlink); ++ devlink_resources_unregister(devlink, NULL); + devlink_free(devlink); + mlxsw_core_driver_put(device_kind); + } diff --git a/queue-4.18/mlxsw-spectrum_switchdev-don-t-ignore-deletions-of-learned-macs.patch b/queue-4.18/mlxsw-spectrum_switchdev-don-t-ignore-deletions-of-learned-macs.patch new file mode 100644 index 00000000000..7af3929bd97 --- /dev/null +++ b/queue-4.18/mlxsw-spectrum_switchdev-don-t-ignore-deletions-of-learned-macs.patch @@ -0,0 +1,34 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Petr Machata +Date: Mon, 29 Oct 2018 14:26:14 +0000 +Subject: mlxsw: spectrum_switchdev: Don't ignore deletions of learned MACs + +From: Petr Machata + +[ Upstream commit ad0b9d94182be8356978d220c82f9837cffeb7a9 ] + +Demands to remove FDB entries should be honored even if the FDB entry in +question was originally learned, and not added by the user. Therefore +ignore the added_by_user datum for SWITCHDEV_FDB_DEL_TO_DEVICE. + +Fixes: 816a3bed9549 ("switchdev: Add fdb.added_by_user to switchdev notifications") +Signed-off-by: Petr Machata +Suggested-by: Ido Schimmel +Signed-off-by: Ido Schimmel +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +@@ -2307,8 +2307,6 @@ static void mlxsw_sp_switchdev_event_wor + break; + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = &switchdev_work->fdb_info; +- if (!fdb_info->added_by_user) +- break; + mlxsw_sp_port_fdb_set(mlxsw_sp_port, fdb_info, false); + break; + case SWITCHDEV_FDB_ADD_TO_BRIDGE: /* fall through */ diff --git a/queue-4.18/net-bcmgenet-poll-internal-phy-for-genetv5.patch b/queue-4.18/net-bcmgenet-poll-internal-phy-for-genetv5.patch new file mode 100644 index 00000000000..179dd658860 --- /dev/null +++ b/queue-4.18/net-bcmgenet-poll-internal-phy-for-genetv5.patch @@ -0,0 +1,39 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Florian Fainelli +Date: Thu, 11 Oct 2018 15:06:33 -0700 +Subject: net: bcmgenet: Poll internal PHY for GENETv5 + +From: Florian Fainelli + +[ Upstream commit 64bd9c8135751b561f27edaaffe93d07093f81af ] + +On GENETv5, there is a hardware issue which prevents the GENET hardware +from generating a link UP interrupt when the link is operating at +10Mbits/sec. Since we do not have any way to configure the link +detection logic, fallback to polling in that case. + +Fixes: 421380856d9c ("net: bcmgenet: add support for the GENETv5 hardware") +Signed-off-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/genet/bcmmii.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c ++++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c +@@ -321,9 +321,12 @@ int bcmgenet_mii_probe(struct net_device + phydev->advertising = phydev->supported; + + /* The internal PHY has its link interrupts routed to the +- * Ethernet MAC ISRs ++ * Ethernet MAC ISRs. 
On GENETv5 there is a hardware issue ++ * that prevents the signaling of link UP interrupts when ++ * the link operates at 10Mbps, so fallback to polling for ++ * those versions of GENET. + */ +- if (priv->internal_phy) ++ if (priv->internal_phy && !GENET_IS_V5(priv)) + dev->phydev->irq = PHY_IGNORE_INTERRUPT; + + return 0; diff --git a/queue-4.18/net-bpfilter-use-get_pid_task-instead-of-pid_task.patch b/queue-4.18/net-bpfilter-use-get_pid_task-instead-of-pid_task.patch new file mode 100644 index 00000000000..878506773ea --- /dev/null +++ b/queue-4.18/net-bpfilter-use-get_pid_task-instead-of-pid_task.patch @@ -0,0 +1,73 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Taehee Yoo +Date: Wed, 17 Oct 2018 00:35:10 +0900 +Subject: net: bpfilter: use get_pid_task instead of pid_task + +From: Taehee Yoo + +[ Upstream commit 84258438e8ce12d6888b68a1238bba9cb25307e2 ] + +pid_task() dereferences rcu protected tasks array. +But there is no rcu_read_lock() in shutdown_umh() routine so that +rcu_read_lock() is needed. +get_pid_task() is wrapper function of pid_task. it holds rcu_read_lock() +then calls pid_task(). if task isn't NULL, it increases reference count +of task. + +test commands: + %modprobe bpfilter + %modprobe -rv bpfilter + +splat looks like: +[15102.030932] ============================= +[15102.030957] WARNING: suspicious RCU usage +[15102.030985] 4.19.0-rc7+ #21 Not tainted +[15102.031010] ----------------------------- +[15102.031038] kernel/pid.c:330 suspicious rcu_dereference_check() usage! +[15102.031063] + other info that might help us debug this: + +[15102.031332] + rcu_scheduler_active = 2, debug_locks = 1 +[15102.031363] 1 lock held by modprobe/1570: +[15102.031389] #0: 00000000580ef2b0 (bpfilter_lock){+.+.}, at: stop_umh+0x13/0x52 [bpfilter] +[15102.031552] + stack backtrace: +[15102.031583] CPU: 1 PID: 1570 Comm: modprobe Not tainted 4.19.0-rc7+ #21 +[15102.031607] Hardware name: To be filled by O.E.M. 
To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015 +[15102.031628] Call Trace: +[15102.031676] dump_stack+0xc9/0x16b +[15102.031723] ? show_regs_print_info+0x5/0x5 +[15102.031801] ? lockdep_rcu_suspicious+0x117/0x160 +[15102.031855] pid_task+0x134/0x160 +[15102.031900] ? find_vpid+0xf0/0xf0 +[15102.032017] shutdown_umh.constprop.1+0x1e/0x53 [bpfilter] +[15102.032055] stop_umh+0x46/0x52 [bpfilter] +[15102.032092] __x64_sys_delete_module+0x47e/0x570 +[ ... ] + +Fixes: d2ba09c17a06 ("net: add skeleton of bpfilter kernel module") +Signed-off-by: Taehee Yoo +Acked-by: Alexei Starovoitov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bpfilter/bpfilter_kern.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/bpfilter/bpfilter_kern.c ++++ b/net/bpfilter/bpfilter_kern.c +@@ -23,9 +23,11 @@ static void shutdown_umh(struct umh_info + + if (!info->pid) + return; +- tsk = pid_task(find_vpid(info->pid), PIDTYPE_PID); +- if (tsk) ++ tsk = get_pid_task(find_vpid(info->pid), PIDTYPE_PID); ++ if (tsk) { + force_sig(SIGKILL, tsk); ++ put_task_struct(tsk); ++ } + fput(info->pipe_to_umh); + fput(info->pipe_from_umh); + info->pid = 0; diff --git a/queue-4.18/net-drop-skb-on-failure-in-ip_check_defrag.patch b/queue-4.18/net-drop-skb-on-failure-in-ip_check_defrag.patch new file mode 100644 index 00000000000..ab4d0f7a83f --- /dev/null +++ b/queue-4.18/net-drop-skb-on-failure-in-ip_check_defrag.patch @@ -0,0 +1,55 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Cong Wang +Date: Thu, 1 Nov 2018 12:02:37 -0700 +Subject: net: drop skb on failure in ip_check_defrag() + +From: Cong Wang + +[ Upstream commit 7de414a9dd91426318df7b63da024b2b07e53df5 ] + +Most callers of pskb_trim_rcsum() simply drop the skb when +it fails, however, ip_check_defrag() still continues to pass +the skb up to stack. This is suspicious. 
+ +In ip_check_defrag(), after we learn the skb is an IP fragment, +passing the skb to callers makes no sense, because callers expect +fragments are defrag'ed on success. So, dropping the skb when we +can't defrag it is reasonable. + +Note, prior to commit 88078d98d1bb, this is not a big problem as +checksum will be fixed up anyway. After it, the checksum is not +correct on failure. + +Found this during code review. + +Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") +Cc: Eric Dumazet +Signed-off-by: Cong Wang +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ip_fragment.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/net/ipv4/ip_fragment.c ++++ b/net/ipv4/ip_fragment.c +@@ -657,10 +657,14 @@ struct sk_buff *ip_check_defrag(struct n + if (ip_is_fragment(&iph)) { + skb = skb_share_check(skb, GFP_ATOMIC); + if (skb) { +- if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) +- return skb; +- if (pskb_trim_rcsum(skb, netoff + len)) +- return skb; ++ if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) { ++ kfree_skb(skb); ++ return NULL; ++ } ++ if (pskb_trim_rcsum(skb, netoff + len)) { ++ kfree_skb(skb); ++ return NULL; ++ } + memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); + if (ip_defrag(net, skb, user)) + return NULL; diff --git a/queue-4.18/net-fec-don-t-dump-rx-fifo-register-when-not-available.patch b/queue-4.18/net-fec-don-t-dump-rx-fifo-register-when-not-available.patch new file mode 100644 index 00000000000..2d753bdb873 --- /dev/null +++ b/queue-4.18/net-fec-don-t-dump-rx-fifo-register-when-not-available.patch @@ -0,0 +1,78 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Fugang Duan +Date: Mon, 15 Oct 2018 05:19:00 +0000 +Subject: net: fec: don't dump RX FIFO register when not available + +From: Fugang Duan + +[ Upstream commit ec20a63aa8b8ec3223fb25cdb2a49f9f9dfda88c ] + +Commit db65f35f50e0 ("net: fec: add support of ethtool get_regs") 
introduced +ethtool "--register-dump" interface to dump all FEC registers. + +But not all silicon implementations of the Freescale FEC hardware module +have the FRBR (FIFO Receive Bound Register) and FRSR (FIFO Receive Start +Register) registers, so we should not be trying to dump them on those that +don't. + +To fix it we create a quirk flag, FEC_QUIRK_HAS_FRREG, and check it before +dumping those RX FIFO registers. + +Signed-off-by: Fugang Duan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/freescale/fec.h | 4 ++++ + drivers/net/ethernet/freescale/fec_main.c | 16 ++++++++++++---- + 2 files changed, 16 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/freescale/fec.h ++++ b/drivers/net/ethernet/freescale/fec.h +@@ -452,6 +452,10 @@ struct bufdesc_ex { + * initialisation. + */ + #define FEC_QUIRK_MIB_CLEAR (1 << 15) ++/* Only i.MX25/i.MX27/i.MX28 controller supports FRBR,FRSR registers, ++ * those FIFO receive registers are resolved in other platforms. 
++ */ ++#define FEC_QUIRK_HAS_FRREG (1 << 16) + + struct bufdesc_prop { + int qid; +--- a/drivers/net/ethernet/freescale/fec_main.c ++++ b/drivers/net/ethernet/freescale/fec_main.c +@@ -90,14 +90,16 @@ static struct platform_device_id fec_dev + .driver_data = 0, + }, { + .name = "imx25-fec", +- .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR, ++ .driver_data = FEC_QUIRK_USE_GASKET | FEC_QUIRK_MIB_CLEAR | ++ FEC_QUIRK_HAS_FRREG, + }, { + .name = "imx27-fec", +- .driver_data = FEC_QUIRK_MIB_CLEAR, ++ .driver_data = FEC_QUIRK_MIB_CLEAR | FEC_QUIRK_HAS_FRREG, + }, { + .name = "imx28-fec", + .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_SWAP_FRAME | +- FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC, ++ FEC_QUIRK_SINGLE_MDIO | FEC_QUIRK_HAS_RACC | ++ FEC_QUIRK_HAS_FRREG, + }, { + .name = "imx6q-fec", + .driver_data = FEC_QUIRK_ENET_MAC | FEC_QUIRK_HAS_GBIT | +@@ -2163,7 +2165,13 @@ static void fec_enet_get_regs(struct net + memset(buf, 0, regs->len); + + for (i = 0; i < ARRAY_SIZE(fec_enet_register_offset); i++) { +- off = fec_enet_register_offset[i] / 4; ++ off = fec_enet_register_offset[i]; ++ ++ if ((off == FEC_R_BOUND || off == FEC_R_FSTART) && ++ !(fep->quirks & FEC_QUIRK_HAS_FRREG)) ++ continue; ++ ++ off >>= 2; + buf[off] = readl(&theregs[off]); + } + } diff --git a/queue-4.18/net-fix-pskb_trim_rcsum_slow-with-odd-trim-offset.patch b/queue-4.18/net-fix-pskb_trim_rcsum_slow-with-odd-trim-offset.patch new file mode 100644 index 00000000000..27f8b76cc01 --- /dev/null +++ b/queue-4.18/net-fix-pskb_trim_rcsum_slow-with-odd-trim-offset.patch @@ -0,0 +1,47 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Dimitris Michailidis +Date: Fri, 19 Oct 2018 17:07:13 -0700 +Subject: net: fix pskb_trim_rcsum_slow() with odd trim offset + +From: Dimitris Michailidis + +[ Upstream commit d55bef5059dd057bd077155375c581b49d25be7e ] + +We've been getting checksum errors involving small UDP packets, usually +59B packets with 1 extra non-zero padding byte. 
netdev_rx_csum_fault() +has been complaining that HW is providing bad checksums. Turns out the +problem is in pskb_trim_rcsum_slow(), introduced in commit 88078d98d1bb +("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends"). + +The source of the problem is that when the bytes we are trimming start +at an odd address, as in the case of the 1 padding byte above, +skb_checksum() returns a byte-swapped value. We cannot just combine this +with skb->csum using csum_sub(). We need to use csum_block_sub() here +that takes into account the parity of the start address and handles the +swapping. + +Matches existing code in __skb_postpull_rcsum() and esp_remove_trailer(). + +Fixes: 88078d98d1bb ("net: pskb_trim_rcsum() and CHECKSUM_COMPLETE are friends") +Signed-off-by: Dimitris Michailidis +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/skbuff.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/net/core/skbuff.c ++++ b/net/core/skbuff.c +@@ -1845,8 +1845,9 @@ int pskb_trim_rcsum_slow(struct sk_buff + if (skb->ip_summed == CHECKSUM_COMPLETE) { + int delta = skb->len - len; + +- skb->csum = csum_sub(skb->csum, +- skb_checksum(skb, len, delta, 0)); ++ skb->csum = csum_block_sub(skb->csum, ++ skb_checksum(skb, len, delta, 0), ++ len); + } + return __pskb_trim(skb, len); + } diff --git a/queue-4.18/net-ipmr-fix-unresolved-entry-dumps.patch b/queue-4.18/net-ipmr-fix-unresolved-entry-dumps.patch new file mode 100644 index 00000000000..3a9655f6124 --- /dev/null +++ b/queue-4.18/net-ipmr-fix-unresolved-entry-dumps.patch @@ -0,0 +1,35 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Nikolay Aleksandrov +Date: Wed, 17 Oct 2018 22:34:34 +0300 +Subject: net: ipmr: fix unresolved entry dumps + +From: Nikolay Aleksandrov + +[ Upstream commit eddf016b910486d2123675a6b5fd7d64f77cdca8 ] + +If the skb space ends in an unresolved entry while dumping we'll miss +some unresolved entries. 
The reason is due to zeroing the entry counter +between dumping resolved and unresolved mfc entries. We should just +keep counting until the whole table is dumped and zero when we move to +the next as we have a separate table counter. + +Reported-by: Colin Ian King +Fixes: 8fb472c09b9d ("ipmr: improve hash scalability") +Signed-off-by: Nikolay Aleksandrov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ipmr_base.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/net/ipv4/ipmr_base.c ++++ b/net/ipv4/ipmr_base.c +@@ -295,8 +295,6 @@ int mr_rtm_dumproute(struct sk_buff *skb + next_entry: + e++; + } +- e = 0; +- s_e = 0; + + spin_lock_bh(lock); + list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { diff --git a/queue-4.18/net-ipv6-allow-onlink-routes-to-have-a-device-mismatch-if-it-is-the-default-route.patch b/queue-4.18/net-ipv6-allow-onlink-routes-to-have-a-device-mismatch-if-it-is-the-default-route.patch new file mode 100644 index 00000000000..ae8d2fefed4 --- /dev/null +++ b/queue-4.18/net-ipv6-allow-onlink-routes-to-have-a-device-mismatch-if-it-is-the-default-route.patch @@ -0,0 +1,81 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: David Ahern +Date: Wed, 24 Oct 2018 13:58:39 -0700 +Subject: net/ipv6: Allow onlink routes to have a device mismatch if it is the default route + +From: David Ahern + +[ Upstream commit 4ed591c8ab44e711e56b8e021ffaf4f407c045f5 ] + +The intent of ip6_route_check_nh_onlink is to make sure the gateway +given for an onlink route is not actually on a connected route for +a different interface (e.g., 2001:db8:1::/64 is on dev eth1 and then +an onlink route has a via 2001:db8:1::1 dev eth2). If the gateway +lookup hits the default route then it most likely will be a different +interface than the onlink route which is ok. + +Update ip6_route_check_nh_onlink to disregard the device mismatch +if the gateway lookup hits the default route. 
Turns out the existing +onlink tests are passing because there is no default route or it is +an unreachable default, so update the onlink tests to have a default +route other than unreachable. + +Fixes: fc1e64e1092f6 ("net/ipv6: Add support for onlink flag") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 2 ++ + tools/testing/selftests/net/fib-onlink-tests.sh | 14 +++++++------- + 2 files changed, 9 insertions(+), 7 deletions(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -2792,6 +2792,8 @@ static int ip6_route_check_nh_onlink(str + grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0); + if (grt) { + if (!grt->dst.error && ++ /* ignore match if it is the default route */ ++ grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) && + (grt->rt6i_flags & flags || dev != grt->dst.dev)) { + NL_SET_ERR_MSG(extack, + "Nexthop has invalid gateway or device mismatch"); +--- a/tools/testing/selftests/net/fib-onlink-tests.sh ++++ b/tools/testing/selftests/net/fib-onlink-tests.sh +@@ -167,8 +167,8 @@ setup() + # add vrf table + ip li add ${VRF} type vrf table ${VRF_TABLE} + ip li set ${VRF} up +- ip ro add table ${VRF_TABLE} unreachable default +- ip -6 ro add table ${VRF_TABLE} unreachable default ++ ip ro add table ${VRF_TABLE} unreachable default metric 8192 ++ ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192 + + # create test interfaces + ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]} +@@ -185,20 +185,20 @@ setup() + for n in 1 3 5 7; do + ip li set ${NETIFS[p${n}]} up + ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} +- ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} ++ ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad + done + + # move peer interfaces to namespace and add addresses + for n in 2 4 6 8; do + ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up + ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]} +- ip -netns 
${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} ++ ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad + done + +- set +e ++ ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64} ++ ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64} + +- # let DAD complete - assume default of 1 probe +- sleep 1 ++ set +e + } + + cleanup() diff --git a/queue-4.18/net-ipv6-fix-index-counter-for-unicast-addresses-in-in6_dump_addrs.patch b/queue-4.18/net-ipv6-fix-index-counter-for-unicast-addresses-in-in6_dump_addrs.patch new file mode 100644 index 00000000000..75003c05e76 --- /dev/null +++ b/queue-4.18/net-ipv6-fix-index-counter-for-unicast-addresses-in-in6_dump_addrs.patch @@ -0,0 +1,50 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: David Ahern +Date: Fri, 19 Oct 2018 10:00:19 -0700 +Subject: net/ipv6: Fix index counter for unicast addresses in in6_dump_addrs + +From: David Ahern + +[ Upstream commit 4ba4c566ba8448a05e6257e0b98a21f1a0d55315 ] + +The loop wants to skip previously dumped addresses, so loops until +current index >= saved index. If the message fills it wants to save +the index for the next address to dump - i.e., the one that did not +fit in the current message. + +Currently, it is incrementing the index counter before comparing to the +saved index, and then the saved index is off by 1 - it assumes the +current address is going to fit in the message. + +Change the index handling to increment only after a successful dump. + +Fixes: 502a2ffd7376a ("ipv6: convert idev_list to list macros") +Signed-off-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -4930,8 +4930,8 @@ static int in6_dump_addrs(struct inet6_d + + /* unicast address incl. 
temp addr */ + list_for_each_entry(ifa, &idev->addr_list, if_list) { +- if (++ip_idx < s_ip_idx) +- continue; ++ if (ip_idx < s_ip_idx) ++ goto next; + err = inet6_fill_ifaddr(skb, ifa, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, +@@ -4940,6 +4940,8 @@ static int in6_dump_addrs(struct inet6_d + if (err < 0) + break; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); ++next: ++ ip_idx++; + } + break; + } diff --git a/queue-4.18/net-mlx5-fix-memory-leak-when-setting-fpga-ipsec-caps.patch b/queue-4.18/net-mlx5-fix-memory-leak-when-setting-fpga-ipsec-caps.patch new file mode 100644 index 00000000000..cc05aa0198b --- /dev/null +++ b/queue-4.18/net-mlx5-fix-memory-leak-when-setting-fpga-ipsec-caps.patch @@ -0,0 +1,54 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Talat Batheesh +Date: Thu, 30 Aug 2018 16:31:52 +0300 +Subject: net/mlx5: Fix memory leak when setting fpga ipsec caps + +From: Talat Batheesh + +[ Upstream commit fd7e848077c1a466b9187537adce16658f7cb94b ] + +Allocated memory for context should be freed once +finished working with it. + +Fixes: d6c4f0298cec ("net/mlx5: Refactor accel IPSec code") +Signed-off-by: Talat Batheesh +Reviewed-by: Or Gerlitz +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 9 ++++----- + 1 file changed, 4 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +@@ -245,7 +245,7 @@ static void *mlx5_fpga_ipsec_cmd_exec(st + return ERR_PTR(res); + } + +- /* Context will be freed by wait func after completion */ ++ /* Context should be freed by the caller after completion. 
*/ + return context; + } + +@@ -418,10 +418,8 @@ static int mlx5_fpga_ipsec_set_caps(stru + cmd.cmd = htonl(MLX5_FPGA_IPSEC_CMD_OP_SET_CAP); + cmd.flags = htonl(flags); + context = mlx5_fpga_ipsec_cmd_exec(mdev, &cmd, sizeof(cmd)); +- if (IS_ERR(context)) { +- err = PTR_ERR(context); +- goto out; +- } ++ if (IS_ERR(context)) ++ return PTR_ERR(context); + + err = mlx5_fpga_ipsec_cmd_wait(context); + if (err) +@@ -435,6 +433,7 @@ static int mlx5_fpga_ipsec_set_caps(stru + } + + out: ++ kfree(context); + return err; + } + diff --git a/queue-4.18/net-mlx5-take-only-bit-24-26-of-wqe.pftype_wq-for-page-fault-type.patch b/queue-4.18/net-mlx5-take-only-bit-24-26-of-wqe.pftype_wq-for-page-fault-type.patch new file mode 100644 index 00000000000..c670f2f899c --- /dev/null +++ b/queue-4.18/net-mlx5-take-only-bit-24-26-of-wqe.pftype_wq-for-page-fault-type.patch @@ -0,0 +1,32 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Huy Nguyen +Date: Tue, 11 Sep 2018 14:58:22 -0500 +Subject: net/mlx5: Take only bit 24-26 of wqe.pftype_wq for page fault type + +From: Huy Nguyen + +[ Upstream commit a48bc513159d4767f9988f0d857b2b0c38a4d614 ] + +The HW spec defines only bits 24-26 of pftype_wq as the page fault type, +use the required mask to ensure that. 
+ +Fixes: d9aaed838765 ("{net,IB}/mlx5: Refactor page fault handling") +Signed-off-by: Huy Nguyen +Signed-off-by: Eli Cohen +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/eq.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c +@@ -269,7 +269,7 @@ static void eq_pf_process(struct mlx5_eq + case MLX5_PFAULT_SUBTYPE_WQE: + /* WQE based event */ + pfault->type = +- be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24; ++ (be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7; + pfault->token = + be32_to_cpu(pf_eqe->wqe.token); + pfault->wqe.wq_num = diff --git a/queue-4.18/net-mlx5-wq-fixes-for-fragmented-wq-buffers-api.patch b/queue-4.18/net-mlx5-wq-fixes-for-fragmented-wq-buffers-api.patch new file mode 100644 index 00000000000..df02e5d7095 --- /dev/null +++ b/queue-4.18/net-mlx5-wq-fixes-for-fragmented-wq-buffers-api.patch @@ -0,0 +1,234 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Tariq Toukan +Date: Tue, 21 Aug 2018 14:41:41 +0300 +Subject: net/mlx5: WQ, fixes for fragmented WQ buffers API + +From: Tariq Toukan + +[ Upstream commit 37fdffb217a45609edccbb8b407d031143f551c0 ] + +mlx5e netdevice used to calculate fragment edges by a call to +mlx5_wq_cyc_get_frag_size(). This calculation did not give the correct +indication for queues smaller than a PAGE_SIZE, (broken by default on +PowerPC, where PAGE_SIZE == 64KB). Here it is replaced by the correct new +calls/API. + +Since (TX/RX) Work Queues buffers are fragmented, here we introduce +changes to the API in core driver, so that it gets a stride index and +returns the index of last stride on same fragment, and an additional +wrapping function that returns the number of physically contiguous +strides that can be written contiguously to the work queue. 
+ +This obsoletes the following API functions, and their buggy +usage in EN driver: +* mlx5_wq_cyc_get_frag_size() +* mlx5_wq_cyc_ctr2fragix() + +The new API improves modularity and hides the details of such +calculation for mlx5e netdevice and mlx5_ib rdma drivers. + +New calculation is also more efficient, and improves performance +as follows: + +Packet rate test: pktgen, UDP / IPv4, 64byte, single ring, 8K ring size. + +Before: 16,477,619 pps +After: 17,085,793 pps + +3.7% improvement + +Fixes: 3a2f70331226 ("net/mlx5: Use order-0 allocations for all WQ types") +Signed-off-by: Tariq Toukan +Reviewed-by: Eran Ben Elisha +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 12 ++++----- + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 22 +++++++++--------- + drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h | 5 +--- + drivers/net/ethernet/mellanox/mlx5/core/wq.c | 5 ---- + drivers/net/ethernet/mellanox/mlx5/core/wq.h | 11 ++++----- + include/linux/mlx5/driver.h | 8 ++++++ + 6 files changed, 31 insertions(+), 32 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -429,10 +429,9 @@ static inline u16 mlx5e_icosq_wrap_cnt(s + + static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq, + struct mlx5_wq_cyc *wq, +- u16 pi, u16 frag_pi) ++ u16 pi, u16 nnops) + { + struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi]; +- u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi; + + edge_wi = wi + nnops; + +@@ -451,15 +450,14 @@ static int mlx5e_alloc_rx_mpwqe(struct m + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_umr_wqe *umr_wqe; + u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); +- u16 pi, frag_pi; ++ u16 pi, contig_wqebbs_room; + int err; + int i; + + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); +- frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); +- +- if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > 
mlx5_wq_cyc_get_frag_size(wq))) { +- mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi); ++ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); ++ if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) { ++ mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room); + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + } + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +@@ -287,10 +287,9 @@ dma_unmap_wqe_err: + + static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, + struct mlx5_wq_cyc *wq, +- u16 pi, u16 frag_pi) ++ u16 pi, u16 nnops) + { + struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi]; +- u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi; + + edge_wi = wi + nnops; + +@@ -345,8 +344,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_t + struct mlx5e_tx_wqe_info *wi; + + struct mlx5e_sq_stats *stats = sq->stats; ++ u16 headlen, ihs, contig_wqebbs_room; + u16 ds_cnt, ds_cnt_inl = 0; +- u16 headlen, ihs, frag_pi; + u8 num_wqebbs, opcode; + u32 num_bytes; + int num_dma; +@@ -383,9 +382,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_t + } + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); +- frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); +- if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { +- mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); ++ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); ++ if (unlikely(contig_wqebbs_room < num_wqebbs)) { ++ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); + mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + } + +@@ -629,7 +628,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_t + struct mlx5e_tx_wqe_info *wi; + + struct mlx5e_sq_stats *stats = sq->stats; +- u16 headlen, ihs, pi, frag_pi; ++ u16 headlen, ihs, pi, contig_wqebbs_room; + u16 ds_cnt, ds_cnt_inl = 0; + u8 num_wqebbs, opcode; + u32 num_bytes; +@@ -665,13 +664,14 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_t + } + + num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); 
+- frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc); +- if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) { ++ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); ++ contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); ++ if (unlikely(contig_wqebbs_room < num_wqebbs)) { ++ mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); + pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); +- mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi); + } + +- mlx5i_sq_fetch_wqe(sq, &wqe, &pi); ++ mlx5i_sq_fetch_wqe(sq, &wqe, pi); + + /* fill wqe */ + wi = &sq->db.wqe_info[pi]; +--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h +@@ -109,12 +109,11 @@ struct mlx5i_tx_wqe { + + static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq, + struct mlx5i_tx_wqe **wqe, +- u16 *pi) ++ u16 pi) + { + struct mlx5_wq_cyc *wq = &sq->wq; + +- *pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); +- *wqe = mlx5_wq_cyc_get_wqe(wq, *pi); ++ *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + memset(*wqe, 0, sizeof(**wqe)); + } + +--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c +@@ -39,11 +39,6 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_ + return (u32)wq->fbc.sz_m1 + 1; + } + +-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq) +-{ +- return wq->fbc.frag_sz_m1 + 1; +-} +- + u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq) + { + return wq->fbc.sz_m1 + 1; +--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h ++++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h +@@ -80,7 +80,6 @@ int mlx5_wq_cyc_create(struct mlx5_core_ + void *wqc, struct mlx5_wq_cyc *wq, + struct mlx5_wq_ctrl *wq_ctrl); + u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq); +-u16 mlx5_wq_cyc_get_frag_size(struct mlx5_wq_cyc *wq); + + int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, + void *qpc, struct mlx5_wq_qp *wq, +@@ -140,11 +139,6 @@ static inline u16 mlx5_wq_cyc_ctr2ix(str + return ctr & wq->fbc.sz_m1; + } + +-static 
inline u16 mlx5_wq_cyc_ctr2fragix(struct mlx5_wq_cyc *wq, u16 ctr) +-{ +- return ctr & wq->fbc.frag_sz_m1; +-} +- + static inline u16 mlx5_wq_cyc_get_head(struct mlx5_wq_cyc *wq) + { + return mlx5_wq_cyc_ctr2ix(wq, wq->wqe_ctr); +@@ -160,6 +154,11 @@ static inline void *mlx5_wq_cyc_get_wqe( + return mlx5_frag_buf_get_wqe(&wq->fbc, ix); + } + ++static inline u16 mlx5_wq_cyc_get_contig_wqebbs(struct mlx5_wq_cyc *wq, u16 ix) ++{ ++ return mlx5_frag_buf_get_idx_last_contig_stride(&wq->fbc, ix) - ix + 1; ++} ++ + static inline int mlx5_wq_cyc_cc_bigger(u16 cc1, u16 cc2) + { + int equal = (cc1 == cc2); +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -1022,6 +1022,14 @@ static inline void *mlx5_frag_buf_get_wq + ((fbc->frag_sz_m1 & ix) << fbc->log_stride); + } + ++static inline u32 ++mlx5_frag_buf_get_idx_last_contig_stride(struct mlx5_frag_buf_ctrl *fbc, u32 ix) ++{ ++ u32 last_frag_stride_idx = (ix + fbc->strides_offset) | fbc->frag_sz_m1; ++ ++ return min_t(u32, last_frag_stride_idx - fbc->strides_offset, fbc->sz_m1); ++} ++ + int mlx5_cmd_init(struct mlx5_core_dev *dev); + void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); + void mlx5_cmd_use_events(struct mlx5_core_dev *dev); diff --git a/queue-4.18/net-mlx5e-fix-csum-adjustments-caused-by-rxfcs.patch b/queue-4.18/net-mlx5e-fix-csum-adjustments-caused-by-rxfcs.patch new file mode 100644 index 00000000000..eb6bee0bf36 --- /dev/null +++ b/queue-4.18/net-mlx5e-fix-csum-adjustments-caused-by-rxfcs.patch @@ -0,0 +1,108 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Eric Dumazet +Date: Tue, 30 Oct 2018 00:57:25 -0700 +Subject: net/mlx5e: fix csum adjustments caused by RXFCS + +From: Eric Dumazet + +[ Upstream commit d48051c5b8376038c2b287c3b1bd55b8d391d567 ] + +As shown by Dmitris, we need to use csum_block_add() instead of csum_add() +when adding the FCS contribution to skb csum. 
+ +Before 4.18 (more exactly commit 88078d98d1bb "net: pskb_trim_rcsum() +and CHECKSUM_COMPLETE are friends"), the whole skb csum was thrown away, +so RXFCS changes were ignored. + +Then before commit d55bef5059dd ("net: fix pskb_trim_rcsum_slow() with +odd trim offset") both mlx5 and pskb_trim_rcsum_slow() bugs were canceling +each other. + +Now that we fixed pskb_trim_rcsum_slow() we need to fix mlx5. + +Note that this patch also rewrites mlx5e_get_fcs() to: + +- Use skb_header_pointer() instead of reinventing it. +- Use __get_unaligned_cpu32() to avoid possible unaligned accesses + as Dimitris pointed out. + +Fixes: 902a545904c7 ("net/mlx5e: When RXFCS is set, add FCS data into checksum calculation") +Reported-by: Paweł Staszewski +Signed-off-by: Eric Dumazet +Cc: Eran Ben Elisha +Cc: Saeed Mahameed +Cc: Dimitris Michailidis +Cc: Cong Wang +Cc: Paweł Staszewski +Reviewed-by: Eran Ben Elisha +Tested-By: Maria Pasechnik +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 45 ++++-------------------- + 1 file changed, 9 insertions(+), 36 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +@@ -693,43 +693,15 @@ static inline bool is_last_ethertype_ip( + return (ethertype == htons(ETH_P_IP) || ethertype == htons(ETH_P_IPV6)); + } + +-static __be32 mlx5e_get_fcs(struct sk_buff *skb) ++static u32 mlx5e_get_fcs(const struct sk_buff *skb) + { +- int last_frag_sz, bytes_in_prev, nr_frags; +- u8 *fcs_p1, *fcs_p2; +- skb_frag_t *last_frag; +- __be32 fcs_bytes; +- +- if (!skb_is_nonlinear(skb)) +- return *(__be32 *)(skb->data + skb->len - ETH_FCS_LEN); +- +- nr_frags = skb_shinfo(skb)->nr_frags; +- last_frag = &skb_shinfo(skb)->frags[nr_frags - 1]; +- last_frag_sz = skb_frag_size(last_frag); +- +- /* If all FCS data is in last frag */ +- if (last_frag_sz >= ETH_FCS_LEN) +- return *(__be32 *)(skb_frag_address(last_frag) + +- 
last_frag_sz - ETH_FCS_LEN); +- +- fcs_p2 = (u8 *)skb_frag_address(last_frag); +- bytes_in_prev = ETH_FCS_LEN - last_frag_sz; +- +- /* Find where the other part of the FCS is - Linear or another frag */ +- if (nr_frags == 1) { +- fcs_p1 = skb_tail_pointer(skb); +- } else { +- skb_frag_t *prev_frag = &skb_shinfo(skb)->frags[nr_frags - 2]; ++ const void *fcs_bytes; ++ u32 _fcs_bytes; + +- fcs_p1 = skb_frag_address(prev_frag) + +- skb_frag_size(prev_frag); +- } +- fcs_p1 -= bytes_in_prev; +- +- memcpy(&fcs_bytes, fcs_p1, bytes_in_prev); +- memcpy(((u8 *)&fcs_bytes) + bytes_in_prev, fcs_p2, last_frag_sz); ++ fcs_bytes = skb_header_pointer(skb, skb->len - ETH_FCS_LEN, ++ ETH_FCS_LEN, &_fcs_bytes); + +- return fcs_bytes; ++ return __get_unaligned_cpu32(fcs_bytes); + } + + static inline void mlx5e_handle_csum(struct net_device *netdev, +@@ -762,8 +734,9 @@ static inline void mlx5e_handle_csum(str + network_depth - ETH_HLEN, + skb->csum); + if (unlikely(netdev->features & NETIF_F_RXFCS)) +- skb->csum = csum_add(skb->csum, +- (__force __wsum)mlx5e_get_fcs(skb)); ++ skb->csum = csum_block_add(skb->csum, ++ (__force __wsum)mlx5e_get_fcs(skb), ++ skb->len - ETH_FCS_LEN); + stats->csum_complete++; + return; + } diff --git a/queue-4.18/net-sched-cls_api-add-missing-validation-of-netlink-attributes.patch b/queue-4.18/net-sched-cls_api-add-missing-validation-of-netlink-attributes.patch new file mode 100644 index 00000000000..f26d71945de --- /dev/null +++ b/queue-4.18/net-sched-cls_api-add-missing-validation-of-netlink-attributes.patch @@ -0,0 +1,79 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Davide Caratti +Date: Wed, 10 Oct 2018 22:00:58 +0200 +Subject: net/sched: cls_api: add missing validation of netlink attributes + +From: Davide Caratti + +[ Upstream commit e331473fee3d500bb0d2582a1fe598df3326d8cd ] + +Similarly to what has been done in 8b4c3cdd9dd8 ("net: sched: Add policy +validation for tc attributes"), fix classifier code to add validation of +TCA_CHAIN and 
TCA_KIND netlink attributes. + +tested with: + # ./tdc.py -c filter + +v2: Let sch_api and cls_api share nla_policy they have in common, thanks + to David Ahern. +v3: Avoid EXPORT_SYMBOL(), as validation of those attributes is not done + by TC modules, thanks to Cong Wang. + While at it, restore the 'Delete / get qdisc' comment to its original + position, just above tc_get_qdisc() function prototype. + +Fixes: 5bc1701881e39 ("net: sched: introduce multichain support for filters") +Signed-off-by: Davide Caratti +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_api.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -31,6 +31,8 @@ + #include + #include + ++extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; ++ + /* The list of all installed classifier types */ + static LIST_HEAD(tcf_proto_base); + +@@ -1083,7 +1085,7 @@ static int tc_new_tfilter(struct sk_buff + replay: + tp_created = 0; + +- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); ++ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + if (err < 0) + return err; + +@@ -1226,7 +1228,7 @@ static int tc_del_tfilter(struct sk_buff + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) + return -EPERM; + +- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); ++ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + if (err < 0) + return err; + +@@ -1334,7 +1336,7 @@ static int tc_get_tfilter(struct sk_buff + void *fh = NULL; + int err; + +- err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack); ++ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); + if (err < 0) + return err; + +@@ -1488,7 +1490,8 @@ static int tc_dump_tfilter(struct sk_buf + if (nlmsg_len(cb->nlh) < sizeof(*tcm)) + return skb->len; + +- err = nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); ++ err = 
nlmsg_parse(cb->nlh, sizeof(*tcm), tca, TCA_MAX, rtm_tca_policy, ++ NULL); + if (err) + return err; + diff --git a/queue-4.18/net-sched-fix-for-duplicate-class-dump.patch b/queue-4.18/net-sched-fix-for-duplicate-class-dump.patch new file mode 100644 index 00000000000..d0642f84842 --- /dev/null +++ b/queue-4.18/net-sched-fix-for-duplicate-class-dump.patch @@ -0,0 +1,50 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Phil Sutter +Date: Thu, 18 Oct 2018 10:34:26 +0200 +Subject: net: sched: Fix for duplicate class dump + +From: Phil Sutter + +[ Upstream commit 3c53ed8fef6881a864f0ee8240ed2793ef73ad0d ] + +When dumping classes by parent, kernel would return classes twice: + +| # tc qdisc add dev lo root prio +| # tc class show dev lo +| class prio 8001:1 parent 8001: +| class prio 8001:2 parent 8001: +| class prio 8001:3 parent 8001: +| # tc class show dev lo parent 8001: +| class prio 8001:1 parent 8001: +| class prio 8001:2 parent 8001: +| class prio 8001:3 parent 8001: +| class prio 8001:1 parent 8001: +| class prio 8001:2 parent 8001: +| class prio 8001:3 parent 8001: + +This comes from qdisc_match_from_root() potentially returning the root +qdisc itself if its handle matched. Though in that case, root's classes +were already dumped a few lines above. + +Fixes: cb395b2010879 ("net: sched: optimize class dumps") +Signed-off-by: Phil Sutter +Reviewed-by: Jiri Pirko +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_api.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -2052,7 +2052,8 @@ static int tc_dump_tclass_root(struct Qd + + if (tcm->tcm_parent) { + q = qdisc_match_from_root(root, TC_H_MAJ(tcm->tcm_parent)); +- if (q && tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) ++ if (q && q != root && ++ tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) + return -1; + return 0; + } diff --git a/queue-4.18/net-sched-gred-pass-the-right-attribute-to-gred_change_table_def.patch b/queue-4.18/net-sched-gred-pass-the-right-attribute-to-gred_change_table_def.patch new file mode 100644 index 00000000000..7019926efdf --- /dev/null +++ b/queue-4.18/net-sched-gred-pass-the-right-attribute-to-gred_change_table_def.patch @@ -0,0 +1,54 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Jakub Kicinski +Date: Fri, 26 Oct 2018 15:51:06 -0700 +Subject: net: sched: gred: pass the right attribute to gred_change_table_def() + +From: Jakub Kicinski + +[ Upstream commit 38b4f18d56372e1e21771ab7b0357b853330186c ] + +gred_change_table_def() takes a pointer to TCA_GRED_DPS attribute, +and expects it will be able to interpret its contents as +struct tc_gred_sopt. Pass the correct gred attribute, instead of +TCA_OPTIONS. + +This bug meant the table definition could never be changed after +Qdisc was initialized (unless whatever TCA_OPTIONS contained both +passed netlink validation and was a valid struct tc_gred_sopt...). 
+ +Old behaviour: +$ ip link add type dummy +$ tc qdisc replace dev dummy0 parent root handle 7: \ + gred setup vqs 4 default 0 +$ tc qdisc replace dev dummy0 parent root handle 7: \ + gred setup vqs 4 default 0 +RTNETLINK answers: Invalid argument + +Now: +$ ip link add type dummy +$ tc qdisc replace dev dummy0 parent root handle 7: \ + gred setup vqs 4 default 0 +$ tc qdisc replace dev dummy0 parent root handle 7: \ + gred setup vqs 4 default 0 +$ tc qdisc replace dev dummy0 parent root handle 7: \ + gred setup vqs 4 default 0 + +Fixes: f62d6b936df5 ("[PKT_SCHED]: GRED: Use central VQ change procedure") +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_gred.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/sch_gred.c ++++ b/net/sched/sch_gred.c +@@ -413,7 +413,7 @@ static int gred_change(struct Qdisc *sch + if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) { + if (tb[TCA_GRED_LIMIT] != NULL) + sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]); +- return gred_change_table_def(sch, opt); ++ return gred_change_table_def(sch, tb[TCA_GRED_DPS]); + } + + if (tb[TCA_GRED_PARMS] == NULL || diff --git a/queue-4.18/net-smc-fix-smc_buf_unuse-to-use-the-lgr-pointer.patch b/queue-4.18/net-smc-fix-smc_buf_unuse-to-use-the-lgr-pointer.patch new file mode 100644 index 00000000000..98b0e5d03e4 --- /dev/null +++ b/queue-4.18/net-smc-fix-smc_buf_unuse-to-use-the-lgr-pointer.patch @@ -0,0 +1,89 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Karsten Graul +Date: Thu, 25 Oct 2018 13:25:28 +0200 +Subject: net/smc: fix smc_buf_unuse to use the lgr pointer + +From: Karsten Graul + +[ Upstream commit fb692ec4117f6fd25044cfb5720d6b79d400dc65 ] + +The pointer to the link group is unset in the smc connection structure +right before the call to smc_buf_unuse. Provide the lgr pointer to +smc_buf_unuse explicitly. 
+And move the call to smc_lgr_schedule_free_work to the end of +smc_conn_free. + +Fixes: a6920d1d130c ("net/smc: handle unregistered buffers") +Signed-off-by: Karsten Graul +Signed-off-by: Ursula Braun +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/smc_core.c | 23 +++++++++++------------ + 1 file changed, 11 insertions(+), 12 deletions(-) + +--- a/net/smc/smc_core.c ++++ b/net/smc/smc_core.c +@@ -114,22 +114,17 @@ static void __smc_lgr_unregister_conn(st + sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ + } + +-/* Unregister connection and trigger lgr freeing if applicable ++/* Unregister connection from lgr + */ + static void smc_lgr_unregister_conn(struct smc_connection *conn) + { + struct smc_link_group *lgr = conn->lgr; +- int reduced = 0; + + write_lock_bh(&lgr->conns_lock); + if (conn->alert_token_local) { +- reduced = 1; + __smc_lgr_unregister_conn(conn); + } + write_unlock_bh(&lgr->conns_lock); +- if (!reduced || lgr->conns_num) +- return; +- smc_lgr_schedule_free_work(lgr); + } + + static void smc_lgr_free_work(struct work_struct *work) +@@ -238,7 +233,8 @@ out: + return rc; + } + +-static void smc_buf_unuse(struct smc_connection *conn) ++static void smc_buf_unuse(struct smc_connection *conn, ++ struct smc_link_group *lgr) + { + if (conn->sndbuf_desc) + conn->sndbuf_desc->used = 0; +@@ -248,8 +244,6 @@ static void smc_buf_unuse(struct smc_con + conn->rmb_desc->used = 0; + } else { + /* buf registration failed, reuse not possible */ +- struct smc_link_group *lgr = conn->lgr; +- + write_lock_bh(&lgr->rmbs_lock); + list_del(&conn->rmb_desc->list); + write_unlock_bh(&lgr->rmbs_lock); +@@ -262,11 +256,16 @@ static void smc_buf_unuse(struct smc_con + /* remove a finished connection from its link group */ + void smc_conn_free(struct smc_connection *conn) + { +- if (!conn->lgr) ++ struct smc_link_group *lgr = conn->lgr; ++ ++ if (!lgr) + return; + smc_cdc_tx_dismiss_slots(conn); +- smc_lgr_unregister_conn(conn); +- 
smc_buf_unuse(conn); ++ smc_lgr_unregister_conn(conn); /* unsets conn->lgr */ ++ smc_buf_unuse(conn, lgr); /* allow buffer reuse */ ++ ++ if (!lgr->conns_num) ++ smc_lgr_schedule_free_work(lgr); + } + + static void smc_link_clear(struct smc_link *lnk) diff --git a/queue-4.18/net-socket-fix-a-missing-check-bug.patch b/queue-4.18/net-socket-fix-a-missing-check-bug.patch new file mode 100644 index 00000000000..b1a12bb9284 --- /dev/null +++ b/queue-4.18/net-socket-fix-a-missing-check-bug.patch @@ -0,0 +1,56 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Wenwen Wang +Date: Thu, 18 Oct 2018 09:36:46 -0500 +Subject: net: socket: fix a missing-check bug + +From: Wenwen Wang + +[ Upstream commit b6168562c8ce2bd5a30e213021650422e08764dc ] + +In ethtool_ioctl(), the ioctl command 'ethcmd' is checked through a switch +statement to see whether it is necessary to pre-process the ethtool +structure, because, as mentioned in the comment, the structure +ethtool_rxnfc is defined with padding. If yes, a user-space buffer 'rxnfc' +is allocated through compat_alloc_user_space(). One thing to note here is +that, if 'ethcmd' is ETHTOOL_GRXCLSRLALL, the size of the buffer 'rxnfc' is +partially determined by 'rule_cnt', which is actually acquired from the +user-space buffer 'compat_rxnfc', i.e., 'compat_rxnfc->rule_cnt', through +get_user(). After 'rxnfc' is allocated, the data in the original user-space +buffer 'compat_rxnfc' is then copied to 'rxnfc' through copy_in_user(), +including the 'rule_cnt' field. However, after this copy, no check is +re-enforced on 'rxnfc->rule_cnt'. So it is possible that a malicious user +race to change the value in the 'compat_rxnfc->rule_cnt' between these two +copies. Through this way, the attacker can bypass the previous check on +'rule_cnt' and inject malicious data. This can cause undefined behavior of +the kernel and introduce potential security risk. 
+ +This patch avoids the above issue via copying the value acquired by +get_user() to 'rxnfc->rule_cnt', if 'ethcmd' is ETHTOOL_GRXCLSRLALL. + +Signed-off-by: Wenwen Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/socket.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/net/socket.c ++++ b/net/socket.c +@@ -2887,9 +2887,14 @@ static int ethtool_ioctl(struct net *net + copy_in_user(&rxnfc->fs.ring_cookie, + &compat_rxnfc->fs.ring_cookie, + (void __user *)(&rxnfc->fs.location + 1) - +- (void __user *)&rxnfc->fs.ring_cookie) || +- copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, +- sizeof(rxnfc->rule_cnt))) ++ (void __user *)&rxnfc->fs.ring_cookie)) ++ return -EFAULT; ++ if (ethcmd == ETHTOOL_GRXCLSRLALL) { ++ if (put_user(rule_cnt, &rxnfc->rule_cnt)) ++ return -EFAULT; ++ } else if (copy_in_user(&rxnfc->rule_cnt, ++ &compat_rxnfc->rule_cnt, ++ sizeof(rxnfc->rule_cnt))) + return -EFAULT; + } + diff --git a/queue-4.18/net-stmmac-fix-stmmac_mdio_reset-when-building-stmmac-as-modules.patch b/queue-4.18/net-stmmac-fix-stmmac_mdio_reset-when-building-stmmac-as-modules.patch new file mode 100644 index 00000000000..1647c4bc7c4 --- /dev/null +++ b/queue-4.18/net-stmmac-fix-stmmac_mdio_reset-when-building-stmmac-as-modules.patch @@ -0,0 +1,43 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Niklas Cassel +Date: Wed, 31 Oct 2018 16:08:10 +0100 +Subject: net: stmmac: Fix stmmac_mdio_reset() when building stmmac as modules + +From: Niklas Cassel + +[ Upstream commit 30549aab146ccb1275230c3b4b4bc6b4181fd54e ] + +When building stmmac, it is only possible to select CONFIG_DWMAC_GENERIC, +or any of the glue drivers, when CONFIG_STMMAC_PLATFORM is set. +The only exception is CONFIG_STMMAC_PCI. + +When calling of_mdiobus_register(), it will call our ->reset() +callback, which is set to stmmac_mdio_reset(). 
+ +Most of the code in stmmac_mdio_reset() is protected by a +"#if defined(CONFIG_STMMAC_PLATFORM)", which will evaluate +to false when CONFIG_STMMAC_PLATFORM=m. + +Because of this, the phy reset gpio will only be pulled when +stmmac is built as built-in, but not when built as modules. + +Fix this by using "#if IS_ENABLED()" instead of "#if defined()". + +Signed-off-by: Niklas Cassel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +@@ -133,7 +133,7 @@ static int stmmac_mdio_write(struct mii_ + */ + int stmmac_mdio_reset(struct mii_bus *bus) + { +-#if defined(CONFIG_STMMAC_PLATFORM) ++#if IS_ENABLED(CONFIG_STMMAC_PLATFORM) + struct net_device *ndev = bus->priv; + struct stmmac_priv *priv = netdev_priv(ndev); + unsigned int mii_address = priv->hw->mii.addr; diff --git a/queue-4.18/net-udp-fix-handling-of-checksum_complete-packets.patch b/queue-4.18/net-udp-fix-handling-of-checksum_complete-packets.patch new file mode 100644 index 00000000000..174efd517de --- /dev/null +++ b/queue-4.18/net-udp-fix-handling-of-checksum_complete-packets.patch @@ -0,0 +1,146 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Sean Tranchetti +Date: Tue, 23 Oct 2018 16:04:31 -0600 +Subject: net: udp: fix handling of CHECKSUM_COMPLETE packets + +From: Sean Tranchetti + +[ Upstream commit db4f1be3ca9b0ef7330763d07bf4ace83ad6f913 ] + +Current handling of CHECKSUM_COMPLETE packets by the UDP stack is +incorrect for any packet that has an incorrect checksum value. + +udp4/6_csum_init() will both make a call to +__skb_checksum_validate_complete() to initialize/validate the csum +field when receiving a CHECKSUM_COMPLETE packet. 
When this packet +fails validation, skb->csum will be overwritten with the pseudoheader +checksum so the packet can be fully validated by software, but the +skb->ip_summed value will be left as CHECKSUM_COMPLETE so that way +the stack can later warn the user about their hardware spewing bad +checksums. Unfortunately, leaving the SKB in this state can cause +problems later on in the checksum calculation. + +Since the packet is still marked as CHECKSUM_COMPLETE, +udp_csum_pull_header() will SUBTRACT the checksum of the UDP header +from skb->csum instead of adding it, leaving us with a garbage value +in that field. Once we try to copy the packet to userspace in the +udp4/6_recvmsg(), we'll make a call to skb_copy_and_csum_datagram_msg() +to checksum the packet data and add it in the garbage skb->csum value +to perform our final validation check. + +Since the value we're validating is not the proper checksum, it's possible +that the folded value could come out to 0, causing us not to drop the +packet. Instead, we believe that the packet was checksummed incorrectly +by hardware since skb->ip_summed is still CHECKSUM_COMPLETE, and we attempt +to warn the user with netdev_rx_csum_fault(skb->dev); + +Unfortunately, since this is the UDP path, skb->dev has been overwritten +by skb->dev_scratch and is no longer a valid pointer, so we end up +reading invalid memory. + +This patch addresses this problem in two ways: + 1) Do not use the dev pointer when calling netdev_rx_csum_fault() + from skb_copy_and_csum_datagram_msg(). Since this gets called + from the UDP path where skb->dev has been overwritten, we have + no way of knowing if the pointer is still valid. Also for the + sake of consistency with the other uses of + netdev_rx_csum_fault(), don't attempt to call it if the + packet was checksummed by software. + + 2) Add better CHECKSUM_COMPLETE handling to udp4/6_csum_init(). + If we receive a packet that's CHECKSUM_COMPLETE that fails + verification (i.e. 
skb->csum_valid == 0), check who performed + the calculation. It's possible that the checksum was done in + software by the network stack earlier (such as Netfilter's + CONNTRACK module), and if that says the checksum is bad, + we can drop the packet immediately instead of waiting until + we try and copy it to userspace. Otherwise, we need to + mark the SKB as CHECKSUM_NONE, since the skb->csum field + no longer contains the full packet checksum after the + call to __skb_checksum_validate_complete(). + +Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") +Fixes: c84d949057ca ("udp: copy skb->truesize in the first cache line") +Cc: Sam Kumar +Cc: Eric Dumazet +Signed-off-by: Sean Tranchetti +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/datagram.c | 5 +++-- + net/ipv4/udp.c | 20 ++++++++++++++++++-- + net/ipv6/ip6_checksum.c | 20 ++++++++++++++++++-- + 3 files changed, 39 insertions(+), 6 deletions(-) + +--- a/net/core/datagram.c ++++ b/net/core/datagram.c +@@ -808,8 +808,9 @@ int skb_copy_and_csum_datagram_msg(struc + return -EINVAL; + } + +- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) +- netdev_rx_csum_fault(skb->dev); ++ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && ++ !skb->csum_complete_sw) ++ netdev_rx_csum_fault(NULL); + } + return 0; + fault: +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -2124,8 +2124,24 @@ static inline int udp4_csum_init(struct + /* Note, we are only interested in != 0 or == 0, thus the + * force to int. + */ +- return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, +- inet_compute_pseudo); ++ err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, ++ inet_compute_pseudo); ++ if (err) ++ return err; ++ ++ if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { ++ /* If SW calculated the value, we know it's bad */ ++ if (skb->csum_complete_sw) ++ return 1; ++ ++ /* HW says the value is bad. Let's validate that. 
++ * skb->csum is no longer the full packet checksum, ++ * so don't treat it as such. ++ */ ++ skb_checksum_complete_unset(skb); ++ } ++ ++ return 0; + } + + /* wrapper for udp_queue_rcv_skb tacking care of csum conversion and +--- a/net/ipv6/ip6_checksum.c ++++ b/net/ipv6/ip6_checksum.c +@@ -88,8 +88,24 @@ int udp6_csum_init(struct sk_buff *skb, + * Note, we are only interested in != 0 or == 0, thus the + * force to int. + */ +- return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, +- ip6_compute_pseudo); ++ err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, ++ ip6_compute_pseudo); ++ if (err) ++ return err; ++ ++ if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { ++ /* If SW calculated the value, we know it's bad */ ++ if (skb->csum_complete_sw) ++ return 1; ++ ++ /* HW says the value is bad. Let's validate that. ++ * skb->csum is no longer the full packet checksum, ++ * so don't treat is as such. ++ */ ++ skb_checksum_complete_unset(skb); ++ } ++ ++ return 0; + } + EXPORT_SYMBOL(udp6_csum_init); + diff --git a/queue-4.18/openvswitch-fix-push-pop-ethernet-validation.patch b/queue-4.18/openvswitch-fix-push-pop-ethernet-validation.patch new file mode 100644 index 00000000000..1cedc8fa5e3 --- /dev/null +++ b/queue-4.18/openvswitch-fix-push-pop-ethernet-validation.patch @@ -0,0 +1,46 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: "Jaime Caamaño Ruiz" +Date: Wed, 31 Oct 2018 18:52:03 +0100 +Subject: openvswitch: Fix push/pop ethernet validation + +From: "Jaime Caamaño Ruiz" + +[ Upstream commit 46ebe2834ba5b541f28ee72e556a3fed42c47570 ] + +When there are both pop and push ethernet header actions among the +actions to be applied to a packet, an unexpected EINVAL (Invalid +argument) error is obtained. This is due to mac_proto not being reset +correctly when those actions are validated. 
+ +Reported-at: +https://mail.openvswitch.org/pipermail/ovs-discuss/2018-October/047554.html +Fixes: 91820da6ae85 ("openvswitch: add Ethernet push and pop actions") +Signed-off-by: Jaime Caamaño Ruiz +Tested-by: Greg Rose +Reviewed-by: Greg Rose +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/flow_netlink.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -2990,7 +2990,7 @@ static int __ovs_nla_copy_actions(struct + * is already present */ + if (mac_proto != MAC_PROTO_NONE) + return -EINVAL; +- mac_proto = MAC_PROTO_NONE; ++ mac_proto = MAC_PROTO_ETHERNET; + break; + + case OVS_ACTION_ATTR_POP_ETH: +@@ -2998,7 +2998,7 @@ static int __ovs_nla_copy_actions(struct + return -EINVAL; + if (vlan_tci & htons(VLAN_TAG_PRESENT)) + return -EINVAL; +- mac_proto = MAC_PROTO_ETHERNET; ++ mac_proto = MAC_PROTO_NONE; + break; + + case OVS_ACTION_ATTR_PUSH_NSH: diff --git a/queue-4.18/perf-tools-disable-parallelism-for-make-clean.patch b/queue-4.18/perf-tools-disable-parallelism-for-make-clean.patch index 256b6e8236f..30ea58f6a9d 100644 --- a/queue-4.18/perf-tools-disable-parallelism-for-make-clean.patch +++ b/queue-4.18/perf-tools-disable-parallelism-for-make-clean.patch @@ -31,11 +31,9 @@ Link: http://lkml.kernel.org/r/20180705131527.19749-1-linux@rasmusvillemoes.dk Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- - tools/perf/Makefile | 4 ++-- + tools/perf/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -diff --git a/tools/perf/Makefile b/tools/perf/Makefile -index 225454416ed5..7902a5681fc8 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -84,10 +84,10 @@ endif # has_clean @@ -51,6 +49,3 @@ index 225454416ed5..7902a5681fc8 100644 # # The build-test target is not really parallel, don't print the jobs info, --- -2.17.1 - diff --git a/queue-4.18/r8169-fix-napi-handling-under-high-load.patch 
b/queue-4.18/r8169-fix-napi-handling-under-high-load.patch new file mode 100644 index 00000000000..83516d15f6c --- /dev/null +++ b/queue-4.18/r8169-fix-napi-handling-under-high-load.patch @@ -0,0 +1,52 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Heiner Kallweit +Date: Thu, 18 Oct 2018 19:56:01 +0200 +Subject: r8169: fix NAPI handling under high load + +From: Heiner Kallweit + +[ Upstream commit 6b839b6cf9eada30b086effb51e5d6076bafc761 ] + +rtl_rx() and rtl_tx() are called only if the respective bits are set +in the interrupt status register. Under high load NAPI may not be +able to process all data (work_done == budget) and it will schedule +subsequent calls to the poll callback. +rtl_ack_events() however resets the bits in the interrupt status +register, therefore subsequent calls to rtl8169_poll() won't call +rtl_rx() and rtl_tx() - chip interrupts are still disabled. + +Fix this by calling rtl_rx() and rtl_tx() independent of the bits +set in the interrupt status register. Both functions will detect +if there's nothing to do for them. + +Fixes: da78dbff2e05 ("r8169: remove work from irq handler.") +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 8 +++----- + 1 file changed, 3 insertions(+), 5 deletions(-) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -7044,17 +7044,15 @@ static int rtl8169_poll(struct napi_stru + struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi); + struct net_device *dev = tp->dev; + u16 enable_mask = RTL_EVENT_NAPI | tp->event_slow; +- int work_done= 0; ++ int work_done; + u16 status; + + status = rtl_get_events(tp); + rtl_ack_events(tp, status & ~tp->event_slow); + +- if (status & RTL_EVENT_NAPI_RX) +- work_done = rtl_rx(dev, tp, (u32) budget); ++ work_done = rtl_rx(dev, tp, (u32) budget); + +- if (status & RTL_EVENT_NAPI_TX) +- rtl_tx(dev, tp); ++ rtl_tx(dev, tp); + + if (status & tp->event_slow) { + enable_mask &= ~tp->event_slow; diff --git a/queue-4.18/rtnetlink-disallow-fdb-configuration-for-non-ethernet-device.patch b/queue-4.18/rtnetlink-disallow-fdb-configuration-for-non-ethernet-device.patch new file mode 100644 index 00000000000..94d151eeb89 --- /dev/null +++ b/queue-4.18/rtnetlink-disallow-fdb-configuration-for-non-ethernet-device.patch @@ -0,0 +1,124 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Ido Schimmel +Date: Mon, 29 Oct 2018 20:36:43 +0000 +Subject: rtnetlink: Disallow FDB configuration for non-Ethernet device + +From: Ido Schimmel + +[ Upstream commit da71577545a52be3e0e9225a946e5fd79cfab015 ] + +When an FDB entry is configured, the address is validated to have the +length of an Ethernet address, but the device for which the address is +configured can be of any type. + +The above can result in the use of uninitialized memory when the address +is later compared against existing addresses since 'dev->addr_len' is +used and it may be greater than ETH_ALEN, as with ip6tnl devices. + +Fix this by making sure that FDB entries are only configured for +Ethernet devices. 
+ +BUG: KMSAN: uninit-value in memcmp+0x11d/0x180 lib/string.c:863 +CPU: 1 PID: 4318 Comm: syz-executor998 Not tainted 4.19.0-rc3+ #49 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x14b/0x190 lib/dump_stack.c:113 + kmsan_report+0x183/0x2b0 mm/kmsan/kmsan.c:956 + __msan_warning+0x70/0xc0 mm/kmsan/kmsan_instr.c:645 + memcmp+0x11d/0x180 lib/string.c:863 + dev_uc_add_excl+0x165/0x7b0 net/core/dev_addr_lists.c:464 + ndo_dflt_fdb_add net/core/rtnetlink.c:3463 [inline] + rtnl_fdb_add+0x1081/0x1270 net/core/rtnetlink.c:3558 + rtnetlink_rcv_msg+0xa0b/0x1530 net/core/rtnetlink.c:4715 + netlink_rcv_skb+0x36e/0x5f0 net/netlink/af_netlink.c:2454 + rtnetlink_rcv+0x50/0x60 net/core/rtnetlink.c:4733 + netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] + netlink_unicast+0x1638/0x1720 net/netlink/af_netlink.c:1343 + netlink_sendmsg+0x1205/0x1290 net/netlink/af_netlink.c:1908 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg net/socket.c:631 [inline] + ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 + __sys_sendmsg net/socket.c:2152 [inline] + __do_sys_sendmsg net/socket.c:2161 [inline] + __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 + __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 + do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 + entry_SYSCALL_64_after_hwframe+0x63/0xe7 +RIP: 0033:0x440ee9 +Code: e8 cc ab 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 +48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff +ff 0f 83 bb 0a fc ff c3 66 2e 0f 1f 84 00 00 00 00 +RSP: 002b:00007fff6a93b518 EFLAGS: 00000213 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440ee9 +RDX: 0000000000000000 RSI: 0000000020000240 RDI: 0000000000000003 +RBP: 0000000000000000 R08: 00000000004002c8 R09: 00000000004002c8 +R10: 00000000004002c8 R11: 0000000000000213 R12: 000000000000b4b0 +R13: 
0000000000401ec0 R14: 0000000000000000 R15: 0000000000000000 + +Uninit was created at: + kmsan_save_stack_with_flags mm/kmsan/kmsan.c:256 [inline] + kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:181 + kmsan_kmalloc+0x98/0x100 mm/kmsan/kmsan_hooks.c:91 + kmsan_slab_alloc+0x10/0x20 mm/kmsan/kmsan_hooks.c:100 + slab_post_alloc_hook mm/slab.h:446 [inline] + slab_alloc_node mm/slub.c:2718 [inline] + __kmalloc_node_track_caller+0x9e7/0x1160 mm/slub.c:4351 + __kmalloc_reserve net/core/skbuff.c:138 [inline] + __alloc_skb+0x2f5/0x9e0 net/core/skbuff.c:206 + alloc_skb include/linux/skbuff.h:996 [inline] + netlink_alloc_large_skb net/netlink/af_netlink.c:1189 [inline] + netlink_sendmsg+0xb49/0x1290 net/netlink/af_netlink.c:1883 + sock_sendmsg_nosec net/socket.c:621 [inline] + sock_sendmsg net/socket.c:631 [inline] + ___sys_sendmsg+0xe70/0x1290 net/socket.c:2114 + __sys_sendmsg net/socket.c:2152 [inline] + __do_sys_sendmsg net/socket.c:2161 [inline] + __se_sys_sendmsg+0x2a3/0x3d0 net/socket.c:2159 + __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2159 + do_syscall_64+0xb8/0x100 arch/x86/entry/common.c:291 + entry_SYSCALL_64_after_hwframe+0x63/0xe7 + +v2: +* Make error message more specific (David) + +Fixes: 090096bf3db1 ("net: generic fdb support for drivers without ndo_fdb_") +Signed-off-by: Ido Schimmel +Reported-and-tested-by: syzbot+3a288d5f5530b901310e@syzkaller.appspotmail.com +Reported-and-tested-by: syzbot+d53ab4e92a1db04110ff@syzkaller.appspotmail.com +Cc: Vlad Yasevich +Cc: David Ahern +Reviewed-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/rtnetlink.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +--- a/net/core/rtnetlink.c ++++ b/net/core/rtnetlink.c +@@ -3480,6 +3480,11 @@ static int rtnl_fdb_add(struct sk_buff * + return -EINVAL; + } + ++ if (dev->type != ARPHRD_ETHER) { ++ NL_SET_ERR_MSG(extack, "FDB add only supported for Ethernet devices"); ++ return -EINVAL; ++ } ++ + addr = nla_data(tb[NDA_LLADDR]); + + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); +@@ -3584,6 +3589,11 @@ static int rtnl_fdb_del(struct sk_buff * + return -EINVAL; + } + ++ if (dev->type != ARPHRD_ETHER) { ++ NL_SET_ERR_MSG(extack, "FDB delete only supported for Ethernet devices"); ++ return -EINVAL; ++ } ++ + addr = nla_data(tb[NDA_LLADDR]); + + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); diff --git a/queue-4.18/rxrpc-don-t-check-rxrpc_call_tx_last-after-calling-r.patch b/queue-4.18/rxrpc-don-t-check-rxrpc_call_tx_last-after-calling-r.patch index a324bb55fbd..e9ccad55c80 100644 --- a/queue-4.18/rxrpc-don-t-check-rxrpc_call_tx_last-after-calling-r.patch +++ b/queue-4.18/rxrpc-don-t-check-rxrpc_call_tx_last-after-calling-r.patch @@ -19,14 +19,12 @@ Fixes: 70790dbe3f66 ("rxrpc: Pass the last Tx packet marker in the annotation bu Signed-off-by: David Howells Signed-off-by: Sasha Levin --- - net/rxrpc/input.c | 35 +++++++++++++++++++---------------- + net/rxrpc/input.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) -diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c -index 608d078a4981..5e180a3c2d01 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c -@@ -216,10 +216,11 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, +@@ -216,10 +216,11 @@ static void rxrpc_send_ping(struct rxrpc /* * Apply a hard ACK by advancing the Tx window. 
*/ @@ -39,7 +37,7 @@ index 608d078a4981..5e180a3c2d01 100644 int ix; u8 annotation; -@@ -243,15 +244,17 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, +@@ -243,15 +244,17 @@ static void rxrpc_rotate_tx_window(struc skb->next = list; list = skb; @@ -59,7 +57,7 @@ index 608d078a4981..5e180a3c2d01 100644 rxrpc_transmit_rotate_last : rxrpc_transmit_rotate)); wake_up(&call->waitq); -@@ -262,6 +265,8 @@ static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, +@@ -262,6 +265,8 @@ static void rxrpc_rotate_tx_window(struc skb->next = NULL; rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } @@ -68,7 +66,7 @@ index 608d078a4981..5e180a3c2d01 100644 } /* -@@ -332,11 +337,11 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call) +@@ -332,11 +337,11 @@ static bool rxrpc_receiving_reply(struct trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now); } @@ -84,7 +82,7 @@ index 608d078a4981..5e180a3c2d01 100644 } if (!rxrpc_end_tx_phase(call, true, "ETD")) return false; -@@ -890,8 +895,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, +@@ -891,8 +896,12 @@ static void rxrpc_input_ack(struct rxrpc if (nr_acks > call->tx_top - hard_ack) return rxrpc_proto_abort("AKN", call, 0); @@ -99,7 +97,7 @@ index 608d078a4981..5e180a3c2d01 100644 if (nr_acks > 0) { if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) -@@ -900,11 +909,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, +@@ -901,11 +910,6 @@ static void rxrpc_input_ack(struct rxrpc &summary); } @@ -111,7 +109,7 @@ index 608d078a4981..5e180a3c2d01 100644 if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & RXRPC_TX_ANNO_LAST && summary.nr_acks == call->tx_top - hard_ack && -@@ -926,8 +930,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) +@@ -927,8 +931,7 @@ static void rxrpc_input_ackall(struct rx _proto("Rx ACKALL %%%u", sp->hdr.serial); @@ -121,6 +119,3 @@ index 
608d078a4981..5e180a3c2d01 100644 rxrpc_end_tx_phase(call, false, "ETL"); } --- -2.17.1 - diff --git a/queue-4.18/rxrpc-fix-connection-level-abort-handling.patch b/queue-4.18/rxrpc-fix-connection-level-abort-handling.patch index 6dc4ba31c22..b1e1f6087b4 100644 --- a/queue-4.18/rxrpc-fix-connection-level-abort-handling.patch +++ b/queue-4.18/rxrpc-fix-connection-level-abort-handling.patch @@ -16,16 +16,14 @@ Fixes: f5c17aaeb2ae ("rxrpc: Calls should only have one terminal state") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- - net/rxrpc/ar-internal.h | 4 ++-- - net/rxrpc/call_accept.c | 4 ++-- - net/rxrpc/conn_event.c | 26 +++++++++++++++----------- + net/rxrpc/ar-internal.h | 4 ++-- + net/rxrpc/call_accept.c | 4 ++-- + net/rxrpc/conn_event.c | 26 +++++++++++++++----------- 3 files changed, 19 insertions(+), 15 deletions(-) -diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h -index 707630ab4713..599d6c4e9444 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h -@@ -449,8 +449,7 @@ struct rxrpc_connection { +@@ -446,8 +446,7 @@ struct rxrpc_connection { spinlock_t state_lock; /* state-change lock */ enum rxrpc_conn_cache_state cache_state; enum rxrpc_conn_proto_state state; /* current state of connection */ @@ -35,16 +33,14 @@ index 707630ab4713..599d6c4e9444 100644 int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ unsigned int hi_serial; /* highest serial number received */ -@@ -460,6 +459,7 @@ struct rxrpc_connection { +@@ -457,6 +456,7 @@ struct rxrpc_connection { u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ + short error; /* Local error code */ }; - /* -diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c -index 9d1e298b784c..0e378d73e856 100644 + static inline bool rxrpc_to_server(const struct rxrpc_skb_priv *sp) --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c 
@@ -422,11 +422,11 @@ found_service: @@ -61,11 +57,9 @@ index 9d1e298b784c..0e378d73e856 100644 break; default: BUG(); -diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c -index 3fde001fcc39..5e7c8239e703 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c -@@ -126,7 +126,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, +@@ -126,7 +126,7 @@ static void rxrpc_conn_retransmit_call(s switch (chan->last_type) { case RXRPC_PACKET_TYPE_ABORT: @@ -74,7 +68,7 @@ index 3fde001fcc39..5e7c8239e703 100644 break; case RXRPC_PACKET_TYPE_ACK: trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0, -@@ -148,13 +148,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, +@@ -148,13 +148,12 @@ static void rxrpc_conn_retransmit_call(s * pass a connection-level abort onto all calls on that connection */ static void rxrpc_abort_calls(struct rxrpc_connection *conn, @@ -90,7 +84,7 @@ index 3fde001fcc39..5e7c8239e703 100644 spin_lock(&conn->channel_lock); -@@ -167,9 +166,11 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, +@@ -167,9 +166,11 @@ static void rxrpc_abort_calls(struct rxr trace_rxrpc_abort(call->debug_id, "CON", call->cid, call->call_id, 0, @@ -104,7 +98,7 @@ index 3fde001fcc39..5e7c8239e703 100644 rxrpc_notify_socket(call); } } -@@ -202,10 +203,12 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, +@@ -202,10 +203,12 @@ static int rxrpc_abort_connection(struct return 0; } @@ -118,7 +112,7 @@ index 3fde001fcc39..5e7c8239e703 100644 msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; -@@ -224,7 +227,7 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, +@@ -224,7 +227,7 @@ static int rxrpc_abort_connection(struct whdr._rsvd = 0; whdr.serviceId = htons(conn->service_id); @@ -127,7 +121,7 @@ index 3fde001fcc39..5e7c8239e703 100644 iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); -@@ -235,7 +238,7 @@ static 
int rxrpc_abort_connection(struct rxrpc_connection *conn, +@@ -235,7 +238,7 @@ static int rxrpc_abort_connection(struct serial = atomic_inc_return(&conn->serial); whdr.serial = htonl(serial); @@ -136,7 +130,7 @@ index 3fde001fcc39..5e7c8239e703 100644 ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); if (ret < 0) { -@@ -308,9 +311,10 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, +@@ -308,9 +311,10 @@ static int rxrpc_process_event(struct rx abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); @@ -149,6 +143,3 @@ index 3fde001fcc39..5e7c8239e703 100644 return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: --- -2.17.1 - diff --git a/queue-4.18/rxrpc-only-take-the-rwind-and-mtu-values-from-latest.patch b/queue-4.18/rxrpc-only-take-the-rwind-and-mtu-values-from-latest.patch index a92047b9741..60cd317ae3a 100644 --- a/queue-4.18/rxrpc-only-take-the-rwind-and-mtu-values-from-latest.patch +++ b/queue-4.18/rxrpc-only-take-the-rwind-and-mtu-values-from-latest.patch @@ -13,14 +13,12 @@ Fixes: 248f219cb8bc ("rxrpc: Rewrite the data and ack handling code") Signed-off-by: David Howells Signed-off-by: Sasha Levin --- - net/rxrpc/input.c | 19 ++++++++++--------- + net/rxrpc/input.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) -diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c -index fe1cf206d12a..b768b170f0e7 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c -@@ -861,6 +861,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, +@@ -862,6 +862,16 @@ static void rxrpc_input_ack(struct rxrpc rxrpc_propose_ack_respond_to_ack); } @@ -37,7 +35,7 @@ index fe1cf206d12a..b768b170f0e7 100644 ioffset = offset + nr_acks + 3; if (skb->len >= ioffset + sizeof(buf.info)) { if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) -@@ -882,15 +892,6 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, +@@ -883,15 +893,6 @@ static 
void rxrpc_input_ack(struct rxrpc return; } @@ -53,6 +51,3 @@ index fe1cf206d12a..b768b170f0e7 100644 if (before(hard_ack, call->tx_hard_ack) || after(hard_ack, call->tx_top)) return rxrpc_proto_abort("AKW", call, 0); --- -2.17.1 - diff --git a/queue-4.18/sctp-fix-race-on-sctp_id2asoc.patch b/queue-4.18/sctp-fix-race-on-sctp_id2asoc.patch new file mode 100644 index 00000000000..0e7fecaaea9 --- /dev/null +++ b/queue-4.18/sctp-fix-race-on-sctp_id2asoc.patch @@ -0,0 +1,62 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Marcelo Ricardo Leitner +Date: Tue, 16 Oct 2018 15:18:17 -0300 +Subject: sctp: fix race on sctp_id2asoc + +From: Marcelo Ricardo Leitner + +[ Upstream commit b336decab22158937975293aea79396525f92bb3 ] + +syzbot reported an use-after-free involving sctp_id2asoc. Dmitry Vyukov +helped to root cause it and it is because of reading the asoc after it +was freed: + + CPU 1 CPU 2 +(working on socket 1) (working on socket 2) + sctp_association_destroy +sctp_id2asoc + spin lock + grab the asoc from idr + spin unlock + spin lock + remove asoc from idr + spin unlock + free(asoc) + if asoc->base.sk != sk ... [*] + +This can only be hit if trying to fetch asocs from different sockets. As +we have a single IDR for all asocs, in all SCTP sockets, their id is +unique on the system. An application can try to send stuff on an id +that matches on another socket, and the if in [*] will protect from such +usage. But it didn't consider that as that asoc may belong to another +socket, it may be freed in parallel (read: under another socket lock). + +We fix it by moving the checks in [*] into the protected region. This +fixes it because the asoc cannot be freed while the lock is held. + +Reported-by: syzbot+c7dd55d7aec49d48e49a@syzkaller.appspotmail.com +Acked-by: Dmitry Vyukov +Signed-off-by: Marcelo Ricardo Leitner +Acked-by: Neil Horman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -270,11 +270,10 @@ struct sctp_association *sctp_id2assoc(s + + spin_lock_bh(&sctp_assocs_id_lock); + asoc = (struct sctp_association *)idr_find(&sctp_assocs_id, (int)id); ++ if (asoc && (asoc->base.sk != sk || asoc->base.dead)) ++ asoc = NULL; + spin_unlock_bh(&sctp_assocs_id_lock); + +- if (!asoc || (asoc->base.sk != sk) || asoc->base.dead) +- return NULL; +- + return asoc; + } + diff --git a/queue-4.18/sctp-fix-the-data-size-calculation-in-sctp_data_size.patch b/queue-4.18/sctp-fix-the-data-size-calculation-in-sctp_data_size.patch new file mode 100644 index 00000000000..f5c7348ca45 --- /dev/null +++ b/queue-4.18/sctp-fix-the-data-size-calculation-in-sctp_data_size.patch @@ -0,0 +1,32 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Xin Long +Date: Wed, 17 Oct 2018 21:11:27 +0800 +Subject: sctp: fix the data size calculation in sctp_data_size + +From: Xin Long + +[ Upstream commit 5660b9d9d6a29c2c3cc12f62ae44bfb56b0a15a9 ] + +sctp data size should be calculated by subtracting data chunk header's +length from chunk_hdr->length, not just data header. + +Fixes: 668c9beb9020 ("sctp: implement assign_number for sctp_stream_interleave") +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/sctp/sm.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/net/sctp/sm.h ++++ b/include/net/sctp/sm.h +@@ -347,7 +347,7 @@ static inline __u16 sctp_data_size(struc + __u16 size; + + size = ntohs(chunk->chunk_hdr->length); +- size -= sctp_datahdr_len(&chunk->asoc->stream); ++ size -= sctp_datachk_len(&chunk->asoc->stream); + + return size; + } diff --git a/queue-4.18/sctp-not-free-the-new-asoc-when-sctp_wait_for_connect-returns-err.patch b/queue-4.18/sctp-not-free-the-new-asoc-when-sctp_wait_for_connect-returns-err.patch new file mode 100644 index 00000000000..114e4a6aaeb --- /dev/null +++ b/queue-4.18/sctp-not-free-the-new-asoc-when-sctp_wait_for_connect-returns-err.patch @@ -0,0 +1,75 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Xin Long +Date: Wed, 17 Oct 2018 03:06:12 +0800 +Subject: sctp: not free the new asoc when sctp_wait_for_connect returns err + +From: Xin Long + +[ Upstream commit c863850ce22e1b0bb365d49cadf51f4765153ae4 ] + +When sctp_wait_for_connect is called to wait for connect ready +for sp->strm_interleave in sctp_sendmsg_to_asoc, a panic could +be triggered if cpu is scheduled out and the new asoc is freed +elsewhere, as it will return err and later the asoc gets freed +again in sctp_sendmsg. + +[ 285.840764] list_del corruption, ffff9f0f7b284078->next is LIST_POISON1 (dead000000000100) +[ 285.843590] WARNING: CPU: 1 PID: 8861 at lib/list_debug.c:47 __list_del_entry_valid+0x50/0xa0 +[ 285.846193] Kernel panic - not syncing: panic_on_warn set ... +[ 285.846193] +[ 285.848206] CPU: 1 PID: 8861 Comm: sctp_ndata Kdump: loaded Not tainted 4.19.0-rc7.label #584 +[ 285.850559] Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 +[ 285.852164] Call Trace: +... +[ 285.872210] ? 
__list_del_entry_valid+0x50/0xa0 +[ 285.872894] sctp_association_free+0x42/0x2d0 [sctp] +[ 285.873612] sctp_sendmsg+0x5a4/0x6b0 [sctp] +[ 285.874236] sock_sendmsg+0x30/0x40 +[ 285.874741] ___sys_sendmsg+0x27a/0x290 +[ 285.875304] ? __switch_to_asm+0x34/0x70 +[ 285.875872] ? __switch_to_asm+0x40/0x70 +[ 285.876438] ? ptep_set_access_flags+0x2a/0x30 +[ 285.877083] ? do_wp_page+0x151/0x540 +[ 285.877614] __sys_sendmsg+0x58/0xa0 +[ 285.878138] do_syscall_64+0x55/0x180 +[ 285.878669] entry_SYSCALL_64_after_hwframe+0x44/0xa9 + +This is a similar issue with the one fixed in Commit ca3af4dd28cf +("sctp: do not free asoc when it is already dead in sctp_sendmsg"). +But this one can't be fixed by returning -ESRCH for the dead asoc +in sctp_wait_for_connect, as it will break sctp_connect's return +value to users. + +This patch is to simply set err to -ESRCH before it returns to +sctp_sendmsg when any err is returned by sctp_wait_for_connect +for sp->strm_interleave, so that no asoc would be freed due to +this. + +When users see this error, they will know the packet hasn't been +sent. And it also makes sense to not free asoc because waiting +connect fails, like the second call for sctp_wait_for_connect in +sctp_sendmsg_to_asoc. + +Fixes: 668c9beb9020 ("sctp: implement assign_number for sctp_stream_interleave") +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -1939,8 +1939,10 @@ static int sctp_sendmsg_to_asoc(struct s + if (sp->strm_interleave) { + timeo = sock_sndtimeo(sk, 0); + err = sctp_wait_for_connect(asoc, &timeo); +- if (err) ++ if (err) { ++ err = -ESRCH; + goto err; ++ } + } else { + wait_connect = true; + } diff --git a/queue-4.18/series b/queue-4.18/series index cb414b953af..8c6b1860036 100644 --- a/queue-4.18/series +++ b/queue-4.18/series @@ -100,3 +100,42 @@ fs-fat-fatent.c-add-cond_resched-to-fat_count_free_c.patch revert-mm-slowly-shrink-slabs-with-a-relatively-smal.patch revert-netfilter-ipv6-nf_defrag-drop-skb-dst-before-.patch perf-tools-disable-parallelism-for-make-clean.patch +bridge-do-not-add-port-to-router-list-when-receives-query-with-source-0.0.0.0.patch +ipv6-mcast-fix-a-use-after-free-in-inet6_mc_check.patch +ipv6-ndisc-preserve-ipv6-control-buffer-if-protocol-error-handlers-are-called.patch +ipv6-rate-limit-probes-for-neighbourless-routes.patch +llc-set-sock_rcu_free-in-llc_sap_add_socket.patch +net-fec-don-t-dump-rx-fifo-register-when-not-available.patch +net-ipv6-fix-index-counter-for-unicast-addresses-in-in6_dump_addrs.patch +net-mlx5e-fix-csum-adjustments-caused-by-rxfcs.patch +net-sched-gred-pass-the-right-attribute-to-gred_change_table_def.patch +net-socket-fix-a-missing-check-bug.patch +net-stmmac-fix-stmmac_mdio_reset-when-building-stmmac-as-modules.patch +net-udp-fix-handling-of-checksum_complete-packets.patch +r8169-fix-napi-handling-under-high-load.patch +rtnetlink-disallow-fdb-configuration-for-non-ethernet-device.patch +sctp-fix-race-on-sctp_id2asoc.patch +tipc-fix-unsafe-rcu-locking-when-accessing-publication-list.patch +udp6-fix-encap-return-code-for-resubmitting.patch +vhost-fix-spectre-v1-vulnerability.patch +virtio_net-avoid-using-netif_tx_disable-for-serializing-tx-routine.patch 
+ethtool-fix-a-privilege-escalation-bug.patch +bonding-fix-length-of-actor-system.patch +ip6_tunnel-fix-encapsulation-layout.patch +openvswitch-fix-push-pop-ethernet-validation.patch +net-ipmr-fix-unresolved-entry-dumps.patch +net-mlx5-take-only-bit-24-26-of-wqe.pftype_wq-for-page-fault-type.patch +net-bcmgenet-poll-internal-phy-for-genetv5.patch +net-sched-fix-for-duplicate-class-dump.patch +net-sched-cls_api-add-missing-validation-of-netlink-attributes.patch +net-ipv6-allow-onlink-routes-to-have-a-device-mismatch-if-it-is-the-default-route.patch +sctp-fix-the-data-size-calculation-in-sctp_data_size.patch +sctp-not-free-the-new-asoc-when-sctp_wait_for_connect-returns-err.patch +net-mlx5-fix-memory-leak-when-setting-fpga-ipsec-caps.patch +net-smc-fix-smc_buf_unuse-to-use-the-lgr-pointer.patch +mlxsw-spectrum_switchdev-don-t-ignore-deletions-of-learned-macs.patch +net-bpfilter-use-get_pid_task-instead-of-pid_task.patch +net-drop-skb-on-failure-in-ip_check_defrag.patch +net-fix-pskb_trim_rcsum_slow-with-odd-trim-offset.patch +net-mlx5-wq-fixes-for-fragmented-wq-buffers-api.patch +mlxsw-core-fix-devlink-unregister-flow.patch diff --git a/queue-4.18/tipc-fix-unsafe-rcu-locking-when-accessing-publication-list.patch b/queue-4.18/tipc-fix-unsafe-rcu-locking-when-accessing-publication-list.patch new file mode 100644 index 00000000000..a553631e844 --- /dev/null +++ b/queue-4.18/tipc-fix-unsafe-rcu-locking-when-accessing-publication-list.patch @@ -0,0 +1,47 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Tung Nguyen +Date: Fri, 12 Oct 2018 22:46:55 +0200 +Subject: tipc: fix unsafe rcu locking when accessing publication list + +From: Tung Nguyen + +[ Upstream commit d3092b2efca1cd1d492d0b08499a2066c5ca8cec ] + +The binding table's 'cluster_scope' list is rcu protected to handle +races between threads changing the list and those traversing the list at +the same moment. 
We have now found that the function named_distribute() +uses the regular list_for_each() macro to traverse the said list. +Likewise, the function tipc_named_withdraw() is removing items from the +same list using the regular list_del() call. When these two functions +execute in parallel we see occasional crashes. + +This commit fixes this by adding the missing _rcu() suffixes. + +Signed-off-by: Tung Nguyen +Signed-off-by: Jon Maloy +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/name_distr.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/tipc/name_distr.c ++++ b/net/tipc/name_distr.c +@@ -115,7 +115,7 @@ struct sk_buff *tipc_named_withdraw(stru + struct sk_buff *buf; + struct distr_item *item; + +- list_del(&publ->binding_node); ++ list_del_rcu(&publ->binding_node); + + if (publ->scope == TIPC_NODE_SCOPE) + return NULL; +@@ -147,7 +147,7 @@ static void named_distribute(struct net + ITEM_SIZE) * ITEM_SIZE; + u32 msg_rem = msg_dsz; + +- list_for_each_entry(publ, pls, binding_node) { ++ list_for_each_entry_rcu(publ, pls, binding_node) { + /* Prepare next buffer: */ + if (!skb) { + skb = named_prepare_buf(net, PUBLICATION, msg_rem, diff --git a/queue-4.18/udp6-fix-encap-return-code-for-resubmitting.patch b/queue-4.18/udp6-fix-encap-return-code-for-resubmitting.patch new file mode 100644 index 00000000000..214bb6848c5 --- /dev/null +++ b/queue-4.18/udp6-fix-encap-return-code-for-resubmitting.patch @@ -0,0 +1,40 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Paolo Abeni +Date: Wed, 17 Oct 2018 11:44:04 +0200 +Subject: udp6: fix encap return code for resubmitting + +From: Paolo Abeni + +[ Upstream commit 84dad55951b0d009372ec21760b650634246e144 ] + +The commit eb63f2964dbe ("udp6: add missing checks on edumux packet +processing") used the same return code convention of the ipv4 counterpart, +but ipv6 uses the opposite one: positive values means resubmit. 
+ +This change addresses the issue, using positive return value for +resubmitting. Also update the related comment, which was broken, too. + +Fixes: eb63f2964dbe ("udp6: add missing checks on edumux packet processing") +Signed-off-by: Paolo Abeni +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/udp.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -762,11 +762,9 @@ static int udp6_unicast_rcv_skb(struct s + + ret = udpv6_queue_rcv_skb(sk, skb); + +- /* a return value > 0 means to resubmit the input, but +- * it wants the return to be -protocol, or 0 +- */ ++ /* a return value > 0 means to resubmit the input */ + if (ret > 0) +- return -ret; ++ return ret; + return 0; + } + diff --git a/queue-4.18/vhost-fix-spectre-v1-vulnerability.patch b/queue-4.18/vhost-fix-spectre-v1-vulnerability.patch new file mode 100644 index 00000000000..5ebe33ec558 --- /dev/null +++ b/queue-4.18/vhost-fix-spectre-v1-vulnerability.patch @@ -0,0 +1,42 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Jason Wang +Date: Tue, 30 Oct 2018 14:10:49 +0800 +Subject: vhost: Fix Spectre V1 vulnerability + +From: Jason Wang + +[ Upstream commit ff002269a4ee9c769dbf9365acef633ebcbd6cbe ] + +The idx in vhost_vring_ioctl() was controlled by userspace, hence a +potential exploitation of the Spectre variant 1 vulnerability. + +Fixing this by sanitizing idx before using it to index d->vqs. + +Cc: Michael S. Tsirkin +Cc: Josh Poimboeuf +Cc: Andrea Arcangeli +Signed-off-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/vhost/vhost.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -30,6 +30,7 @@ + #include <linux/sched/mm.h> + #include <linux/sched/signal.h> + #include <linux/interval_tree_generic.h> ++#include <linux/nospec.h> + + #include "vhost.h" + +@@ -1362,6 +1363,7 @@ long vhost_vring_ioctl(struct vhost_dev + if (idx >= d->nvqs) + return -ENOBUFS; + ++ idx = array_index_nospec(idx, d->nvqs); + vq = d->vqs[idx]; + + mutex_lock(&vq->mutex); diff --git a/queue-4.18/virtio_net-avoid-using-netif_tx_disable-for-serializing-tx-routine.patch b/queue-4.18/virtio_net-avoid-using-netif_tx_disable-for-serializing-tx-routine.patch new file mode 100644 index 00000000000..9d45edc877b --- /dev/null +++ b/queue-4.18/virtio_net-avoid-using-netif_tx_disable-for-serializing-tx-routine.patch @@ -0,0 +1,56 @@ +From foo@baz Fri Nov 2 06:12:44 CET 2018 +From: Ake Koomsin +Date: Wed, 17 Oct 2018 19:44:12 +0900 +Subject: virtio_net: avoid using netif_tx_disable() for serializing tx routine + +From: Ake Koomsin + +[ Upstream commit 05c998b738fdd3e5d6a257bcacc8f34b6284d795 ] + +Commit 713a98d90c5e ("virtio-net: serialize tx routine during reset") +introduces netif_tx_disable() after netif_device_detach() in order to +avoid use-after-free of tx queues. However, there are two issues. + +1) Its operation is redundant with netif_device_detach() in case the + interface is running. +2) In case of the interface is not running before suspending and + resuming, the tx does not get resumed by netif_device_attach(). + This results in losing network connectivity. + +It is better to use netif_tx_lock_bh()/netif_tx_unlock_bh() instead for +serializing tx routine during reset. This also preserves the symmetry +of netif_device_detach() and netif_device_attach(). + +Fixes commit 713a98d90c5e ("virtio-net: serialize tx routine during reset") +Signed-off-by: Ake Koomsin +Acked-by: Jason Wang +Acked-by: Michael S. Tsirkin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/virtio_net.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -2162,8 +2162,9 @@ static void virtnet_freeze_down(struct v + /* Make sure no work handler is accessing the device */ + flush_work(&vi->config_work); + ++ netif_tx_lock_bh(vi->dev); + netif_device_detach(vi->dev); +- netif_tx_disable(vi->dev); ++ netif_tx_unlock_bh(vi->dev); + cancel_delayed_work_sync(&vi->refill); + + if (netif_running(vi->dev)) { +@@ -2199,7 +2200,9 @@ static int virtnet_restore_up(struct vir + } + } + ++ netif_tx_lock_bh(vi->dev); + netif_device_attach(vi->dev); ++ netif_tx_unlock_bh(vi->dev); + return err; + } +