From: Greg Kroah-Hartman Date: Wed, 16 May 2018 08:40:27 +0000 (+0200) Subject: 4.16-stable patches X-Git-Tag: v4.16.10~29 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=913bd8ee6c9275187218b2639550bf1f685de35f;p=thirdparty%2Fkernel%2Fstable-queue.git 4.16-stable patches added patches: 8139too-use-disable_irq_nosync-in-rtl8139_poll_controller.patch bonding-do-not-allow-rlb-updates-to-invalid-mac.patch bonding-send-learning-packets-for-vlans-on-slave.patch bridge-check-iface-upper-dev-when-setting-master-via-ioctl.patch dccp-fix-tasklet-usage.patch hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch hv_netvsc-set-master-device.patch ipv4-fix-fnhe-usage-by-non-cached-routes.patch ipv4-fix-memory-leaks-in-udp_sendmsg-ping_v4_sendmsg.patch ipv4-reset-fnhe_mtu_locked-after-cache-route-flushed.patch ipv6-fix-uninit-value-in-ip6_multipath_l3_keys.patch llc-better-deal-with-too-small-mtu.patch mlxsw-core-fix-an-error-handling-path-in-mlxsw_core_bus_device_register.patch mlxsw-spectrum_switchdev-do-not-remove-mrouter-port-from-mdb-s-ports-list.patch net-ethernet-sun-niu-set-correct-packet-size-in-skb.patch net-ethernet-ti-cpsw-fix-packet-leaking-in-dual_mac-mode.patch net-mlx4_en-fix-an-error-handling-path-in-mlx4_en_init_netdev.patch net-mlx4_en-verify-coalescing-parameters-are-in-range.patch net-mlx5-avoid-cleaning-flow-steering-table-twice-during-error-flow.patch net-mlx5-e-switch-include-vf-rdma-stats-in-vport-statistics.patch net-mlx5-fix-mlx5_get_vector_affinity-function.patch net-mlx5e-allow-offloading-ipv4-header-re-write-for-icmp.patch net-mlx5e-dcbnl-fix-min-inline-header-size-for-dscp.patch net-mlx5e-err-if-asked-to-offload-tc-match-on-frag-being-first.patch net-mlx5e-tx-use-correct-counter-in-dma_map-error-flow.patch net-phy-sfp-fix-the-br-min-computation.patch net-sched-actions-fix-refcnt-leak-in-skbmod.patch net-sched-fix-error-path-in-tcf_proto_create-when-modules-are-not-configured.patch 
net-smc-keep-clcsock-reference-in-smc_tcp_listen_work.patch net-smc-restrict-non-blocking-connect-finish.patch net-support-compat-64-bit-time-in-s-g-etsockopt.patch net-systemport-correclty-disambiguate-driver-instances.patch net-tls-don-t-recursively-call-push_record-during-tls_write_space-callbacks.patch net-tls-fix-connection-stall-on-partial-tls-record.patch net_sched-fq-take-care-of-throttled-flows-before-reuse.patch nfp-flower-set-tunnel-ttl-value-to-net-default.patch nsh-fix-infinite-loop.patch openvswitch-don-t-swap-table-in-nlattr_set-after-ovs_attr_nested-is-found.patch qmi_wwan-do-not-steal-interfaces-from-class-drivers.patch r8169-fix-powering-up-rtl8168h.patch rds-do-not-leak-kernel-memory-to-user-land.patch sctp-clear-the-new-asoc-s-stream-outcnt-in-sctp_stream_update.patch sctp-delay-the-authentication-for-the-duplicated-cookie-echo-chunk.patch sctp-fix-the-issue-that-the-cookie-ack-with-auth-can-t-get-processed.patch sctp-handle-two-v4-addrs-comparison-in-sctp_inet6_cmp_addr.patch sctp-remove-sctp_chunk_put-from-fail_mark-err-path-in-sctp_ulpevent_make_rcvmsg.patch sctp-use-the-old-asoc-when-making-the-cookie-ack-chunk-in-dupcook_d.patch tcp-ignore-fast-open-on-repair-mode.patch tcp-restore-autocorking.patch tcp_bbr-fix-to-zero-idle_restart-only-upon-s-acked-data.patch tg3-fix-vunmap-bug_on-triggered-from-tg3_free_consistent.patch tipc-fix-one-byte-leak-in-tipc_sk_set_orig_addr.patch udp-fix-so_bindtodevice.patch --- diff --git a/queue-4.16/8139too-use-disable_irq_nosync-in-rtl8139_poll_controller.patch b/queue-4.16/8139too-use-disable_irq_nosync-in-rtl8139_poll_controller.patch new file mode 100644 index 00000000000..17665dd146d --- /dev/null +++ b/queue-4.16/8139too-use-disable_irq_nosync-in-rtl8139_poll_controller.patch @@ -0,0 +1,32 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Ingo Molnar +Date: Wed, 2 May 2018 13:30:57 +0200 +Subject: 8139too: Use disable_irq_nosync() in rtl8139_poll_controller() + +From: Ingo Molnar + +[ Upstream 
commit af3e0fcf78879f718c5f73df0814951bd7057d34 ] + +Use disable_irq_nosync() instead of disable_irq() as this might be +called in atomic context with netpoll. + +Signed-off-by: Ingo Molnar +Signed-off-by: Thomas Gleixner +Signed-off-by: Sebastian Andrzej Siewior +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/8139too.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/realtek/8139too.c ++++ b/drivers/net/ethernet/realtek/8139too.c +@@ -2224,7 +2224,7 @@ static void rtl8139_poll_controller(stru + struct rtl8139_private *tp = netdev_priv(dev); + const int irq = tp->pci_dev->irq; + +- disable_irq(irq); ++ disable_irq_nosync(irq); + rtl8139_interrupt(irq, dev); + enable_irq(irq); + } diff --git a/queue-4.16/bonding-do-not-allow-rlb-updates-to-invalid-mac.patch b/queue-4.16/bonding-do-not-allow-rlb-updates-to-invalid-mac.patch new file mode 100644 index 00000000000..235fdf54d20 --- /dev/null +++ b/queue-4.16/bonding-do-not-allow-rlb-updates-to-invalid-mac.patch @@ -0,0 +1,31 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Debabrata Banerjee +Date: Wed, 9 May 2018 19:32:10 -0400 +Subject: bonding: do not allow rlb updates to invalid mac + +From: Debabrata Banerjee + +[ Upstream commit 4fa8667ca3989ce14cf66301fa251544fbddbdd0 ] + +Make sure multicast, broadcast, and zero mac's cannot be the output of rlb +updates, which should all be directed arps. Receive load balancing will be +collapsed if any of these happen, as the switch will broadcast. + +Signed-off-by: Debabrata Banerjee +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_alb.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/bonding/bond_alb.c ++++ b/drivers/net/bonding/bond_alb.c +@@ -450,7 +450,7 @@ static void rlb_update_client(struct rlb + { + int i; + +- if (!client_info->slave) ++ if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst)) + return; + + for (i = 0; i < RLB_ARP_BURST_SIZE; i++) { diff --git a/queue-4.16/bonding-send-learning-packets-for-vlans-on-slave.patch b/queue-4.16/bonding-send-learning-packets-for-vlans-on-slave.patch new file mode 100644 index 00000000000..82356e98118 --- /dev/null +++ b/queue-4.16/bonding-send-learning-packets-for-vlans-on-slave.patch @@ -0,0 +1,87 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Debabrata Banerjee +Date: Wed, 9 May 2018 19:32:11 -0400 +Subject: bonding: send learning packets for vlans on slave + +From: Debabrata Banerjee + +[ Upstream commit 21706ee8a47d3ede7fdae0be6d7c0a0e31a83229 ] + +There was a regression at some point from the intended functionality of +commit f60c3704e87d ("bonding: Fix alb mode to only use first level +vlans.") + +Given the return value vlan_get_encap_level() we need to store the nest +level of the bond device, and then compare the vlan's encap level to +this. Without this, this check always fails and learning packets are +never sent. + +In addition, this same commit caused a regression in the behavior of +balance_alb, which requires learning packets be sent for all interfaces +using the slave's mac in order to load balance properly. For vlan's +that have not set a user mac, we can send after checking one bit. +Otherwise we need send the set mac, albeit defeating rx load balancing +for that vlan. + +Signed-off-by: Debabrata Banerjee +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/bonding/bond_alb.c | 13 ++++++++----- + drivers/net/bonding/bond_main.c | 2 ++ + include/net/bonding.h | 1 + + 3 files changed, 11 insertions(+), 5 deletions(-) + +--- a/drivers/net/bonding/bond_alb.c ++++ b/drivers/net/bonding/bond_alb.c +@@ -943,6 +943,10 @@ static void alb_send_lp_vid(struct slave + skb->priority = TC_PRIO_CONTROL; + skb->dev = slave->dev; + ++ netdev_dbg(slave->bond->dev, ++ "Send learning packet: dev %s mac %pM vlan %d\n", ++ slave->dev->name, mac_addr, vid); ++ + if (vid) + __vlan_hwaccel_put_tag(skb, vlan_proto, vid); + +@@ -965,14 +969,13 @@ static int alb_upper_dev_walk(struct net + u8 *mac_addr = data->mac_addr; + struct bond_vlan_tag *tags; + +- if (is_vlan_dev(upper) && vlan_get_encap_level(upper) == 0) { +- if (strict_match && +- ether_addr_equal_64bits(mac_addr, +- upper->dev_addr)) { ++ if (is_vlan_dev(upper) && ++ bond->nest_level == vlan_get_encap_level(upper) - 1) { ++ if (upper->addr_assign_type == NET_ADDR_STOLEN) { + alb_send_lp_vid(slave, mac_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); +- } else if (!strict_match) { ++ } else { + alb_send_lp_vid(slave, upper->dev_addr, + vlan_dev_vlan_proto(upper), + vlan_dev_vlan_id(upper)); +--- a/drivers/net/bonding/bond_main.c ++++ b/drivers/net/bonding/bond_main.c +@@ -1738,6 +1738,8 @@ int bond_enslave(struct net_device *bond + if (bond_mode_uses_xmit_hash(bond)) + bond_update_slave_arr(bond, NULL); + ++ bond->nest_level = dev_get_nest_level(bond_dev); ++ + netdev_info(bond_dev, "Enslaving %s as %s interface with %s link\n", + slave_dev->name, + bond_is_active_slave(new_slave) ? 
"an active" : "a backup", +--- a/include/net/bonding.h ++++ b/include/net/bonding.h +@@ -198,6 +198,7 @@ struct bonding { + struct slave __rcu *primary_slave; + struct bond_up_slave __rcu *slave_arr; /* Array of usable slaves */ + bool force_primary; ++ u32 nest_level; + s32 slave_cnt; /* never change this value outside the attach/detach wrappers */ + int (*recv_probe)(const struct sk_buff *, struct bonding *, + struct slave *); diff --git a/queue-4.16/bridge-check-iface-upper-dev-when-setting-master-via-ioctl.patch b/queue-4.16/bridge-check-iface-upper-dev-when-setting-master-via-ioctl.patch new file mode 100644 index 00000000000..b2efaeb3eea --- /dev/null +++ b/queue-4.16/bridge-check-iface-upper-dev-when-setting-master-via-ioctl.patch @@ -0,0 +1,42 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Hangbin Liu +Date: Fri, 27 Apr 2018 20:59:24 +0800 +Subject: bridge: check iface upper dev when setting master via ioctl + +From: Hangbin Liu + +[ Upstream commit e8238fc2bd7b4c3c7554fa2df067e796610212fc ] + +When we set a bond slave's master to bridge via ioctl, we only check +the IFF_BRIDGE_PORT flag. Although we will find the slave's real master +at netdev_master_upper_dev_link() later, it already does some settings +and allocates some resources. It would be better to return as early +as possible. + +v1 -> v2: +use netdev_master_upper_dev_get() instead of netdev_has_any_upper_dev() +to check if we have a master, because not all upper devs are masters, +e.g. vlan device. + +Reported-by: syzbot+de73361ee4971b6e6f75@syzkaller.appspotmail.com +Signed-off-by: Hangbin Liu +Acked-by: Nikolay Aleksandrov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_if.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/bridge/br_if.c ++++ b/net/bridge/br_if.c +@@ -509,8 +509,8 @@ int br_add_if(struct net_bridge *br, str + return -ELOOP; + } + +- /* Device is already being bridged */ +- if (br_port_exists(dev)) ++ /* Device has master upper dev */ ++ if (netdev_master_upper_dev_get(dev)) + return -EBUSY; + + /* No bridging devices that dislike that (e.g. wireless) */ diff --git a/queue-4.16/dccp-fix-tasklet-usage.patch b/queue-4.16/dccp-fix-tasklet-usage.patch new file mode 100644 index 00000000000..a79b9775ca8 --- /dev/null +++ b/queue-4.16/dccp-fix-tasklet-usage.patch @@ -0,0 +1,116 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Eric Dumazet +Date: Thu, 3 May 2018 09:39:20 -0700 +Subject: dccp: fix tasklet usage + +From: Eric Dumazet + +[ Upstream commit a8d7aa17bbc970971ccdf71988ea19230ab368b1 ] + +syzbot reported a crash in tasklet_action_common() caused by dccp. + +dccp needs to make sure socket won't disappear before tasklet handler +has completed. + +This patch takes a reference on the socket when arming the tasklet, +and moves the sock_put() from dccp_write_xmit_timer() to dccp_write_xmitlet() + +kernel BUG at kernel/softirq.c:514! 
+invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 1 PID: 17 Comm: ksoftirqd/1 Not tainted 4.17.0-rc3+ #30 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:tasklet_action_common.isra.19+0x6db/0x700 kernel/softirq.c:515 +RSP: 0018:ffff8801d9b3faf8 EFLAGS: 00010246 +dccp_close: ABORT with 65423 bytes unread +RAX: 1ffff1003b367f6b RBX: ffff8801daf1f3f0 RCX: 0000000000000000 +RDX: ffff8801cf895498 RSI: 0000000000000004 RDI: 0000000000000000 +RBP: ffff8801d9b3fc40 R08: ffffed0039f12a95 R09: ffffed0039f12a94 +dccp_close: ABORT with 65423 bytes unread +R10: ffffed0039f12a94 R11: ffff8801cf8954a3 R12: 0000000000000000 +R13: ffff8801d9b3fc18 R14: dffffc0000000000 R15: ffff8801cf895490 +FS: 0000000000000000(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000001b2bc28000 CR3: 00000001a08a9000 CR4: 00000000001406e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + tasklet_action+0x1d/0x20 kernel/softirq.c:533 + __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 +dccp_close: ABORT with 65423 bytes unread + run_ksoftirqd+0x86/0x100 kernel/softirq.c:646 + smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164 + kthread+0x345/0x410 kernel/kthread.c:238 + ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 +Code: 48 8b 85 e8 fe ff ff 48 8b 95 f0 fe ff ff e9 94 fb ff ff 48 89 95 f0 fe ff ff e8 81 53 6e 00 48 8b 95 f0 fe ff ff e9 62 fb ff ff <0f> 0b 48 89 cf 48 89 8d e8 fe ff ff e8 64 53 6e 00 48 8b 8d e8 +RIP: tasklet_action_common.isra.19+0x6db/0x700 kernel/softirq.c:515 RSP: ffff8801d9b3faf8 + +Fixes: dc841e30eaea ("dccp: Extend CCID packet dequeueing interface") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Gerrit Renker +Cc: dccp@vger.kernel.org +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid2.c | 14 ++++++++++++-- + net/dccp/timer.c | 2 +- + 2 files changed, 13 insertions(+), 3 deletions(-) + +--- a/net/dccp/ccids/ccid2.c ++++ b/net/dccp/ccids/ccid2.c +@@ -126,6 +126,16 @@ static void ccid2_change_l_seq_window(st + DCCPF_SEQ_WMAX)); + } + ++static void dccp_tasklet_schedule(struct sock *sk) ++{ ++ struct tasklet_struct *t = &dccp_sk(sk)->dccps_xmitlet; ++ ++ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { ++ sock_hold(sk); ++ __tasklet_schedule(t); ++ } ++} ++ + static void ccid2_hc_tx_rto_expire(struct timer_list *t) + { + struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer); +@@ -166,7 +176,7 @@ static void ccid2_hc_tx_rto_expire(struc + + /* if we were blocked before, we may now send cwnd=1 packet */ + if (sender_was_blocked) +- tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); ++ dccp_tasklet_schedule(sk); + /* restart backed-off timer */ + sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); + out: +@@ -706,7 +716,7 @@ static void ccid2_hc_tx_packet_recv(stru + done: + /* check if incoming Acks allow pending packets to be sent */ + if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) +- tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); ++ dccp_tasklet_schedule(sk); + dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); + } + +--- a/net/dccp/timer.c ++++ b/net/dccp/timer.c +@@ -232,6 +232,7 @@ static void dccp_write_xmitlet(unsigned + else + dccp_write_xmit(sk); + bh_unlock_sock(sk); ++ sock_put(sk); + } + + static void dccp_write_xmit_timer(struct timer_list *t) +@@ -240,7 +241,6 @@ static void dccp_write_xmit_timer(struct + struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk; + + dccp_write_xmitlet((unsigned long)sk); +- sock_put(sk); + } + + void dccp_init_xmit_timers(struct sock *sk) diff --git a/queue-4.16/hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch b/queue-4.16/hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch new file mode 
100644 index 00000000000..4276a5b694c --- /dev/null +++ b/queue-4.16/hv_netvsc-fix-net-device-attach-on-older-windows-hosts.patch @@ -0,0 +1,38 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Mohammed Gamal +Date: Wed, 9 May 2018 10:17:34 +0200 +Subject: hv_netvsc: Fix net device attach on older Windows hosts + +From: Mohammed Gamal + +[ Upstream commit 55be9f25be1ca5bda75c39808fc77e42691bc07f ] + +On older windows hosts the net_device instance is returned to +the caller of rndis_filter_device_add() without having the presence +bit set first. This would cause any subsequent calls to network device +operations (e.g. MTU change, channel change) to fail after the device +is detached once, returning -ENODEV. + +Instead of returning the device instance, we take the exit path where +we call netif_device_attach() + +Fixes: 7b2ee50c0cd5 ("hv_netvsc: common detach logic") +Signed-off-by: Mohammed Gamal +Reviewed-by: Stephen Hemminger +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/rndis_filter.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/hyperv/rndis_filter.c ++++ b/drivers/net/hyperv/rndis_filter.c +@@ -1282,7 +1282,7 @@ struct netvsc_device *rndis_filter_devic + rndis_device->link_state ? 
"down" : "up"); + + if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) +- return net_device; ++ goto out; + + rndis_filter_query_link_speed(rndis_device, net_device); + diff --git a/queue-4.16/hv_netvsc-set-master-device.patch b/queue-4.16/hv_netvsc-set-master-device.patch new file mode 100644 index 00000000000..240d4514ff7 --- /dev/null +++ b/queue-4.16/hv_netvsc-set-master-device.patch @@ -0,0 +1,36 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Stephen Hemminger +Date: Wed, 9 May 2018 14:09:04 -0700 +Subject: hv_netvsc: set master device + +From: Stephen Hemminger + +[ Upstream commit 97f3efb64323beb0690576e9d74e94998ad6e82a ] + +The hyper-v transparent bonding should have used master_dev_link. +The netvsc device should look like a master bond device not +like the upper side of a tunnel. + +This makes the semantics the same so that userspace applications +looking at network devices see the correct master relationshipship. + +Fixes: 0c195567a8f6 ("netvsc: transparent VF management") +Signed-off-by: Stephen Hemminger +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/hyperv/netvsc_drv.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/hyperv/netvsc_drv.c ++++ b/drivers/net/hyperv/netvsc_drv.c +@@ -1840,7 +1840,8 @@ static int netvsc_vf_join(struct net_dev + goto rx_handler_failed; + } + +- ret = netdev_upper_dev_link(vf_netdev, ndev, NULL); ++ ret = netdev_master_upper_dev_link(vf_netdev, ndev, ++ NULL, NULL, NULL); + if (ret != 0) { + netdev_err(vf_netdev, + "can not set master device %s (err = %d)\n", diff --git a/queue-4.16/ipv4-fix-fnhe-usage-by-non-cached-routes.patch b/queue-4.16/ipv4-fix-fnhe-usage-by-non-cached-routes.patch new file mode 100644 index 00000000000..96738732014 --- /dev/null +++ b/queue-4.16/ipv4-fix-fnhe-usage-by-non-cached-routes.patch @@ -0,0 +1,229 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Julian Anastasov +Date: Wed, 2 May 2018 09:41:19 +0300 +Subject: ipv4: fix fnhe usage by non-cached routes + +From: Julian Anastasov + +[ Upstream commit 94720e3aee6884d8c8beb678001629da60ec6366 ] + +Allow some non-cached routes to use non-expired fnhe: + +1. ip_del_fnhe: moved above and now called by find_exception. +The 4.5+ commit deed49df7390 expires fnhe only when caching +routes. Change that to: + +1.1. use fnhe for non-cached local output routes, with the help +from (2) + +1.2. allow __mkroute_input to detect expired fnhe (outdated +fnhe_gw, for example) when do_cache is false, eg. when itag!=0 +for unicast destinations. + +2. __mkroute_output: keep fi to allow local routes with orig_oif != 0 +to use fnhe info even when the new route will not be cached into fnhe. +After commit 839da4d98960 ("net: ipv4: set orig_oif based on fib +result for local traffic") it means all local routes will be affected +because they are not cached. 
This change is used to solve a PMTU +problem with IPVS (and probably Netfilter DNAT) setups that redirect +local clients from target local IP (local route to Virtual IP) +to new remote IP target, eg. IPVS TUN real server. Loopback has +64K MTU and we need to create fnhe on the local route that will +keep the reduced PMTU for the Virtual IP. Without this change +fnhe_pmtu is updated from ICMP but never exposed to non-cached +local routes. This includes routes with flowi4_oif!=0 for 4.6+ and +with flowi4_oif=any for 4.14+). + +3. update_or_create_fnhe: make sure fnhe_expires is not 0 for +new entries + +Fixes: 839da4d98960 ("net: ipv4: set orig_oif based on fib result for local traffic") +Fixes: d6d5e999e5df ("route: do not cache fib route info on local routes with oif") +Fixes: deed49df7390 ("route: check and remove route cache when we get route") +Cc: David Ahern +Cc: Xin Long +Signed-off-by: Julian Anastasov +Acked-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 118 ++++++++++++++++++++++++------------------------------- + 1 file changed, 53 insertions(+), 65 deletions(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -710,7 +710,7 @@ static void update_or_create_fnhe(struct + fnhe->fnhe_gw = gw; + fnhe->fnhe_pmtu = pmtu; + fnhe->fnhe_mtu_locked = lock; +- fnhe->fnhe_expires = expires; ++ fnhe->fnhe_expires = max(1UL, expires); + + /* Exception created; mark the cached routes for the nexthop + * stale, so anyone caching it rechecks if this exception +@@ -1298,6 +1298,36 @@ static unsigned int ipv4_mtu(const struc + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); + } + ++static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) ++{ ++ struct fnhe_hash_bucket *hash; ++ struct fib_nh_exception *fnhe, __rcu **fnhe_p; ++ u32 hval = fnhe_hashfun(daddr); ++ ++ spin_lock_bh(&fnhe_lock); ++ ++ hash = rcu_dereference_protected(nh->nh_exceptions, ++ lockdep_is_held(&fnhe_lock)); ++ hash += hval; ++ ++ 
fnhe_p = &hash->chain; ++ fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); ++ while (fnhe) { ++ if (fnhe->fnhe_daddr == daddr) { ++ rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( ++ fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); ++ fnhe_flush_routes(fnhe); ++ kfree_rcu(fnhe, rcu); ++ break; ++ } ++ fnhe_p = &fnhe->fnhe_next; ++ fnhe = rcu_dereference_protected(fnhe->fnhe_next, ++ lockdep_is_held(&fnhe_lock)); ++ } ++ ++ spin_unlock_bh(&fnhe_lock); ++} ++ + static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) + { + struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); +@@ -1311,8 +1341,14 @@ static struct fib_nh_exception *find_exc + + for (fnhe = rcu_dereference(hash[hval].chain); fnhe; + fnhe = rcu_dereference(fnhe->fnhe_next)) { +- if (fnhe->fnhe_daddr == daddr) ++ if (fnhe->fnhe_daddr == daddr) { ++ if (fnhe->fnhe_expires && ++ time_after(jiffies, fnhe->fnhe_expires)) { ++ ip_del_fnhe(nh, daddr); ++ break; ++ } + return fnhe; ++ } + } + return NULL; + } +@@ -1638,36 +1674,6 @@ static void ip_handle_martian_source(str + #endif + } + +-static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +-{ +- struct fnhe_hash_bucket *hash; +- struct fib_nh_exception *fnhe, __rcu **fnhe_p; +- u32 hval = fnhe_hashfun(daddr); +- +- spin_lock_bh(&fnhe_lock); +- +- hash = rcu_dereference_protected(nh->nh_exceptions, +- lockdep_is_held(&fnhe_lock)); +- hash += hval; +- +- fnhe_p = &hash->chain; +- fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); +- while (fnhe) { +- if (fnhe->fnhe_daddr == daddr) { +- rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( +- fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); +- fnhe_flush_routes(fnhe); +- kfree_rcu(fnhe, rcu); +- break; +- } +- fnhe_p = &fnhe->fnhe_next; +- fnhe = rcu_dereference_protected(fnhe->fnhe_next, +- lockdep_is_held(&fnhe_lock)); +- } +- +- spin_unlock_bh(&fnhe_lock); +-} +- + static void set_lwt_redirect(struct rtable *rth) + { + 
if (lwtunnel_output_redirect(rth->dst.lwtstate)) { +@@ -1734,20 +1740,10 @@ static int __mkroute_input(struct sk_buf + + fnhe = find_exception(&FIB_RES_NH(*res), daddr); + if (do_cache) { +- if (fnhe) { ++ if (fnhe) + rth = rcu_dereference(fnhe->fnhe_rth_input); +- if (rth && rth->dst.expires && +- time_after(jiffies, rth->dst.expires)) { +- ip_del_fnhe(&FIB_RES_NH(*res), daddr); +- fnhe = NULL; +- } else { +- goto rt_cache; +- } +- } +- +- rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); +- +-rt_cache: ++ else ++ rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rt_cache_valid(rth)) { + skb_dst_set_noref(skb, &rth->dst); + goto out; +@@ -2224,39 +2220,31 @@ static struct rtable *__mkroute_output(c + * the loopback interface and the IP_PKTINFO ipi_ifindex will + * be set to the loopback interface as well. + */ +- fi = NULL; ++ do_cache = false; + } + + fnhe = NULL; + do_cache &= fi != NULL; +- if (do_cache) { ++ if (fi) { + struct rtable __rcu **prth; + struct fib_nh *nh = &FIB_RES_NH(*res); + + fnhe = find_exception(nh, fl4->daddr); ++ if (!do_cache) ++ goto add; + if (fnhe) { + prth = &fnhe->fnhe_rth_output; +- rth = rcu_dereference(*prth); +- if (rth && rth->dst.expires && +- time_after(jiffies, rth->dst.expires)) { +- ip_del_fnhe(nh, fl4->daddr); +- fnhe = NULL; +- } else { +- goto rt_cache; ++ } else { ++ if (unlikely(fl4->flowi4_flags & ++ FLOWI_FLAG_KNOWN_NH && ++ !(nh->nh_gw && ++ nh->nh_scope == RT_SCOPE_LINK))) { ++ do_cache = false; ++ goto add; + } ++ prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); + } +- +- if (unlikely(fl4->flowi4_flags & +- FLOWI_FLAG_KNOWN_NH && +- !(nh->nh_gw && +- nh->nh_scope == RT_SCOPE_LINK))) { +- do_cache = false; +- goto add; +- } +- prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); + rth = rcu_dereference(*prth); +- +-rt_cache: + if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) + return rth; + } diff --git a/queue-4.16/ipv4-fix-memory-leaks-in-udp_sendmsg-ping_v4_sendmsg.patch 
b/queue-4.16/ipv4-fix-memory-leaks-in-udp_sendmsg-ping_v4_sendmsg.patch new file mode 100644 index 00000000000..c3f82576aef --- /dev/null +++ b/queue-4.16/ipv4-fix-memory-leaks-in-udp_sendmsg-ping_v4_sendmsg.patch @@ -0,0 +1,77 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Andrey Ignatov +Date: Thu, 10 May 2018 10:59:34 -0700 +Subject: ipv4: fix memory leaks in udp_sendmsg, ping_v4_sendmsg + +From: Andrey Ignatov + +[ Upstream commit 1b97013bfb11d66f041de691de6f0fec748ce016 ] + +Fix more memory leaks in ip_cmsg_send() callers. Part of them were fixed +earlier in 919483096bfe. + +* udp_sendmsg one was there since the beginning when linux sources were + first added to git; +* ping_v4_sendmsg one was copy/pasted in c319b4d76b9e. + +Whenever return happens in udp_sendmsg() or ping_v4_sendmsg() IP options +have to be freed if they were allocated previously. + +Add label so that future callers (if any) can use it instead of kfree() +before return that is easy to forget. + +Fixes: c319b4d76b9e (net: ipv4: add IPPROTO_ICMP socket kind) +Signed-off-by: Andrey Ignatov +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ping.c | 7 +++++-- + net/ipv4/udp.c | 7 +++++-- + 2 files changed, 10 insertions(+), 4 deletions(-) + +--- a/net/ipv4/ping.c ++++ b/net/ipv4/ping.c +@@ -775,8 +775,10 @@ static int ping_v4_sendmsg(struct sock * + ipc.addr = faddr = daddr; + + if (ipc.opt && ipc.opt->opt.srr) { +- if (!daddr) +- return -EINVAL; ++ if (!daddr) { ++ err = -EINVAL; ++ goto out_free; ++ } + faddr = ipc.opt->opt.faddr; + } + tos = get_rttos(&ipc, inet); +@@ -842,6 +844,7 @@ back_from_confirm: + + out: + ip_rt_put(rt); ++out_free: + if (free) + kfree(ipc.opt); + if (!err) { +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -958,8 +958,10 @@ int udp_sendmsg(struct sock *sk, struct + sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags); + + if (ipc.opt && ipc.opt->opt.srr) { +- if (!daddr) +- return -EINVAL; ++ if (!daddr) { ++ err = -EINVAL; ++ goto out_free; ++ } + faddr = ipc.opt->opt.faddr; + connected = 0; + } +@@ -1080,6 +1082,7 @@ do_append_data: + + out: + ip_rt_put(rt); ++out_free: + if (free) + kfree(ipc.opt); + if (!err) diff --git a/queue-4.16/ipv4-reset-fnhe_mtu_locked-after-cache-route-flushed.patch b/queue-4.16/ipv4-reset-fnhe_mtu_locked-after-cache-route-flushed.patch new file mode 100644 index 00000000000..cf47aa55994 --- /dev/null +++ b/queue-4.16/ipv4-reset-fnhe_mtu_locked-after-cache-route-flushed.patch @@ -0,0 +1,34 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Hangbin Liu +Date: Wed, 9 May 2018 18:06:44 +0800 +Subject: ipv4: reset fnhe_mtu_locked after cache route flushed + +From: Hangbin Liu + +[ Upstream commit 0e8411e426e277f55bd21e287ec89fab6f8eacae ] + +After route cache is flushed via ipv4_sysctl_rtcache_flush(), we forget +to reset fnhe_mtu_locked in rt_bind_exception(). When pmtu is updated +in __ip_rt_update_pmtu(), it will return directly since the pmtu is +still locked. e.g. + ++ ip netns exec client ping 10.10.1.1 -c 1 -s 1400 -M do +PING 10.10.1.1 (10.10.1.1) 1400(1428) bytes of data. 
+>From 10.10.0.254 icmp_seq=1 Frag needed and DF set (mtu = 0) + +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -1376,6 +1376,7 @@ static bool rt_bind_exception(struct rta + fnhe->fnhe_gw = 0; + fnhe->fnhe_pmtu = 0; + fnhe->fnhe_expires = 0; ++ fnhe->fnhe_mtu_locked = false; + fnhe_flush_routes(fnhe); + orig = NULL; + } diff --git a/queue-4.16/ipv6-fix-uninit-value-in-ip6_multipath_l3_keys.patch b/queue-4.16/ipv6-fix-uninit-value-in-ip6_multipath_l3_keys.patch new file mode 100644 index 00000000000..383dffc6c31 --- /dev/null +++ b/queue-4.16/ipv6-fix-uninit-value-in-ip6_multipath_l3_keys.patch @@ -0,0 +1,77 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Eric Dumazet +Date: Sun, 29 Apr 2018 09:54:59 -0700 +Subject: ipv6: fix uninit-value in ip6_multipath_l3_keys() + +From: Eric Dumazet + +[ Upstream commit cea67a2dd6b2419dcc13a39309b9a79a1f773193 ] + +syzbot/KMSAN reported an uninit-value in ip6_multipath_l3_keys(), +root caused to a bad assumption of ICMP header being already +pulled in skb->head + +ip_multipath_l3_keys() does the correct thing, so it is an IPv6 only bug. 
+ +BUG: KMSAN: uninit-value in ip6_multipath_l3_keys net/ipv6/route.c:1830 [inline] +BUG: KMSAN: uninit-value in rt6_multipath_hash+0x5c4/0x640 net/ipv6/route.c:1858 +CPU: 0 PID: 4507 Comm: syz-executor661 Not tainted 4.16.0+ #87 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:17 [inline] + dump_stack+0x185/0x1d0 lib/dump_stack.c:53 + kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 + __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 + ip6_multipath_l3_keys net/ipv6/route.c:1830 [inline] + rt6_multipath_hash+0x5c4/0x640 net/ipv6/route.c:1858 + ip6_route_input+0x65a/0x920 net/ipv6/route.c:1884 + ip6_rcv_finish+0x413/0x6e0 net/ipv6/ip6_input.c:69 + NF_HOOK include/linux/netfilter.h:288 [inline] + ipv6_rcv+0x1e16/0x2340 net/ipv6/ip6_input.c:208 + __netif_receive_skb_core+0x47df/0x4a90 net/core/dev.c:4562 + __netif_receive_skb net/core/dev.c:4627 [inline] + netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701 + netif_receive_skb+0x230/0x240 net/core/dev.c:4725 + tun_rx_batched drivers/net/tun.c:1555 [inline] + tun_get_user+0x740f/0x7c60 drivers/net/tun.c:1962 + tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990 + call_write_iter include/linux/fs.h:1782 [inline] + new_sync_write fs/read_write.c:469 [inline] + __vfs_write+0x7fb/0x9f0 fs/read_write.c:482 + vfs_write+0x463/0x8d0 fs/read_write.c:544 + SYSC_write+0x172/0x360 fs/read_write.c:589 + SyS_write+0x55/0x80 fs/read_write.c:581 + do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + +Fixes: 23aebdacb05d ("ipv6: Compute multipath hash for ICMP errors from offending packet") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Jakub Sitnicki +Acked-by: Jakub Sitnicki +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1822,11 +1822,16 @@ static void ip6_multipath_l3_keys(const + const struct ipv6hdr *inner_iph; + const struct icmp6hdr *icmph; + struct ipv6hdr _inner_iph; ++ struct icmp6hdr _icmph; + + if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6)) + goto out; + +- icmph = icmp6_hdr(skb); ++ icmph = skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_icmph), &_icmph); ++ if (!icmph) ++ goto out; ++ + if (icmph->icmp6_type != ICMPV6_DEST_UNREACH && + icmph->icmp6_type != ICMPV6_PKT_TOOBIG && + icmph->icmp6_type != ICMPV6_TIME_EXCEED && diff --git a/queue-4.16/llc-better-deal-with-too-small-mtu.patch b/queue-4.16/llc-better-deal-with-too-small-mtu.patch new file mode 100644 index 00000000000..9e0c326a22a --- /dev/null +++ b/queue-4.16/llc-better-deal-with-too-small-mtu.patch @@ -0,0 +1,80 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Eric Dumazet +Date: Mon, 7 May 2018 09:02:25 -0700 +Subject: llc: better deal with too small mtu + +From: Eric Dumazet + +[ Upstream commit 2c5d5b13c6eb79f5677e206b8aad59b3a2097f60 ] + +syzbot loves to set very small mtu on devices, since it brings joy. +We must make llc_ui_sendmsg() fool proof. + +usercopy: Kernel memory overwrite attempt detected to wrapped address (offset 0, size 18446612139802320068)! + +kernel BUG at mm/usercopy.c:100! 
+invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 0 PID: 17464 Comm: syz-executor1 Not tainted 4.17.0-rc3+ #36 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:usercopy_abort+0xbb/0xbd mm/usercopy.c:88 +RSP: 0018:ffff8801868bf800 EFLAGS: 00010282 +RAX: 000000000000006c RBX: ffffffff87d2fb00 RCX: 0000000000000000 +RDX: 000000000000006c RSI: ffffffff81610731 RDI: ffffed0030d17ef6 +RBP: ffff8801868bf858 R08: ffff88018daa4200 R09: ffffed003b5c4fb0 +R10: ffffed003b5c4fb0 R11: ffff8801dae27d87 R12: ffffffff87d2f8e0 +R13: ffffffff87d2f7a0 R14: ffffffff87d2f7a0 R15: ffffffff87d2f7a0 +FS: 00007f56a14ac700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000001b2bc21000 CR3: 00000001abeb1000 CR4: 00000000001426f0 +DR0: 0000000020000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000030602 +Call Trace: + check_bogus_address mm/usercopy.c:153 [inline] + __check_object_size+0x5d9/0x5d9 mm/usercopy.c:256 + check_object_size include/linux/thread_info.h:108 [inline] + check_copy_size include/linux/thread_info.h:139 [inline] + copy_from_iter_full include/linux/uio.h:121 [inline] + memcpy_from_msg include/linux/skbuff.h:3305 [inline] + llc_ui_sendmsg+0x4b1/0x1530 net/llc/af_llc.c:941 + sock_sendmsg_nosec net/socket.c:629 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:639 + __sys_sendto+0x3d7/0x670 net/socket.c:1789 + __do_sys_sendto net/socket.c:1801 [inline] + __se_sys_sendto net/socket.c:1797 [inline] + __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1797 + do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x49/0xbe +RIP: 0033:0x455979 +RSP: 002b:00007f56a14abc68 EFLAGS: 00000246 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 00007f56a14ac6d4 RCX: 0000000000455979 +RDX: 0000000000000000 RSI: 0000000020000000 RDI: 
0000000000000018 +RBP: 000000000072bea0 R08: 00000000200012c0 R09: 0000000000000010 +R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff +R13: 0000000000000548 R14: 00000000006fbf60 R15: 0000000000000000 +Code: 55 c0 e8 c0 55 bb ff ff 75 c8 48 8b 55 c0 4d 89 f9 ff 75 d0 4d 89 e8 48 89 d9 4c 89 e6 41 56 48 c7 c7 80 fa d2 87 e8 a0 0b a3 ff <0f> 0b e8 95 55 bb ff e8 c0 a8 f7 ff 8b 95 14 ff ff ff 4d 89 e8 +RIP: usercopy_abort+0xbb/0xbd mm/usercopy.c:88 RSP: ffff8801868bf800 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/llc/af_llc.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/llc/af_llc.c ++++ b/net/llc/af_llc.c +@@ -930,6 +930,9 @@ static int llc_ui_sendmsg(struct socket + if (size > llc->dev->mtu) + size = llc->dev->mtu; + copied = size - hdrlen; ++ rc = -EINVAL; ++ if (copied < 0) ++ goto release; + release_sock(sk); + skb = sock_alloc_send_skb(sk, size, noblock, &rc); + lock_sock(sk); diff --git a/queue-4.16/mlxsw-core-fix-an-error-handling-path-in-mlxsw_core_bus_device_register.patch b/queue-4.16/mlxsw-core-fix-an-error-handling-path-in-mlxsw_core_bus_device_register.patch new file mode 100644 index 00000000000..a80125cc2f0 --- /dev/null +++ b/queue-4.16/mlxsw-core-fix-an-error-handling-path-in-mlxsw_core_bus_device_register.patch @@ -0,0 +1,40 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Christophe JAILLET +Date: Thu, 10 May 2018 13:26:16 +0200 +Subject: mlxsw: core: Fix an error handling path in 'mlxsw_core_bus_device_register()' + +From: Christophe JAILLET + +[ Upstream commit 8ccc113172e4c1ebef45c2433f3c32ed6ae1b9c9 ] + +Resources are not freed in the reverse order of the allocation. +Labels are also mixed-up. 
+ +Fix it and reorder code and labels in the error handling path of +'mlxsw_core_bus_device_register()' + +Fixes: ef3116e5403e ("mlxsw: spectrum: Register KVD resources with devlink") +Signed-off-by: Christophe JAILLET +Reviewed-by: Ido Schimmel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/core.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/core.c +@@ -1099,11 +1099,11 @@ err_emad_init: + err_alloc_lag_mapping: + mlxsw_ports_fini(mlxsw_core); + err_ports_init: +- mlxsw_bus->fini(bus_priv); +-err_bus_init: + if (!reload) + devlink_resources_unregister(devlink, NULL); + err_register_resources: ++ mlxsw_bus->fini(bus_priv); ++err_bus_init: + if (!reload) + devlink_free(devlink); + err_devlink_alloc: diff --git a/queue-4.16/mlxsw-spectrum_switchdev-do-not-remove-mrouter-port-from-mdb-s-ports-list.patch b/queue-4.16/mlxsw-spectrum_switchdev-do-not-remove-mrouter-port-from-mdb-s-ports-list.patch new file mode 100644 index 00000000000..ddee78b272c --- /dev/null +++ b/queue-4.16/mlxsw-spectrum_switchdev-do-not-remove-mrouter-port-from-mdb-s-ports-list.patch @@ -0,0 +1,51 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Ido Schimmel +Date: Thu, 26 Apr 2018 11:46:29 +0300 +Subject: mlxsw: spectrum_switchdev: Do not remove mrouter port from MDB's ports list + +From: Ido Schimmel + +[ Upstream commit c7f46cca8c73a44311e4164b9196b4d791f59ac7 ] + +When IGMP snooping is enabled on a bridge, traffic forwarded by an MDB +entry should be sent to both ports member in the MDB's ports list and +mrouter ports. + +In case a port needs to be removed from an MDB's ports list, but this +port is also configured as an mrouter port, then do not update the +device so that it will continue to forward traffic through that port. 
+ +Fix a copy-paste error that checked that IGMP snooping is enabled twice +instead of checking the port's mrouter state. + +Fixes: ded711c87a04 ("mlxsw: spectrum_switchdev: Consider mrouter status for mdb changes") +Signed-off-by: Ido Schimmel +Reported-by: Colin King +Reviewed-by: Nogah Frankel +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12 +++++------- + 1 file changed, 5 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +@@ -1718,13 +1718,11 @@ __mlxsw_sp_port_mdb_del(struct mlxsw_sp_ + struct net_device *dev = mlxsw_sp_port->dev; + int err; + +- if (bridge_port->bridge_device->multicast_enabled) { +- if (bridge_port->bridge_device->multicast_enabled) { +- err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, +- false); +- if (err) +- netdev_err(dev, "Unable to remove port from SMID\n"); +- } ++ if (bridge_port->bridge_device->multicast_enabled && ++ !bridge_port->mrouter) { ++ err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); ++ if (err) ++ netdev_err(dev, "Unable to remove port from SMID\n"); + } + + err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); diff --git a/queue-4.16/net-ethernet-sun-niu-set-correct-packet-size-in-skb.patch b/queue-4.16/net-ethernet-sun-niu-set-correct-packet-size-in-skb.patch new file mode 100644 index 00000000000..c36c9e354f3 --- /dev/null +++ b/queue-4.16/net-ethernet-sun-niu-set-correct-packet-size-in-skb.patch @@ -0,0 +1,58 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Rob Taglang +Date: Thu, 3 May 2018 17:13:06 -0400 +Subject: net: ethernet: sun: niu set correct packet size in skb + +From: Rob Taglang + +[ Upstream commit 14224923c3600bae2ac4dcae3bf0c3d4dc2812be ] + +Currently, skb->len and skb->data_len are set to the page size, not +the packet size. 
This causes the frame check sequence to not be +located at the "end" of the packet resulting in ethernet frame check +errors. The driver does work currently, but stricter kernel facing +networking solutions like OpenVSwitch will drop these packets as +invalid. + +These changes set the packet size correctly so that these errors no +longer occur. The length does not include the frame check sequence, so +that subtraction was removed. + +Tested on Oracle/SUN Multithreaded 10-Gigabit Ethernet Network +Controller [108e:abcd] and validated in wireshark. + +Signed-off-by: Rob Taglang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/sun/niu.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/sun/niu.c ++++ b/drivers/net/ethernet/sun/niu.c +@@ -3443,7 +3443,7 @@ static int niu_process_rx_pkt(struct nap + + len = (val & RCR_ENTRY_L2_LEN) >> + RCR_ENTRY_L2_LEN_SHIFT; +- len -= ETH_FCS_LEN; ++ append_size = len + ETH_HLEN + ETH_FCS_LEN; + + addr = (val & RCR_ENTRY_PKT_BUF_ADDR) << + RCR_ENTRY_PKT_BUF_ADDR_SHIFT; +@@ -3453,7 +3453,6 @@ static int niu_process_rx_pkt(struct nap + RCR_ENTRY_PKTBUFSZ_SHIFT]; + + off = addr & ~PAGE_MASK; +- append_size = rcr_size; + if (num_rcr == 1) { + int ptype; + +@@ -3466,7 +3465,7 @@ static int niu_process_rx_pkt(struct nap + else + skb_checksum_none_assert(skb); + } else if (!(val & RCR_ENTRY_MULTI)) +- append_size = len - skb->len; ++ append_size = append_size - skb->len; + + niu_rx_skb_append(skb, page, off, append_size, rcr_size); + if ((page->index + rp->rbr_block_size) - rcr_size == addr) { diff --git a/queue-4.16/net-ethernet-ti-cpsw-fix-packet-leaking-in-dual_mac-mode.patch b/queue-4.16/net-ethernet-ti-cpsw-fix-packet-leaking-in-dual_mac-mode.patch new file mode 100644 index 00000000000..814dcd173b3 --- /dev/null +++ b/queue-4.16/net-ethernet-ti-cpsw-fix-packet-leaking-in-dual_mac-mode.patch @@ -0,0 +1,46 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 
+From: Grygorii Strashko +Date: Tue, 1 May 2018 12:41:22 -0500 +Subject: net: ethernet: ti: cpsw: fix packet leaking in dual_mac mode + +From: Grygorii Strashko + +[ Upstream commit 5e5add172ea81152d518b161ec5706503ad3d799 ] + +In dual_mac mode packets arrived on one port should not be forwarded by +switch hw to another port. Only Linux Host can forward packets between +ports. The below test case (reported in [1]) shows that packet arrived on +one port can be leaked to another (reproducible with dual port evms): + - connect port 1 (eth0) to linux Host 0 and run tcpdump or Wireshark + - connect port 2 (eth1) to linux Host 1 with vlan 1 configured + - ping from Host 1 through vlan 1 interface. +ARP packets will be seen on Host 0. + +Issue happens because dual_mac mode is implemented using two vlans: 1 (Port +1+Port 0) and 2 (Port 2+Port 0), so there are vlan records created for +each vlan. By default, the ALE will find valid vlan record in its table +when vlan 1 tagged packet arrived on Port 2 and so forwards packet to all +ports which are vlan 1 members (like Port 1). + +To avoid such behavior the ALE VLAN ID Ingress Check needs to be enabled +for each external CPSW port (ALE_PORTCTLn.VID_INGRESS_CHECK) so ALE will +drop ingress packets if Rx port is not VLAN member. + +Signed-off-by: Grygorii Strashko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/ti/cpsw.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/ethernet/ti/cpsw.c ++++ b/drivers/net/ethernet/ti/cpsw.c +@@ -1278,6 +1278,8 @@ static inline void cpsw_add_dual_emac_de + cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, + HOST_PORT_NUM, ALE_VLAN | + ALE_SECURE, slave->port_vlan); ++ cpsw_ale_control_set(cpsw->ale, slave_port, ++ ALE_PORT_DROP_UNKNOWN_VLAN, 1); + } + + static void soft_reset_slave(struct cpsw_slave *slave) diff --git a/queue-4.16/net-mlx4_en-fix-an-error-handling-path-in-mlx4_en_init_netdev.patch b/queue-4.16/net-mlx4_en-fix-an-error-handling-path-in-mlx4_en_init_netdev.patch new file mode 100644 index 00000000000..5383afb99dd --- /dev/null +++ b/queue-4.16/net-mlx4_en-fix-an-error-handling-path-in-mlx4_en_init_netdev.patch @@ -0,0 +1,55 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Christophe JAILLET +Date: Thu, 10 May 2018 09:06:04 +0200 +Subject: net/mlx4_en: Fix an error handling path in 'mlx4_en_init_netdev()' + +From: Christophe JAILLET + +[ Upstream commit a577d868b768a3baf16cdd4841ab8cfb165521d6 ] + +If an error occurs, 'mlx4_en_destroy_netdev()' is called. +It then calls 'mlx4_en_free_resources()' which does the needed resources +cleanup. + +So, doing some explicit kfree in the error handling path would lead to +some double kfree. + +Simplify code to avoid such a case. + +Fixes: 67f8b1dcb9ee ("net/mlx4_en: Refactor the XDP forwarding rings scheme") +Signed-off-by: Christophe JAILLET +Reviewed-by: Tariq Toukan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +@@ -3320,12 +3320,11 @@ int mlx4_en_init_netdev(struct mlx4_en_d + MAX_TX_RINGS, GFP_KERNEL); + if (!priv->tx_ring[t]) { + err = -ENOMEM; +- goto err_free_tx; ++ goto out; + } + priv->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) * + MAX_TX_RINGS, GFP_KERNEL); + if (!priv->tx_cq[t]) { +- kfree(priv->tx_ring[t]); + err = -ENOMEM; + goto out; + } +@@ -3578,11 +3577,6 @@ int mlx4_en_init_netdev(struct mlx4_en_d + + return 0; + +-err_free_tx: +- while (t--) { +- kfree(priv->tx_ring[t]); +- kfree(priv->tx_cq[t]); +- } + out: + mlx4_en_destroy_netdev(dev); + return err; diff --git a/queue-4.16/net-mlx4_en-verify-coalescing-parameters-are-in-range.patch b/queue-4.16/net-mlx4_en-verify-coalescing-parameters-are-in-range.patch new file mode 100644 index 00000000000..ab1f1d9ecd8 --- /dev/null +++ b/queue-4.16/net-mlx4_en-verify-coalescing-parameters-are-in-range.patch @@ -0,0 +1,79 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Moshe Shemesh +Date: Wed, 9 May 2018 18:35:13 +0300 +Subject: net/mlx4_en: Verify coalescing parameters are in range + +From: Moshe Shemesh + +[ Upstream commit 6ad4e91c6d796b38a7f0e724db1de28eeb122bad ] + +Add check of coalescing parameters received through ethtool are within +range of values supported by the HW. +Driver gets the coalescing rx/tx-usecs and rx/tx-frames as set by the +users through ethtool. The ethtool support up to 32 bit value for each. +However, mlx4 modify cq limits the coalescing time parameter and +coalescing frames parameters to 16 bits. +Return out of range error if user tries to set these parameters to +higher values. 
+Change type of sample-interval and adaptive_rx_coal parameters in mlx4 +driver to u32 as the ethtool holds them as u32 and these parameters are +not limited due to mlx4 HW. + +Fixes: c27a02cd94d6 ('mlx4_en: Add driver for Mellanox ConnectX 10GbE NIC') +Signed-off-by: Moshe Shemesh +Signed-off-by: Tariq Toukan +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 16 ++++++++++++++++ + drivers/net/ethernet/mellanox/mlx4/mlx4_en.h | 7 +++++-- + 2 files changed, 21 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c ++++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +@@ -1013,6 +1013,22 @@ static int mlx4_en_set_coalesce(struct n + if (!coal->tx_max_coalesced_frames_irq) + return -EINVAL; + ++ if (coal->tx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME || ++ coal->rx_coalesce_usecs > MLX4_EN_MAX_COAL_TIME || ++ coal->rx_coalesce_usecs_low > MLX4_EN_MAX_COAL_TIME || ++ coal->rx_coalesce_usecs_high > MLX4_EN_MAX_COAL_TIME) { ++ netdev_info(dev, "%s: maximum coalesce time supported is %d usecs\n", ++ __func__, MLX4_EN_MAX_COAL_TIME); ++ return -ERANGE; ++ } ++ ++ if (coal->tx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS || ++ coal->rx_max_coalesced_frames > MLX4_EN_MAX_COAL_PKTS) { ++ netdev_info(dev, "%s: maximum coalesced frames supported is %d\n", ++ __func__, MLX4_EN_MAX_COAL_PKTS); ++ return -ERANGE; ++ } ++ + priv->rx_frames = (coal->rx_max_coalesced_frames == + MLX4_EN_AUTO_CONF) ? 
+ MLX4_EN_RX_COAL_TARGET : +--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h ++++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +@@ -132,6 +132,9 @@ + #define MLX4_EN_TX_COAL_PKTS 16 + #define MLX4_EN_TX_COAL_TIME 0x10 + ++#define MLX4_EN_MAX_COAL_PKTS U16_MAX ++#define MLX4_EN_MAX_COAL_TIME U16_MAX ++ + #define MLX4_EN_RX_RATE_LOW 400000 + #define MLX4_EN_RX_COAL_TIME_LOW 0 + #define MLX4_EN_RX_RATE_HIGH 450000 +@@ -552,8 +555,8 @@ struct mlx4_en_priv { + u16 rx_usecs_low; + u32 pkt_rate_high; + u16 rx_usecs_high; +- u16 sample_interval; +- u16 adaptive_rx_coal; ++ u32 sample_interval; ++ u32 adaptive_rx_coal; + u32 msg_enable; + u32 loopback_ok; + u32 validate_loopback; diff --git a/queue-4.16/net-mlx5-avoid-cleaning-flow-steering-table-twice-during-error-flow.patch b/queue-4.16/net-mlx5-avoid-cleaning-flow-steering-table-twice-during-error-flow.patch new file mode 100644 index 00000000000..fbba0f37f3c --- /dev/null +++ b/queue-4.16/net-mlx5-avoid-cleaning-flow-steering-table-twice-during-error-flow.patch @@ -0,0 +1,72 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Talat Batheesh +Date: Sun, 15 Apr 2018 11:26:19 +0300 +Subject: net/mlx5: Avoid cleaning flow steering table twice during error flow + +From: Talat Batheesh + +[ Upstream commit 9c26f5f89d01ca21560c6b8a8e4054c271cc3a9c ] + +When we fail to initialize the RX root namespace, we need +to clean only that and not the entire flow steering. + +Currently the code may try to clean the flow steering twice +on error which leads to null pointer dereference. +Make sure we clean correctly. 
+ +Fixes: fba53f7b5719 ("net/mlx5: Introduce mlx5_flow_steering structure") +Signed-off-by: Talat Batheesh +Reviewed-by: Mark Bloch +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 23 +++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +@@ -185,6 +185,7 @@ static void del_sw_ns(struct fs_node *no + static void del_sw_hw_rule(struct fs_node *node); + static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, + struct mlx5_flow_destination *d2); ++static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns); + static struct mlx5_flow_rule * + find_flow_rule(struct fs_fte *fte, + struct mlx5_flow_destination *dest); +@@ -2329,23 +2330,27 @@ static int create_anchor_flow_table(stru + + static int init_root_ns(struct mlx5_flow_steering *steering) + { ++ int err; ++ + steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); + if (!steering->root_ns) +- goto cleanup; ++ return -ENOMEM; + +- if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node)) +- goto cleanup; ++ err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node); ++ if (err) ++ goto out_err; + + set_prio_attrs(steering->root_ns); +- +- if (create_anchor_flow_table(steering)) +- goto cleanup; ++ err = create_anchor_flow_table(steering); ++ if (err) ++ goto out_err; + + return 0; + +-cleanup: +- mlx5_cleanup_fs(steering->dev); +- return -ENOMEM; ++out_err: ++ cleanup_root_ns(steering->root_ns); ++ steering->root_ns = NULL; ++ return err; + } + + static void clean_tree(struct fs_node *node) diff --git a/queue-4.16/net-mlx5-e-switch-include-vf-rdma-stats-in-vport-statistics.patch b/queue-4.16/net-mlx5-e-switch-include-vf-rdma-stats-in-vport-statistics.patch new file mode 100644 index 00000000000..9c86b8e5896 --- /dev/null +++ 
b/queue-4.16/net-mlx5-e-switch-include-vf-rdma-stats-in-vport-statistics.patch @@ -0,0 +1,61 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Adi Nissim +Date: Wed, 25 Apr 2018 11:21:32 +0300 +Subject: net/mlx5: E-Switch, Include VF RDMA stats in vport statistics + +From: Adi Nissim + +[ Upstream commit 88d725bbb43cd63a40c8ef70dd373f1d38ead2e3 ] + +The host side reporting of VF vport statistics didn't include the VF +RDMA traffic. + +Fixes: 3b751a2a418a ("net/mlx5: E-Switch, Introduce get vf statistics") +Signed-off-by: Adi Nissim +Reported-by: Ariel Almog +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +@@ -2143,26 +2143,35 @@ int mlx5_eswitch_get_vport_stats(struct + memset(vf_stats, 0, sizeof(*vf_stats)); + vf_stats->rx_packets = + MLX5_GET_CTR(out, received_eth_unicast.packets) + ++ MLX5_GET_CTR(out, received_ib_unicast.packets) + + MLX5_GET_CTR(out, received_eth_multicast.packets) + ++ MLX5_GET_CTR(out, received_ib_multicast.packets) + + MLX5_GET_CTR(out, received_eth_broadcast.packets); + + vf_stats->rx_bytes = + MLX5_GET_CTR(out, received_eth_unicast.octets) + ++ MLX5_GET_CTR(out, received_ib_unicast.octets) + + MLX5_GET_CTR(out, received_eth_multicast.octets) + ++ MLX5_GET_CTR(out, received_ib_multicast.octets) + + MLX5_GET_CTR(out, received_eth_broadcast.octets); + + vf_stats->tx_packets = + MLX5_GET_CTR(out, transmitted_eth_unicast.packets) + ++ MLX5_GET_CTR(out, transmitted_ib_unicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.packets) + ++ MLX5_GET_CTR(out, transmitted_ib_multicast.packets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.packets); + + vf_stats->tx_bytes = + MLX5_GET_CTR(out, transmitted_eth_unicast.octets) + ++ MLX5_GET_CTR(out, 
transmitted_ib_unicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_multicast.octets) + ++ MLX5_GET_CTR(out, transmitted_ib_multicast.octets) + + MLX5_GET_CTR(out, transmitted_eth_broadcast.octets); + + vf_stats->multicast = +- MLX5_GET_CTR(out, received_eth_multicast.packets); ++ MLX5_GET_CTR(out, received_eth_multicast.packets) + ++ MLX5_GET_CTR(out, received_ib_multicast.packets); + + vf_stats->broadcast = + MLX5_GET_CTR(out, received_eth_broadcast.packets); diff --git a/queue-4.16/net-mlx5-fix-mlx5_get_vector_affinity-function.patch b/queue-4.16/net-mlx5-fix-mlx5_get_vector_affinity-function.patch new file mode 100644 index 00000000000..7fbc0671842 --- /dev/null +++ b/queue-4.16/net-mlx5-fix-mlx5_get_vector_affinity-function.patch @@ -0,0 +1,72 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Israel Rukshin +Date: Thu, 12 Apr 2018 09:49:11 +0000 +Subject: net/mlx5: Fix mlx5_get_vector_affinity function + +From: Israel Rukshin + +[ Upstream commit 6082d9c9c94a408d7409b5f2e4e42ac9e8b16d0d ] + +Adding the vector offset when calling to mlx5_vector2eqn() is wrong. +This is because mlx5_vector2eqn() checks if EQ index is equal to vector number +and the fact that the internal completion vectors that mlx5 allocates +don't get an EQ index. + +The second problem here is that using effective_affinity_mask gives the same +CPU for different vectors. +This leads to unmapped queues when calling it from blk_mq_rdma_map_queues(). +This doesn't happen when using affinity_hint mask. 
+ +Fixes: 2572cf57d75a ("mlx5: fix mlx5_get_vector_affinity to start from completion vector 0") +Fixes: 05e0cc84e00c ("net/mlx5: Fix get vector affinity helper function") +Signed-off-by: Israel Rukshin +Reviewed-by: Max Gurtovoy +Reviewed-by: Sagi Grimberg +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mlx5/main.c | 2 +- + include/linux/mlx5/driver.h | 12 +++--------- + 2 files changed, 4 insertions(+), 10 deletions(-) + +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -4303,7 +4303,7 @@ mlx5_ib_get_vector_affinity(struct ib_de + { + struct mlx5_ib_dev *dev = to_mdev(ibdev); + +- return mlx5_get_vector_affinity(dev->mdev, comp_vector); ++ return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); + } + + /* The mlx5_ib_multiport_mutex should be held when calling this function */ +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -1269,25 +1269,19 @@ enum { + }; + + static inline const struct cpumask * +-mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) ++mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) + { +- const struct cpumask *mask; + struct irq_desc *desc; + unsigned int irq; + int eqn; + int err; + +- err = mlx5_vector2eqn(dev, MLX5_EQ_VEC_COMP_BASE + vector, &eqn, &irq); ++ err = mlx5_vector2eqn(dev, vector, &eqn, &irq); + if (err) + return NULL; + + desc = irq_to_desc(irq); +-#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK +- mask = irq_data_get_effective_affinity_mask(&desc->irq_data); +-#else +- mask = desc->irq_common_data.affinity; +-#endif +- return mask; ++ return desc->affinity_hint; + } + + #endif /* MLX5_DRIVER_H */ diff --git a/queue-4.16/net-mlx5e-allow-offloading-ipv4-header-re-write-for-icmp.patch b/queue-4.16/net-mlx5e-allow-offloading-ipv4-header-re-write-for-icmp.patch new file mode 100644 index 00000000000..499ab64c253 --- /dev/null +++ b/queue-4.16/net-mlx5e-allow-offloading-ipv4-header-re-write-for-icmp.patch @@ -0,0 +1,34 @@ 
+From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Jianbo Liu +Date: Tue, 27 Mar 2018 09:22:16 +0000 +Subject: net/mlx5e: Allow offloading ipv4 header re-write for icmp + +From: Jianbo Liu + +[ Upstream commit 1ccef350db2f13715040a10df77ae672206004cf ] + +For ICMPv4, the checksum is calculated from the ICMP headers and data. +Since the ICMPv4 checksum doesn't cover the IP header, we can allow to +do L3 header re-write for this protocol. + +Fixes: bdd66ac0aeed ('net/mlx5e: Disallow TC offloading of unsupported match/action combinations') +Signed-off-by: Jianbo Liu +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -1867,7 +1867,8 @@ static bool modify_header_match_supporte + } + + ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); +- if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { ++ if (modify_ip_header && ip_proto != IPPROTO_TCP && ++ ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { + pr_info("can't offload re-write of ip proto %d\n", ip_proto); + return false; + } diff --git a/queue-4.16/net-mlx5e-dcbnl-fix-min-inline-header-size-for-dscp.patch b/queue-4.16/net-mlx5e-dcbnl-fix-min-inline-header-size-for-dscp.patch new file mode 100644 index 00000000000..a9b55fac77d --- /dev/null +++ b/queue-4.16/net-mlx5e-dcbnl-fix-min-inline-header-size-for-dscp.patch @@ -0,0 +1,44 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Huy Nguyen +Date: Thu, 21 Dec 2017 15:15:24 -0600 +Subject: net/mlx5e: DCBNL fix min inline header size for dscp + +From: Huy Nguyen + +[ Upstream commit 35f80acb24cd53dabd65e0660e46afdf5c45991d ] + +When the trust state is set to dscp and the netdev is down, the inline +header size is not updated. 
When netdev is up, the inline header size +stays at L2 instead of IP. + +Fix this issue by updating the private parameter when the netdev is in +down so that when netdev is up, it picks up the right header size. + +Fixes: fbcb127e89ba ("net/mlx5e: Support DSCP trust state ...") +Signed-off-by: Huy Nguyen +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +@@ -1007,12 +1007,14 @@ static void mlx5e_trust_update_sq_inline + + mutex_lock(&priv->state_lock); + +- if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) +- goto out; +- + new_channels.params = priv->channels.params; + mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params); + ++ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { ++ priv->channels.params = new_channels.params; ++ goto out; ++ } ++ + /* Skip if tx_min_inline is the same */ + if (new_channels.params.tx_min_inline_mode == + priv->channels.params.tx_min_inline_mode) diff --git a/queue-4.16/net-mlx5e-err-if-asked-to-offload-tc-match-on-frag-being-first.patch b/queue-4.16/net-mlx5e-err-if-asked-to-offload-tc-match-on-frag-being-first.patch new file mode 100644 index 00000000000..cad0055a19c --- /dev/null +++ b/queue-4.16/net-mlx5e-err-if-asked-to-offload-tc-match-on-frag-being-first.patch @@ -0,0 +1,34 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Roi Dayan +Date: Thu, 22 Mar 2018 18:51:37 +0200 +Subject: net/mlx5e: Err if asked to offload TC match on frag being first + +From: Roi Dayan + +[ Upstream commit f85900c3e13fdb61f040c9feecbcda601e0cdcfb ] + +The HW doesn't support matching on frag first/later, return error if we are +asked to offload that. 
+ +Fixes: 3f7d0eb42d59 ("net/mlx5e: Offload TC matching on packets being IP fragments") +Signed-off-by: Roi Dayan +Reviewed-by: Or Gerlitz +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -1260,6 +1260,10 @@ static int __parse_cls_flower(struct mlx + f->mask); + addr_type = key->addr_type; + ++ /* the HW doesn't support frag first/later */ ++ if (mask->flags & FLOW_DIS_FIRST_FRAG) ++ return -EOPNOTSUPP; ++ + if (mask->flags & FLOW_DIS_IS_FRAGMENT) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, diff --git a/queue-4.16/net-mlx5e-tx-use-correct-counter-in-dma_map-error-flow.patch b/queue-4.16/net-mlx5e-tx-use-correct-counter-in-dma_map-error-flow.patch new file mode 100644 index 00000000000..c0c872e04f4 --- /dev/null +++ b/queue-4.16/net-mlx5e-tx-use-correct-counter-in-dma_map-error-flow.patch @@ -0,0 +1,94 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Tariq Toukan +Date: Tue, 20 Mar 2018 18:17:25 +0200 +Subject: net/mlx5e: TX, Use correct counter in dma_map error flow + +From: Tariq Toukan + +[ Upstream commit d9a96ec362e3da878c378854e25321c85bac52c2 ] + +In case of a dma_mapping_error, do not use wi->num_dma +as a parameter for dma unmap function because it's yet +to be set, and holds an out-of-date value. +Use actual value (local variable num_dma) instead. 
+ +Fixes: 34802a42b352 ("net/mlx5e: Do not modify the TX SKB") +Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") +Signed-off-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +@@ -255,7 +255,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txq + dma_addr = dma_map_single(sq->pdev, skb_data, headlen, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) +- return -ENOMEM; ++ goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; +@@ -273,7 +273,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txq + dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) +- return -ENOMEM; ++ goto dma_unmap_wqe_err; + + dseg->addr = cpu_to_be64(dma_addr); + dseg->lkey = sq->mkey_be; +@@ -285,6 +285,10 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txq + } + + return num_dma; ++ ++dma_unmap_wqe_err: ++ mlx5e_dma_unmap_wqe_err(sq, num_dma); ++ return -ENOMEM; + } + + static inline void +@@ -380,17 +384,15 @@ static netdev_tx_t mlx5e_sq_xmit(struct + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, + (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + if (unlikely(num_dma < 0)) +- goto dma_unmap_wqe_err; ++ goto err_drop; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, + num_bytes, num_dma, wi, cseg); + + return NETDEV_TX_OK; + +-dma_unmap_wqe_err: ++err_drop: + sq->stats.dropped++; +- mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); +- + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; +@@ -620,17 +622,15 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_t + num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, + (struct mlx5_wqe_data_seg *)cseg + ds_cnt); + if (unlikely(num_dma < 0)) +- 
goto dma_unmap_wqe_err; ++ goto err_drop; + + mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, + num_bytes, num_dma, wi, cseg); + + return NETDEV_TX_OK; + +-dma_unmap_wqe_err: ++err_drop: + sq->stats.dropped++; +- mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); +- + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; diff --git a/queue-4.16/net-phy-sfp-fix-the-br-min-computation.patch b/queue-4.16/net-phy-sfp-fix-the-br-min-computation.patch new file mode 100644 index 00000000000..4c2cd31f503 --- /dev/null +++ b/queue-4.16/net-phy-sfp-fix-the-br-min-computation.patch @@ -0,0 +1,37 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Antoine Tenart +Date: Fri, 4 May 2018 17:10:54 +0200 +Subject: net: phy: sfp: fix the BR,min computation + +From: Antoine Tenart + +[ Upstream commit 52c5cd1bf0cecf4b146ca07dc513cbe2f4583bb5 ] + +In an SFP EEPROM values can be read to get information about a given SFP +module. One of those is the bitrate, which can be determined using a +nominal bitrate in addition with min and max values (in %). The SFP code +currently computes both BR,min and BR,max values thanks to this nominal +and min,max values. + +This patch fixes the BR,min computation as the min value should be +subtracted from the nominal one, not added. + +Fixes: 9962acf7fb8c ("sfp: add support for 1000Base-PX and 1000Base-BX10") +Signed-off-by: Antoine Tenart +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/phy/sfp-bus.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/phy/sfp-bus.c ++++ b/drivers/net/phy/sfp-bus.c +@@ -190,7 +190,7 @@ void sfp_parse_support(struct sfp_bus *b + if (id->base.br_nominal) { + if (id->base.br_nominal != 255) { + br_nom = id->base.br_nominal * 100; +- br_min = br_nom + id->base.br_nominal * id->ext.br_min; ++ br_min = br_nom - id->base.br_nominal * id->ext.br_min; + br_max = br_nom + id->base.br_nominal * id->ext.br_max; + } else if (id->ext.br_max) { + br_nom = 250 * id->ext.br_max; diff --git a/queue-4.16/net-sched-actions-fix-refcnt-leak-in-skbmod.patch b/queue-4.16/net-sched-actions-fix-refcnt-leak-in-skbmod.patch new file mode 100644 index 00000000000..c6b9e910f96 --- /dev/null +++ b/queue-4.16/net-sched-actions-fix-refcnt-leak-in-skbmod.patch @@ -0,0 +1,55 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Roman Mashak +Date: Fri, 11 May 2018 14:35:33 -0400 +Subject: net sched actions: fix refcnt leak in skbmod + +From: Roman Mashak + +[ Upstream commit a52956dfc503f8cc5cfe6454959b7049fddb4413 ] + +When application fails to pass flags in netlink TLV when replacing +existing skbmod action, the kernel will leak refcnt: + +$ tc actions get action skbmod index 1 +total acts 0 + + action order 0: skbmod pipe set smac 00:11:22:33:44:55 + index 1 ref 1 bind 0 + +For example, at this point a buggy application replaces the action with +index 1 with new smac 00:aa:22:33:44:55, it fails because of zero flags, +however refcnt gets bumped: + +$ tc actions get actions skbmod index 1 +total acts 0 + + action order 0: skbmod pipe set smac 00:11:22:33:44:55 + index 1 ref 2 bind 0 +$ + +The patch fixes this by calling tcf_idr_release() on existing actions. + +Fixes: 86da71b57383d ("net_sched: Introduce skbmod action") +Signed-off-by: Roman Mashak +Acked-by: Cong Wang +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/act_skbmod.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/net/sched/act_skbmod.c ++++ b/net/sched/act_skbmod.c +@@ -131,8 +131,11 @@ static int tcf_skbmod_init(struct net *n + if (exists && bind) + return 0; + +- if (!lflags) ++ if (!lflags) { ++ if (exists) ++ tcf_idr_release(*a, bind); + return -EINVAL; ++ } + + if (!exists) { + ret = tcf_idr_create(tn, parm->index, est, a, diff --git a/queue-4.16/net-sched-fix-error-path-in-tcf_proto_create-when-modules-are-not-configured.patch b/queue-4.16/net-sched-fix-error-path-in-tcf_proto_create-when-modules-are-not-configured.patch new file mode 100644 index 00000000000..177a3a6d34d --- /dev/null +++ b/queue-4.16/net-sched-fix-error-path-in-tcf_proto_create-when-modules-are-not-configured.patch @@ -0,0 +1,33 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Jiri Pirko +Date: Fri, 11 May 2018 17:45:32 +0200 +Subject: net: sched: fix error path in tcf_proto_create() when modules are not configured + +From: Jiri Pirko + +[ Upstream commit d68d75fdc34b0253c2bded7ed18cd60eb5a9599b ] + +In case modules are not configured, error out when tp->ops is null +and prevent later null pointer dereference. + +Fixes: 33a48927c193 ("sched: push TC filter protocol creation into a separate function") +Signed-off-by: Jiri Pirko +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/cls_api.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sched/cls_api.c ++++ b/net/sched/cls_api.c +@@ -152,8 +152,8 @@ static struct tcf_proto *tcf_proto_creat + NL_SET_ERR_MSG(extack, "TC classifier not found"); + err = -ENOENT; + } +- goto errout; + #endif ++ goto errout; + } + tp->classify = tp->ops->classify; + tp->protocol = protocol; diff --git a/queue-4.16/net-smc-keep-clcsock-reference-in-smc_tcp_listen_work.patch b/queue-4.16/net-smc-keep-clcsock-reference-in-smc_tcp_listen_work.patch new file mode 100644 index 00000000000..e1a19c54e58 --- /dev/null +++ b/queue-4.16/net-smc-keep-clcsock-reference-in-smc_tcp_listen_work.patch @@ -0,0 +1,48 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Ursula Braun +Date: Wed, 25 Apr 2018 12:48:58 +0200 +Subject: net/smc: keep clcsock reference in smc_tcp_listen_work() + +From: Ursula Braun + +[ Upstream commit 070204a34884110ac5e19c1e2e036fcfd033f8e3 ] + +The internal CLC socket should exist till the SMC-socket is released. +Function tcp_listen_worker() releases the internal CLC socket of a +listen socket, if an smc_close_active() is called. This function +is called for the final release(), but it is called for shutdown +SHUT_RDWR as well. This opens a door for protection faults, if +socket calls using the internal CLC socket are called for a +shutdown listen socket. + +With the changes of +commit 3d502067599f ("net/smc: simplify wait when closing listen socket") +there is no need anymore to release the internal CLC socket in +function tcp_listen_worker(). It is sufficient to release it in +smc_release().
+ +Fixes: 127f49705823 ("net/smc: release clcsock from tcp_listen_worker") +Signed-off-by: Ursula Braun +Reported-by: syzbot+9045fc589fcd196ef522@syzkaller.appspotmail.com +Reported-by: syzbot+28a2c86cf19c81d871fa@syzkaller.appspotmail.com +Reported-by: syzbot+9605e6cace1b5efd4a0a@syzkaller.appspotmail.com +Reported-by: syzbot+cf9012c597c8379d535c@syzkaller.appspotmail.com +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/af_smc.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -973,10 +973,6 @@ static void smc_tcp_listen_work(struct w + } + + out: +- if (lsmc->clcsock) { +- sock_release(lsmc->clcsock); +- lsmc->clcsock = NULL; +- } + release_sock(lsk); + sock_put(&lsmc->sk); /* sock_hold in smc_listen */ + } diff --git a/queue-4.16/net-smc-restrict-non-blocking-connect-finish.patch b/queue-4.16/net-smc-restrict-non-blocking-connect-finish.patch new file mode 100644 index 00000000000..08aa82e775e --- /dev/null +++ b/queue-4.16/net-smc-restrict-non-blocking-connect-finish.patch @@ -0,0 +1,55 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Ursula Braun +Date: Wed, 2 May 2018 16:53:56 +0200 +Subject: net/smc: restrict non-blocking connect finish + +From: Ursula Braun + +[ Upstream commit 784813aed6ba24a1f24e7e11d9d0f208cee37a7d ] + +The smc_poll code tries to finish connect() if the socket is in +state SMC_INIT and polling of the internal CLC-socket returns with +EPOLLOUT. This makes sense for a select/poll call following a connect +call, but not without preceding connect(). +With this patch smc_poll starts connect logic only, if the CLC-socket +is no longer in its initial state TCP_CLOSE. + +In addition, a poll error on the internal CLC-socket is always +propagated to the SMC socket. + +With this patch the code path mentioned by syzbot +https://syzkaller.appspot.com/bug?extid=03faa2dc16b8b64be396 +is no longer possible. 
+ +Signed-off-by: Ursula Braun +Reported-by: syzbot+03faa2dc16b8b64be396@syzkaller.appspotmail.com +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/smc/af_smc.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -1165,13 +1165,15 @@ static __poll_t smc_poll(struct file *fi + /* delegate to CLC child sock */ + release_sock(sk); + mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); +- /* if non-blocking connect finished ... */ + lock_sock(sk); +- if ((sk->sk_state == SMC_INIT) && (mask & EPOLLOUT)) { +- sk->sk_err = smc->clcsock->sk->sk_err; +- if (sk->sk_err) { +- mask |= EPOLLERR; +- } else { ++ sk->sk_err = smc->clcsock->sk->sk_err; ++ if (sk->sk_err) { ++ mask |= EPOLLERR; ++ } else { ++ /* if non-blocking connect finished ... */ ++ if (sk->sk_state == SMC_INIT && ++ mask & EPOLLOUT && ++ smc->clcsock->sk->sk_state != TCP_CLOSE) { + rc = smc_connect_rdma(smc); + if (rc < 0) + mask |= EPOLLERR; diff --git a/queue-4.16/net-support-compat-64-bit-time-in-s-g-etsockopt.patch b/queue-4.16/net-support-compat-64-bit-time-in-s-g-etsockopt.patch new file mode 100644 index 00000000000..df8c248b38f --- /dev/null +++ b/queue-4.16/net-support-compat-64-bit-time-in-s-g-etsockopt.patch @@ -0,0 +1,140 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Lance Richardson +Date: Wed, 25 Apr 2018 10:21:54 -0400 +Subject: net: support compat 64-bit time in {s,g}etsockopt + +From: Lance Richardson + +[ Upstream commit 988bf7243e03ef69238381594e0334a79cef74a6 ] + +For the x32 ABI, struct timeval has two 64-bit fields. However +the kernel currently interprets the user-space values used for +the SO_RCVTIMEO and SO_SNDTIMEO socket options as having a pair +of 32-bit fields. + +When the seconds portion of the requested timeout is less than 2**32, +the seconds portion of the effective timeout is correct but the +microseconds portion is zero. 
When the seconds portion of the +requested timeout is zero and the microseconds portion is non-zero, +the kernel interprets the timeout as zero (never timeout). + +Fix by using 64-bit time for SO_RCVTIMEO/SO_SNDTIMEO as required +for the ABI. + +The code included below demonstrates the problem. + +Results before patch: + $ gcc -m64 -Wall -O2 -o socktmo socktmo.c && ./socktmo + recv time: 2.008181 seconds + send time: 2.015985 seconds + + $ gcc -m32 -Wall -O2 -o socktmo socktmo.c && ./socktmo + recv time: 2.016763 seconds + send time: 2.016062 seconds + + $ gcc -mx32 -Wall -O2 -o socktmo socktmo.c && ./socktmo + recv time: 1.007239 seconds + send time: 1.023890 seconds + +Results after patch: + $ gcc -m64 -O2 -Wall -o socktmo socktmo.c && ./socktmo + recv time: 2.010062 seconds + send time: 2.015836 seconds + + $ gcc -m32 -O2 -Wall -o socktmo socktmo.c && ./socktmo + recv time: 2.013974 seconds + send time: 2.015981 seconds + + $ gcc -mx32 -O2 -Wall -o socktmo socktmo.c && ./socktmo + recv time: 2.030257 seconds + send time: 2.013383 seconds + + #include + #include + #include + #include + #include + + void checkrc(char *str, int rc) + { + if (rc >= 0) + return; + + perror(str); + exit(1); + } + + static char buf[1024]; + int main(int argc, char **argv) + { + int rc; + int socks[2]; + struct timeval tv; + struct timeval start, end, delta; + + rc = socketpair(AF_UNIX, SOCK_STREAM, 0, socks); + checkrc("socketpair", rc); + + /* set timeout to 1.999999 seconds */ + tv.tv_sec = 1; + tv.tv_usec = 999999; + rc = setsockopt(socks[0], SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv); + rc = setsockopt(socks[0], SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof tv); + checkrc("setsockopt", rc); + + /* measure actual receive timeout */ + gettimeofday(&start, NULL); + rc = recv(socks[0], buf, sizeof buf, 0); + gettimeofday(&end, NULL); + timersub(&end, &start, &delta); + + printf("recv time: %ld.%06ld seconds\n", + (long)delta.tv_sec, (long)delta.tv_usec); + + /* fill send buffer */ + do { + rc = 
send(socks[0], buf, sizeof buf, 0); + } while (rc > 0); + + /* measure actual send timeout */ + gettimeofday(&start, NULL); + rc = send(socks[0], buf, sizeof buf, 0); + gettimeofday(&end, NULL); + timersub(&end, &start, &delta); + + printf("send time: %ld.%06ld seconds\n", + (long)delta.tv_sec, (long)delta.tv_usec); + exit(0); + } + +Fixes: 515c7af85ed9 ("x32: Use compat shims for {g,s}etsockopt") +Reported-by: Gopal RajagopalSai +Signed-off-by: Lance Richardson +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/compat.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/compat.c ++++ b/net/compat.c +@@ -377,7 +377,8 @@ static int compat_sock_setsockopt(struct + optname == SO_ATTACH_REUSEPORT_CBPF) + return do_set_attach_filter(sock, level, optname, + optval, optlen); +- if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) ++ if (!COMPAT_USE_64BIT_TIME && ++ (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) + return do_set_sock_timeout(sock, level, optname, optval, optlen); + + return sock_setsockopt(sock, level, optname, optval, optlen); +@@ -442,7 +443,8 @@ static int do_get_sock_timeout(struct so + static int compat_sock_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) + { +- if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) ++ if (!COMPAT_USE_64BIT_TIME && ++ (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) + return do_get_sock_timeout(sock, level, optname, optval, optlen); + return sock_getsockopt(sock, level, optname, optval, optlen); + } diff --git a/queue-4.16/net-systemport-correclty-disambiguate-driver-instances.patch b/queue-4.16/net-systemport-correclty-disambiguate-driver-instances.patch new file mode 100644 index 00000000000..688ae404e7c --- /dev/null +++ b/queue-4.16/net-systemport-correclty-disambiguate-driver-instances.patch @@ -0,0 +1,85 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Florian Fainelli +Date: Wed, 25 Apr 2018 16:21:51 
-0700 +Subject: net: systemport: Correclty disambiguate driver instances + +From: Florian Fainelli + +[ Upstream commit 1f3ccc3c3fc26468be00392ef0b2c215f9c9d054 ] + +While adding the DSA notifier, we will be sending DSA notifications with +info->master that is going to point to a particular net_device instance. + +Our logic in bcm_sysport_map_queues() correctly disambiguates net_device +instances that are not covered by our own driver, but it will not make +sure that info->master points to a particular driver instance that we +are interested in. In a system where e.g: two or more SYSTEMPORT +instances are registered, this would lead in programming two or more +times the queue mapping, completely messing with the logic which does +the queue/port allocation and tracking. + +Fix this by looking at the notifier_block pointer which is unique per +instance and allows us to go back to our driver private structure, and +in turn to the backing net_device instance. + +Fixes: d156576362c0 ("net: systemport: Establish lower/upper queue mapping") +Signed-off-by: Florian Fainelli +Reviewed-by: Vivien Didelot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/bcmsysport.c | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/broadcom/bcmsysport.c ++++ b/drivers/net/ethernet/broadcom/bcmsysport.c +@@ -2052,14 +2052,21 @@ static const struct net_device_ops bcm_s + .ndo_select_queue = bcm_sysport_select_queue, + }; + +-static int bcm_sysport_map_queues(struct net_device *dev, ++static int bcm_sysport_map_queues(struct notifier_block *nb, + struct dsa_notifier_register_info *info) + { +- struct bcm_sysport_priv *priv = netdev_priv(dev); + struct bcm_sysport_tx_ring *ring; ++ struct bcm_sysport_priv *priv; + struct net_device *slave_dev; + unsigned int num_tx_queues; + unsigned int q, start, port; ++ struct net_device *dev; ++ ++ priv = container_of(nb, struct bcm_sysport_priv, dsa_notifier); ++ if (priv->netdev != info->master) ++ return 0; ++ ++ dev = info->master; + + /* We can't be setting up queue inspection for non directly attached + * switches +@@ -2082,6 +2089,7 @@ static int bcm_sysport_map_queues(struct + if (priv->is_lite) + netif_set_real_num_tx_queues(slave_dev, + slave_dev->num_tx_queues / 2); ++ + num_tx_queues = slave_dev->real_num_tx_queues; + + if (priv->per_port_num_tx_queues && +@@ -2109,7 +2117,7 @@ static int bcm_sysport_map_queues(struct + return 0; + } + +-static int bcm_sysport_dsa_notifier(struct notifier_block *unused, ++static int bcm_sysport_dsa_notifier(struct notifier_block *nb, + unsigned long event, void *ptr) + { + struct dsa_notifier_register_info *info; +@@ -2119,7 +2127,7 @@ static int bcm_sysport_dsa_notifier(stru + + info = ptr; + +- return notifier_from_errno(bcm_sysport_map_queues(info->master, info)); ++ return notifier_from_errno(bcm_sysport_map_queues(nb, info)); + } + + #define REV_FMT "v%2x.%02x" diff --git a/queue-4.16/net-tls-don-t-recursively-call-push_record-during-tls_write_space-callbacks.patch 
b/queue-4.16/net-tls-don-t-recursively-call-push_record-during-tls_write_space-callbacks.patch new file mode 100644 index 00000000000..3678ac4bc7c --- /dev/null +++ b/queue-4.16/net-tls-don-t-recursively-call-push_record-during-tls_write_space-callbacks.patch @@ -0,0 +1,72 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Dave Watson +Date: Tue, 1 May 2018 13:05:39 -0700 +Subject: net/tls: Don't recursively call push_record during tls_write_space callbacks + +From: Dave Watson + +[ Upstream commit c212d2c7fc4736d49be102fb7a1a545cdc2f1fea ] + +It is reported that in some cases, write_space may be called in +do_tcp_sendpages, such that we recursively invoke do_tcp_sendpages again: + +[ 660.468802] ? do_tcp_sendpages+0x8d/0x580 +[ 660.468826] ? tls_push_sg+0x74/0x130 [tls] +[ 660.468852] ? tls_push_record+0x24a/0x390 [tls] +[ 660.468880] ? tls_write_space+0x6a/0x80 [tls] +... + +tls_push_sg already does a loop over all sending sg's, so ignore +any tls_write_space notifications until we are done sending. +We then have to call the previous write_space to wake up +poll() waiters after we are done with the send loop. + +Reported-by: Andre Tomt +Signed-off-by: Dave Watson +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tls.h | 1 + + net/tls/tls_main.c | 7 +++++++ + 2 files changed, 8 insertions(+) + +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -102,6 +102,7 @@ struct tls_context { + struct scatterlist *partially_sent_record; + u16 partially_sent_offset; + unsigned long flags; ++ bool in_tcp_sendpages; + + u16 pending_open_record_frags; + int (*push_pending_record)(struct sock *sk, int flags); +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -107,6 +107,7 @@ int tls_push_sg(struct sock *sk, + size = sg->length - offset; + offset += sg->offset; + ++ ctx->in_tcp_sendpages = true; + while (1) { + if (sg_is_last(sg)) + sendpage_flags = flags; +@@ -141,6 +142,8 @@ retry: + } + + clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); ++ ctx->in_tcp_sendpages = false; ++ ctx->sk_write_space(sk); + + return 0; + } +@@ -210,6 +213,10 @@ static void tls_write_space(struct sock + { + struct tls_context *ctx = tls_get_ctx(sk); + ++ /* We are already sending pages, ignore notification */ ++ if (ctx->in_tcp_sendpages) ++ return; ++ + if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) { + gfp_t sk_allocation = sk->sk_allocation; + int rc; diff --git a/queue-4.16/net-tls-fix-connection-stall-on-partial-tls-record.patch b/queue-4.16/net-tls-fix-connection-stall-on-partial-tls-record.patch new file mode 100644 index 00000000000..6829da3dc11 --- /dev/null +++ b/queue-4.16/net-tls-fix-connection-stall-on-partial-tls-record.patch @@ -0,0 +1,30 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Andre Tomt +Date: Mon, 7 May 2018 04:24:39 +0200 +Subject: net/tls: Fix connection stall on partial tls record + +From: Andre Tomt + +[ Upstream commit 080324c36ade319f57e505633ab54f6f53289b45 ] + +In the case of writing a partial tls record we forgot to clear the +ctx->in_tcp_sendpages flag, causing some connections to stall. 
+ +Fixes: c212d2c7fc47 ("net/tls: Don't recursively call push_record during tls_write_space callbacks") +Signed-off-by: Andre Tomt +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tls/tls_main.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -128,6 +128,7 @@ retry: + offset -= sg->offset; + ctx->partially_sent_offset = offset; + ctx->partially_sent_record = (void *)sg; ++ ctx->in_tcp_sendpages = false; + return ret; + } + diff --git a/queue-4.16/net_sched-fq-take-care-of-throttled-flows-before-reuse.patch b/queue-4.16/net_sched-fq-take-care-of-throttled-flows-before-reuse.patch new file mode 100644 index 00000000000..e18c07b9ce8 --- /dev/null +++ b/queue-4.16/net_sched-fq-take-care-of-throttled-flows-before-reuse.patch @@ -0,0 +1,96 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Eric Dumazet +Date: Wed, 2 May 2018 10:03:30 -0700 +Subject: net_sched: fq: take care of throttled flows before reuse + +From: Eric Dumazet + +[ Upstream commit 7df40c2673a1307c3260aab6f9d4b9bf97ca8fd7 ] + +Normally, a socket can not be freed/reused unless all its TX packets +left qdisc and were TX-completed. However connect(AF_UNSPEC) allows +this to happen. + +With commit fc59d5bdf1e3 ("pkt_sched: fq: clear time_next_packet for +reused flows") we cleared f->time_next_packet but took no special +action if the flow was still in the throttled rb-tree. + +Since f->time_next_packet is the key used in the rb-tree searches, +blindly clearing it might break rb-tree integrity. We need to make +sure the flow is no longer in the rb-tree to avoid this problem. + +Fixes: fc59d5bdf1e3 ("pkt_sched: fq: clear time_next_packet for reused flows") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_fq.c | 37 +++++++++++++++++++++++++------------ + 1 file changed, 25 insertions(+), 12 deletions(-) + +--- a/net/sched/sch_fq.c ++++ b/net/sched/sch_fq.c +@@ -128,6 +128,28 @@ static bool fq_flow_is_detached(const st + return f->next == &detached; + } + ++static bool fq_flow_is_throttled(const struct fq_flow *f) ++{ ++ return f->next == &throttled; ++} ++ ++static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow) ++{ ++ if (head->first) ++ head->last->next = flow; ++ else ++ head->first = flow; ++ head->last = flow; ++ flow->next = NULL; ++} ++ ++static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f) ++{ ++ rb_erase(&f->rate_node, &q->delayed); ++ q->throttled_flows--; ++ fq_flow_add_tail(&q->old_flows, f); ++} ++ + static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) + { + struct rb_node **p = &q->delayed.rb_node, *parent = NULL; +@@ -155,15 +177,6 @@ static void fq_flow_set_throttled(struct + + static struct kmem_cache *fq_flow_cachep __read_mostly; + +-static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow) +-{ +- if (head->first) +- head->last->next = flow; +- else +- head->first = flow; +- head->last = flow; +- flow->next = NULL; +-} + + /* limit number of collected flows per round */ + #define FQ_GC_MAX 8 +@@ -267,6 +280,8 @@ static struct fq_flow *fq_classify(struc + f->socket_hash != sk->sk_hash)) { + f->credit = q->initial_quantum; + f->socket_hash = sk->sk_hash; ++ if (fq_flow_is_throttled(f)) ++ fq_flow_unset_throttled(q, f); + f->time_next_packet = 0ULL; + } + return f; +@@ -438,9 +453,7 @@ static void fq_check_throttled(struct fq + q->time_next_delayed_flow = f->time_next_packet; + break; + } +- rb_erase(p, &q->delayed); +- q->throttled_flows--; +- fq_flow_add_tail(&q->old_flows, f); ++ fq_flow_unset_throttled(q, f); + } + } + diff --git 
a/queue-4.16/nfp-flower-set-tunnel-ttl-value-to-net-default.patch b/queue-4.16/nfp-flower-set-tunnel-ttl-value-to-net-default.patch new file mode 100644 index 00000000000..a11334dbe6f --- /dev/null +++ b/queue-4.16/nfp-flower-set-tunnel-ttl-value-to-net-default.patch @@ -0,0 +1,86 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: John Hurley +Date: Tue, 1 May 2018 15:49:49 -0700 +Subject: nfp: flower: set tunnel ttl value to net default + +From: John Hurley + +[ Upstream commit 50a5852a657f793a8482fe3af4a141b460d3499e ] + +Firmware requires that the ttl value for an encapsulating ipv4 tunnel +header be included as an action field. Prior to the support of Geneve +tunnel encap (when ttl set was removed completely), ttl value was +extracted from the tunnel key. However, tests have shown that this can +still produce a ttl of 0. + +Fix the issue by setting the namespace default value for each new tunnel. +Follow up patch for net-next will do a full route lookup. + +Fixes: 3ca3059dc3a9 ("nfp: flower: compile Geneve encap actions") +Fixes: b27d6a95a70d ("nfp: compile flower vxlan tunnel set actions") +Signed-off-by: John Hurley +Reviewed-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/flower/action.c | 10 ++++++++-- + drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 5 ++++- + 2 files changed, 12 insertions(+), 3 deletions(-) + +--- a/drivers/net/ethernet/netronome/nfp/flower/action.c ++++ b/drivers/net/ethernet/netronome/nfp/flower/action.c +@@ -183,17 +183,21 @@ static int + nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, + const struct tc_action *action, + struct nfp_fl_pre_tunnel *pre_tun, +- enum nfp_flower_tun_type tun_type) ++ enum nfp_flower_tun_type tun_type, ++ struct net_device *netdev) + { + size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); + struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); + u32 tmp_set_ip_tun_type_index = 0; + /* Currently support one pre-tunnel so index is always 0. */ + int pretun_idx = 0; ++ struct net *net; + + if (ip_tun->options_len) + return -EOPNOTSUPP; + ++ net = dev_net(netdev); ++ + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; + set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; + +@@ -204,6 +208,7 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_se + + set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); + set_tun->tun_id = ip_tun->key.tun_id; ++ set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; + + /* Complete pre_tunnel action. 
*/ + pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; +@@ -511,7 +516,8 @@ nfp_flower_loop_action(const struct tc_a + *a_len += sizeof(struct nfp_fl_pre_tunnel); + + set_tun = (void *)&nfp_fl->action_data[*a_len]; +- err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type); ++ err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type, ++ netdev); + if (err) + return err; + *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); +--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h ++++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +@@ -178,7 +178,10 @@ struct nfp_fl_set_ipv4_udp_tun { + __be16 reserved; + __be64 tun_id __packed; + __be32 tun_type_index; +- __be32 extra[3]; ++ __be16 reserved2; ++ u8 ttl; ++ u8 reserved3; ++ __be32 extra[2]; + }; + + /* Metadata with L2 (1W/4B) diff --git a/queue-4.16/nsh-fix-infinite-loop.patch b/queue-4.16/nsh-fix-infinite-loop.patch new file mode 100644 index 00000000000..07c8e661a24 --- /dev/null +++ b/queue-4.16/nsh-fix-infinite-loop.patch @@ -0,0 +1,269 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Eric Dumazet +Date: Thu, 3 May 2018 13:37:54 -0700 +Subject: nsh: fix infinite loop + +From: Eric Dumazet + +[ Upstream commit af50e4ba34f4c45e92535364133d4deb5931c1c5 ] + +syzbot caught an infinite recursion in nsh_gso_segment(). + +Problem here is that we need to make sure the NSH header is of +reasonable length. + +BUG: MAX_LOCK_DEPTH too low! +turning off the locking correctness validator. +depth: 48 max: 48! 
+48 locks held by syz-executor0/10189: + #0: (ptrval) (rcu_read_lock_bh){....}, at: __dev_queue_xmit+0x30f/0x34c0 net/core/dev.c:3517 + #1: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #1: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #2: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #2: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #3: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #3: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #4: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #4: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #5: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #5: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #6: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #6: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #7: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #7: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #8: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #8: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #9: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #9: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #10: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #10: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #11: (ptrval) 
(rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #11: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #12: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #12: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #13: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #13: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #14: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #14: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #15: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #15: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #16: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #16: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #17: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #17: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #18: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #18: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #19: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #19: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #20: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #20: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #21: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #21: (ptrval) (rcu_read_lock){....}, at: 
skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #22: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #22: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #23: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #23: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #24: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #24: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #25: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #25: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #26: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #26: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #27: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #27: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #28: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #28: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #29: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #29: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #30: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #30: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #31: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #31: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 +dccp_close: ABORT with 65423 bytes unread + #32: (ptrval) (rcu_read_lock){....}, at: 
__skb_pull include/linux/skbuff.h:2080 [inline] + #32: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #33: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #33: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #34: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #34: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #35: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #35: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #36: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #36: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #37: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #37: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #38: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #38: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #39: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #39: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #40: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #40: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #41: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #41: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #42: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #42: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 
net/core/dev.c:2787 + #43: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #43: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #44: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #44: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #45: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #45: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #46: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #46: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 + #47: (ptrval) (rcu_read_lock){....}, at: __skb_pull include/linux/skbuff.h:2080 [inline] + #47: (ptrval) (rcu_read_lock){....}, at: skb_mac_gso_segment+0x221/0x720 net/core/dev.c:2787 +INFO: lockdep is turned off. +CPU: 1 PID: 10189 Comm: syz-executor0 Not tainted 4.17.0-rc2+ #26 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x1b9/0x294 lib/dump_stack.c:113 + __lock_acquire+0x1788/0x5140 kernel/locking/lockdep.c:3449 + lock_acquire+0x1dc/0x520 kernel/locking/lockdep.c:3920 + rcu_lock_acquire include/linux/rcupdate.h:246 [inline] + rcu_read_lock include/linux/rcupdate.h:632 [inline] + skb_mac_gso_segment+0x25b/0x720 net/core/dev.c:2789 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + 
skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + 
nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + 
skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + nsh_gso_segment+0x405/0xb60 net/nsh/nsh.c:107 + skb_mac_gso_segment+0x3ad/0x720 net/core/dev.c:2792 + __skb_gso_segment+0x3bb/0x870 net/core/dev.c:2865 + skb_gso_segment include/linux/netdevice.h:4025 [inline] + validate_xmit_skb+0x54d/0xd90 net/core/dev.c:3118 + validate_xmit_skb_list+0xbf/0x120 net/core/dev.c:3168 + sch_direct_xmit+0x354/0x11e0 net/sched/sch_generic.c:312 + qdisc_restart net/sched/sch_generic.c:399 [inline] + __qdisc_run+0x741/0x1af0 net/sched/sch_generic.c:410 + __dev_xmit_skb net/core/dev.c:3243 [inline] + __dev_queue_xmit+0x28ea/0x34c0 net/core/dev.c:3551 + dev_queue_xmit+0x17/0x20 net/core/dev.c:3616 + packet_snd net/packet/af_packet.c:2951 [inline] + packet_sendmsg+0x40f8/0x6070 net/packet/af_packet.c:2976 + sock_sendmsg_nosec net/socket.c:629 [inline] + sock_sendmsg+0xd5/0x120 net/socket.c:639 + __sys_sendto+0x3d7/0x670 net/socket.c:1789 + __do_sys_sendto net/socket.c:1801 [inline] + __se_sys_sendto net/socket.c:1797 [inline] + __x64_sys_sendto+0xe1/0x1a0 net/socket.c:1797 + do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x49/0xbe + +Fixes: c411ed854584 ("nsh: add GSO support") +Signed-off-by: Eric Dumazet +Cc: Jiri Benc +Reported-by: syzbot +Acked-by: Jiri Benc +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/nsh/nsh.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/nsh/nsh.c ++++ b/net/nsh/nsh.c +@@ -57,6 +57,8 @@ int nsh_pop(struct sk_buff *skb) + return -ENOMEM; + nh = (struct nshhdr *)(skb->data); + length = nsh_hdr_len(nh); ++ if (length < NSH_BASE_HDR_LEN) ++ return -EINVAL; + inner_proto = tun_p_to_eth_p(nh->np); + if (!pskb_may_pull(skb, length)) + return -ENOMEM; +@@ -90,6 +92,8 @@ static struct sk_buff *nsh_gso_segment(s + if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) + goto out; + nsh_len = nsh_hdr_len(nsh_hdr(skb)); ++ if (nsh_len < NSH_BASE_HDR_LEN) ++ goto out; + if (unlikely(!pskb_may_pull(skb, nsh_len))) + goto out; + diff --git a/queue-4.16/openvswitch-don-t-swap-table-in-nlattr_set-after-ovs_attr_nested-is-found.patch b/queue-4.16/openvswitch-don-t-swap-table-in-nlattr_set-after-ovs_attr_nested-is-found.patch new file mode 100644 index 00000000000..753449c0792 --- /dev/null +++ b/queue-4.16/openvswitch-don-t-swap-table-in-nlattr_set-after-ovs_attr_nested-is-found.patch @@ -0,0 +1,110 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Stefano Brivio +Date: Thu, 3 May 2018 18:13:25 +0200 +Subject: openvswitch: Don't swap table in nlattr_set() after OVS_ATTR_NESTED is found + +From: Stefano Brivio + +[ Upstream commit 72f17baf2352ded6a1d3f4bb2d15da8c678cd2cb ] + +If an OVS_ATTR_NESTED attribute type is found while walking +through netlink attributes, we call nlattr_set() recursively +passing the length table for the following nested attributes, if +different from the current one. + +However, once we're done with those sub-nested attributes, we +should continue walking through attributes using the current +table, instead of using the one related to the sub-nested +attributes. 
+ +For example, given this sequence: + +1 OVS_KEY_ATTR_PRIORITY +2 OVS_KEY_ATTR_TUNNEL +3 OVS_TUNNEL_KEY_ATTR_ID +4 OVS_TUNNEL_KEY_ATTR_IPV4_SRC +5 OVS_TUNNEL_KEY_ATTR_IPV4_DST +6 OVS_TUNNEL_KEY_ATTR_TTL +7 OVS_TUNNEL_KEY_ATTR_TP_SRC +8 OVS_TUNNEL_KEY_ATTR_TP_DST +9 OVS_KEY_ATTR_IN_PORT +10 OVS_KEY_ATTR_SKB_MARK +11 OVS_KEY_ATTR_MPLS + +we switch to the 'ovs_tunnel_key_lens' table on attribute #3, +and we don't switch back to 'ovs_key_lens' while setting +attributes #9 to #11 in the sequence. As OVS_KEY_ATTR_MPLS +evaluates to 21, and the array size of 'ovs_tunnel_key_lens' is +15, we also get this kind of KASan splat while accessing the +wrong table: + +[ 7654.586496] ================================================================== +[ 7654.594573] BUG: KASAN: global-out-of-bounds in nlattr_set+0x164/0xde9 [openvswitch] +[ 7654.603214] Read of size 4 at addr ffffffffc169ecf0 by task handler29/87430 +[ 7654.610983] +[ 7654.612644] CPU: 21 PID: 87430 Comm: handler29 Kdump: loaded Not tainted 3.10.0-866.el7.test.x86_64 #1 +[ 7654.623030] Hardware name: Dell Inc. PowerEdge R730/072T6D, BIOS 2.1.7 06/16/2016 +[ 7654.631379] Call Trace: +[ 7654.634108] [] dump_stack+0x19/0x1b +[ 7654.639843] [] print_address_description+0x33/0x290 +[ 7654.647129] [] ? nlattr_set+0x164/0xde9 [openvswitch] +[ 7654.654607] [] kasan_report.part.3+0x242/0x330 +[ 7654.661406] [] __asan_report_load4_noabort+0x34/0x40 +[ 7654.668789] [] nlattr_set+0x164/0xde9 [openvswitch] +[ 7654.676076] [] ovs_nla_get_match+0x10c8/0x1900 [openvswitch] +[ 7654.684234] [] ? genl_rcv+0x28/0x40 +[ 7654.689968] [] ? netlink_unicast+0x3f3/0x590 +[ 7654.696574] [] ? ovs_nla_put_tunnel_info+0xb0/0xb0 [openvswitch] +[ 7654.705122] [] ? unwind_get_return_address+0xb0/0xb0 +[ 7654.712503] [] ? system_call_fastpath+0x1c/0x21 +[ 7654.719401] [] ? update_stack_state+0x229/0x370 +[ 7654.726298] [] ? update_stack_state+0x229/0x370 +[ 7654.733195] [] ? kasan_unpoison_shadow+0x35/0x50 +[ 7654.740187] [] ? 
kasan_kmalloc+0xaa/0xe0 +[ 7654.746406] [] ? kasan_slab_alloc+0x12/0x20 +[ 7654.752914] [] ? memset+0x31/0x40 +[ 7654.758456] [] ovs_flow_cmd_new+0x2b2/0xf00 [openvswitch] + +[snip] + +[ 7655.132484] The buggy address belongs to the variable: +[ 7655.138226] ovs_tunnel_key_lens+0xf0/0xffffffffffffd400 [openvswitch] +[ 7655.145507] +[ 7655.147166] Memory state around the buggy address: +[ 7655.152514] ffffffffc169eb80: 00 00 00 00 00 00 00 00 00 00 fa fa fa fa fa fa +[ 7655.160585] ffffffffc169ec00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 7655.168644] >ffffffffc169ec80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 fa fa +[ 7655.176701] ^ +[ 7655.184372] ffffffffc169ed00: fa fa fa fa 00 00 00 00 fa fa fa fa 00 00 00 05 +[ 7655.192431] ffffffffc169ed80: fa fa fa fa 00 00 00 00 00 00 00 00 00 00 00 00 +[ 7655.200490] ================================================================== + +Reported-by: Hangbin Liu +Fixes: 982b52700482 ("openvswitch: Fix mask generation for nested attributes.") +Signed-off-by: Stefano Brivio +Reviewed-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/openvswitch/flow_netlink.c | 9 +++------ + 1 file changed, 3 insertions(+), 6 deletions(-) + +--- a/net/openvswitch/flow_netlink.c ++++ b/net/openvswitch/flow_netlink.c +@@ -1712,13 +1712,10 @@ static void nlattr_set(struct nlattr *at + + /* The nlattr stream should already have been validated */ + nla_for_each_nested(nla, attr, rem) { +- if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) { +- if (tbl[nla_type(nla)].next) +- tbl = tbl[nla_type(nla)].next; +- nlattr_set(nla, val, tbl); +- } else { ++ if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) ++ nlattr_set(nla, val, tbl[nla_type(nla)].next ? 
: tbl); ++ else + memset(nla_data(nla), val, nla_len(nla)); +- } + + if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE) + *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK; diff --git a/queue-4.16/qmi_wwan-do-not-steal-interfaces-from-class-drivers.patch b/queue-4.16/qmi_wwan-do-not-steal-interfaces-from-class-drivers.patch new file mode 100644 index 00000000000..39fb5dd7fdd --- /dev/null +++ b/queue-4.16/qmi_wwan-do-not-steal-interfaces-from-class-drivers.patch @@ -0,0 +1,49 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: "Bjørn Mork" +Date: Wed, 2 May 2018 22:22:54 +0200 +Subject: qmi_wwan: do not steal interfaces from class drivers + +From: "Bjørn Mork" + +[ Upstream commit 5697db4a696c41601a1d15c1922150b4dbf5726c ] + +The USB_DEVICE_INTERFACE_NUMBER matching macro assumes that +the { vendorid, productid, interfacenumber } set uniquely +identifies one specific function. This has proven to fail +for some configurable devices. One example is the Quectel +EM06/EP06 where the same interface number can be either +QMI or MBIM, without the device ID changing either. + +Fix by requiring the vendor-specific class for interface number +based matching. Functions of other classes can and should use +class based matching instead. + +Fixes: 03304bcb5ec4 ("net: qmi_wwan: use fixed interface number matching") +Signed-off-by: Bjørn Mork +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/qmi_wwan.c | 12 ++++++++++++ + 1 file changed, 12 insertions(+) + +--- a/drivers/net/usb/qmi_wwan.c ++++ b/drivers/net/usb/qmi_wwan.c +@@ -1344,6 +1344,18 @@ static int qmi_wwan_probe(struct usb_int + id->driver_info = (unsigned long)&qmi_wwan_info; + } + ++ /* There are devices where the same interface number can be ++ * configured as different functions. 
We should only bind to ++ * vendor specific functions when matching on interface number ++ */ ++ if (id->match_flags & USB_DEVICE_ID_MATCH_INT_NUMBER && ++ desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) { ++ dev_dbg(&intf->dev, ++ "Rejecting interface number match for class %02x\n", ++ desc->bInterfaceClass); ++ return -ENODEV; ++ } ++ + /* Quectel EC20 quirk where we've QMI on interface 4 instead of 0 */ + if (quectel_ec20_detected(intf) && desc->bInterfaceNumber == 0) { + dev_dbg(&intf->dev, "Quectel EC20 quirk, skipping interface 0\n"); diff --git a/queue-4.16/r8169-fix-powering-up-rtl8168h.patch b/queue-4.16/r8169-fix-powering-up-rtl8168h.patch new file mode 100644 index 00000000000..3b6f98203dc --- /dev/null +++ b/queue-4.16/r8169-fix-powering-up-rtl8168h.patch @@ -0,0 +1,49 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Heiner Kallweit +Date: Mon, 7 May 2018 21:11:21 +0200 +Subject: r8169: fix powering up RTL8168h + +From: Heiner Kallweit + +[ Upstream commit 3148dedfe79e422f448a10250d3e2cdf8b7ee617 ] + +Since commit a92a08499b1f "r8169: improve runtime pm in general and +suspend unused ports" interfaces w/o link are runtime-suspended after +10s. On systems where drivers take longer to load this can lead to the +situation that the interface is runtime-suspended already when it's +initially brought up. +This shouldn't be a problem because rtl_open() resumes MAC/PHY. +However with at least one chip version the interface doesn't properly +come up, as reported here: +https://bugzilla.kernel.org/show_bug.cgi?id=199549 + +The vendor driver uses a delay to give certain chip versions some +time to resume before starting the PHY configuration. So let's do +the same. I don't know which chip versions may be affected, +therefore apply this delay always. + +This patch was reported to fix the issue for RTL8168h. +I was able to reproduce the issue on an Asus H310I-Plus which also +uses a RTL8168h. Also in my case the patch fixed the issue. 
+ +Reported-by: Slava Kardakov +Tested-by: Slava Kardakov +Signed-off-by: Heiner Kallweit +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -5087,6 +5087,9 @@ static void rtl_pll_power_down(struct rt + static void rtl_pll_power_up(struct rtl8169_private *tp) + { + rtl_generic_op(tp, tp->pll_power_ops.up); ++ ++ /* give MAC/PHY some time to resume */ ++ msleep(20); + } + + static void rtl_init_pll_power_ops(struct rtl8169_private *tp) diff --git a/queue-4.16/rds-do-not-leak-kernel-memory-to-user-land.patch b/queue-4.16/rds-do-not-leak-kernel-memory-to-user-land.patch new file mode 100644 index 00000000000..431f5a3df34 --- /dev/null +++ b/queue-4.16/rds-do-not-leak-kernel-memory-to-user-land.patch @@ -0,0 +1,59 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Eric Dumazet +Date: Wed, 2 May 2018 14:53:39 -0700 +Subject: rds: do not leak kernel memory to user land + +From: Eric Dumazet + +[ Upstream commit eb80ca476ec11f67a62691a93604b405ffc7d80c ] + +syzbot/KMSAN reported an uninit-value in put_cmsg(), originating +from rds_cmsg_recv(). + +Simply clear the structure, since we have holes there, or since +rx_traces might be smaller than RDS_MSG_RX_DGRAM_TRACE_MAX. 
+ +BUG: KMSAN: uninit-value in copy_to_user include/linux/uaccess.h:184 [inline] +BUG: KMSAN: uninit-value in put_cmsg+0x600/0x870 net/core/scm.c:242 +CPU: 0 PID: 4459 Comm: syz-executor582 Not tainted 4.16.0+ #87 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:17 [inline] + dump_stack+0x185/0x1d0 lib/dump_stack.c:53 + kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 + kmsan_internal_check_memory+0x135/0x1e0 mm/kmsan/kmsan.c:1157 + kmsan_copy_to_user+0x69/0x160 mm/kmsan/kmsan.c:1199 + copy_to_user include/linux/uaccess.h:184 [inline] + put_cmsg+0x600/0x870 net/core/scm.c:242 + rds_cmsg_recv net/rds/recv.c:570 [inline] + rds_recvmsg+0x2db5/0x3170 net/rds/recv.c:657 + sock_recvmsg_nosec net/socket.c:803 [inline] + sock_recvmsg+0x1d0/0x230 net/socket.c:810 + ___sys_recvmsg+0x3fb/0x810 net/socket.c:2205 + __sys_recvmsg net/socket.c:2250 [inline] + SYSC_recvmsg+0x298/0x3c0 net/socket.c:2262 + SyS_recvmsg+0x54/0x80 net/socket.c:2257 + do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x3d/0xa2 + +Fixes: 3289025aedc0 ("RDS: add receive message trace used by application") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Santosh Shilimkar +Cc: linux-rdma +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/rds/recv.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/rds/recv.c ++++ b/net/rds/recv.c +@@ -558,6 +558,7 @@ static int rds_cmsg_recv(struct rds_inco + struct rds_cmsg_rx_trace t; + int i, j; + ++ memset(&t, 0, sizeof(t)); + inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock(); + t.rx_traces = rs->rs_rx_traces; + for (i = 0; i < rs->rs_rx_traces; i++) { diff --git a/queue-4.16/sctp-clear-the-new-asoc-s-stream-outcnt-in-sctp_stream_update.patch b/queue-4.16/sctp-clear-the-new-asoc-s-stream-outcnt-in-sctp_stream_update.patch new file mode 100644 index 00000000000..fd86ad08b17 --- /dev/null +++ b/queue-4.16/sctp-clear-the-new-asoc-s-stream-outcnt-in-sctp_stream_update.patch @@ -0,0 +1,42 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Xin Long +Date: Thu, 26 Apr 2018 15:21:44 +0800 +Subject: sctp: clear the new asoc's stream outcnt in sctp_stream_update + +From: Xin Long + +[ Upstream commit 6a9a27d5397fc6c52f90c09ddab91e65053584aa ] + +When processing a duplicate cookie-echo chunk, sctp moves the new +temp asoc's stream out/in into the old asoc, and later frees this +new temp asoc. + +But now after this move, the new temp asoc's stream->outcnt is not +cleared while stream->out is set to NULL, which would cause a same +crash as the one fixed in Commit 79d0895140e9 ("sctp: fix error +path in sctp_stream_init") when freeing this asoc later. + +This fix is to clear this outcnt in sctp_stream_update. + +Fixes: f952be79cebd ("sctp: introduce struct sctp_stream_out_ext") +Reported-by: Jianwen Ji +Signed-off-by: Xin Long +Acked-by: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/stream.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sctp/stream.c ++++ b/net/sctp/stream.c +@@ -240,6 +240,8 @@ void sctp_stream_update(struct sctp_stre + + new->out = NULL; + new->in = NULL; ++ new->outcnt = 0; ++ new->incnt = 0; + } + + static int sctp_send_reconf(struct sctp_association *asoc, diff --git a/queue-4.16/sctp-delay-the-authentication-for-the-duplicated-cookie-echo-chunk.patch b/queue-4.16/sctp-delay-the-authentication-for-the-duplicated-cookie-echo-chunk.patch new file mode 100644 index 00000000000..ccc870c8950 --- /dev/null +++ b/queue-4.16/sctp-delay-the-authentication-for-the-duplicated-cookie-echo-chunk.patch @@ -0,0 +1,247 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Xin Long +Date: Sat, 5 May 2018 14:59:47 +0800 +Subject: sctp: delay the authentication for the duplicated cookie-echo chunk + +From: Xin Long + +[ Upstream commit 59d8d4434f429b4fa8a346fd889058bda427a837 ] + +Now sctp only delays the authentication for the normal cookie-echo +chunk by setting chunk->auth_chunk in sctp_endpoint_bh_rcv(). But +for the duplicated one with auth, in sctp_assoc_bh_rcv(), it does +authentication first based on the old asoc, which will definitely +fail due to the different auth info in the old asoc. + +The duplicated cookie-echo chunk will create a new asoc with the +auth info from this chunk, and the authentication should also be +done with the new asoc's auth info for all of the collision 'A', +'B' and 'D'. Otherwise, the duplicated cookie-echo chunk with auth +will never pass the authentication and create the new connection. + +This issue exists since very beginning, and this fix is to make +sctp_assoc_bh_rcv() follow the way sctp_endpoint_bh_rcv() does +for the normal cookie-echo chunk to delay the authentication. 
+ +While at it, remove the unused params from sctp_sf_authenticate() +and define sctp_auth_chunk_verify() used for all the places that +do the delayed authentication. + +v1->v2: + fix the typo in changelog as Marcelo noticed. + +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: Xin Long +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/associola.c | 30 ++++++++++++++++ + net/sctp/sm_statefuns.c | 86 +++++++++++++++++++++++++----------------------- + 2 files changed, 75 insertions(+), 41 deletions(-) + +--- a/net/sctp/associola.c ++++ b/net/sctp/associola.c +@@ -1024,8 +1024,9 @@ static void sctp_assoc_bh_rcv(struct wor + struct sctp_endpoint *ep; + struct sctp_chunk *chunk; + struct sctp_inq *inqueue; +- int state; ++ int first_time = 1; /* is this the first time through the loop */ + int error = 0; ++ int state; + + /* The association should be held so we should be safe. */ + ep = asoc->ep; +@@ -1036,6 +1037,30 @@ static void sctp_assoc_bh_rcv(struct wor + state = asoc->state; + subtype = SCTP_ST_CHUNK(chunk->chunk_hdr->type); + ++ /* If the first chunk in the packet is AUTH, do special ++ * processing specified in Section 6.3 of SCTP-AUTH spec ++ */ ++ if (first_time && subtype.chunk == SCTP_CID_AUTH) { ++ struct sctp_chunkhdr *next_hdr; ++ ++ next_hdr = sctp_inq_peek(inqueue); ++ if (!next_hdr) ++ goto normal; ++ ++ /* If the next chunk is COOKIE-ECHO, skip the AUTH ++ * chunk while saving a pointer to it so we can do ++ * Authentication later (during cookie-echo ++ * processing). ++ */ ++ if (next_hdr->type == SCTP_CID_COOKIE_ECHO) { ++ chunk->auth_chunk = skb_clone(chunk->skb, ++ GFP_ATOMIC); ++ chunk->auth = 1; ++ continue; ++ } ++ } ++ ++normal: + /* SCTP-AUTH, Section 6.3: + * The receiver has a list of chunk types which it expects + * to be received only after an AUTH-chunk. 
This list has +@@ -1074,6 +1099,9 @@ static void sctp_assoc_bh_rcv(struct wor + /* If there is an error on chunk, discard this packet. */ + if (error && chunk) + chunk->pdiscard = 1; ++ ++ if (first_time) ++ first_time = 0; + } + sctp_association_put(asoc); + } +--- a/net/sctp/sm_statefuns.c ++++ b/net/sctp/sm_statefuns.c +@@ -153,10 +153,7 @@ static enum sctp_disposition sctp_sf_vio + struct sctp_cmd_seq *commands); + + static enum sctp_ierror sctp_sf_authenticate( +- struct net *net, +- const struct sctp_endpoint *ep, + const struct sctp_association *asoc, +- const union sctp_subtype type, + struct sctp_chunk *chunk); + + static enum sctp_disposition __sctp_sf_do_9_1_abort( +@@ -621,6 +618,38 @@ enum sctp_disposition sctp_sf_do_5_1C_ac + return SCTP_DISPOSITION_CONSUME; + } + ++static bool sctp_auth_chunk_verify(struct net *net, struct sctp_chunk *chunk, ++ const struct sctp_association *asoc) ++{ ++ struct sctp_chunk auth; ++ ++ if (!chunk->auth_chunk) ++ return true; ++ ++ /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo ++ * is supposed to be authenticated and we have to do delayed ++ * authentication. We've just recreated the association using ++ * the information in the cookie and now it's much easier to ++ * do the authentication. ++ */ ++ ++ /* Make sure that we and the peer are AUTH capable */ ++ if (!net->sctp.auth_enable || !asoc->peer.auth_capable) ++ return false; ++ ++ /* set-up our fake chunk so that we can process it */ ++ auth.skb = chunk->auth_chunk; ++ auth.asoc = chunk->asoc; ++ auth.sctp_hdr = chunk->sctp_hdr; ++ auth.chunk_hdr = (struct sctp_chunkhdr *) ++ skb_push(chunk->auth_chunk, ++ sizeof(struct sctp_chunkhdr)); ++ skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr)); ++ auth.transport = chunk->transport; ++ ++ return sctp_sf_authenticate(asoc, &auth) == SCTP_IERROR_NO_ERROR; ++} ++ + /* + * Respond to a normal COOKIE ECHO chunk. + * We are the side that is being asked for an association. 
+@@ -758,37 +787,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce + if (error) + goto nomem_init; + +- /* SCTP-AUTH: auth_chunk pointer is only set when the cookie-echo +- * is supposed to be authenticated and we have to do delayed +- * authentication. We've just recreated the association using +- * the information in the cookie and now it's much easier to +- * do the authentication. +- */ +- if (chunk->auth_chunk) { +- struct sctp_chunk auth; +- enum sctp_ierror ret; +- +- /* Make sure that we and the peer are AUTH capable */ +- if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { +- sctp_association_free(new_asoc); +- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); +- } +- +- /* set-up our fake chunk so that we can process it */ +- auth.skb = chunk->auth_chunk; +- auth.asoc = chunk->asoc; +- auth.sctp_hdr = chunk->sctp_hdr; +- auth.chunk_hdr = (struct sctp_chunkhdr *) +- skb_push(chunk->auth_chunk, +- sizeof(struct sctp_chunkhdr)); +- skb_pull(chunk->auth_chunk, sizeof(struct sctp_chunkhdr)); +- auth.transport = chunk->transport; +- +- ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth); +- if (ret != SCTP_IERROR_NO_ERROR) { +- sctp_association_free(new_asoc); +- return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); +- } ++ if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) { ++ sctp_association_free(new_asoc); ++ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + } + + repl = sctp_make_cookie_ack(new_asoc, chunk); +@@ -1758,13 +1759,15 @@ static enum sctp_disposition sctp_sf_do_ + GFP_ATOMIC)) + goto nomem; + ++ if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) ++ return SCTP_DISPOSITION_DISCARD; ++ + /* Make sure no new addresses are being added during the + * restart. Though this is a pretty complicated attack + * since you'd have to get inside the cookie. 
+ */ +- if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) { ++ if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) + return SCTP_DISPOSITION_CONSUME; +- } + + /* If the endpoint is in the SHUTDOWN-ACK-SENT state and recognizes + * the peer has restarted (Action A), it MUST NOT setup a new +@@ -1870,6 +1873,9 @@ static enum sctp_disposition sctp_sf_do_ + GFP_ATOMIC)) + goto nomem; + ++ if (!sctp_auth_chunk_verify(net, chunk, new_asoc)) ++ return SCTP_DISPOSITION_DISCARD; ++ + /* Update the content of current association. */ + sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, +@@ -1964,6 +1970,9 @@ static enum sctp_disposition sctp_sf_do_ + * a COOKIE ACK. + */ + ++ if (!sctp_auth_chunk_verify(net, chunk, asoc)) ++ return SCTP_DISPOSITION_DISCARD; ++ + /* Don't accidentally move back into established state. */ + if (asoc->state < SCTP_STATE_ESTABLISHED) { + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, +@@ -4108,10 +4117,7 @@ gen_shutdown: + * The return value is the disposition of the chunk. 
+ */ + static enum sctp_ierror sctp_sf_authenticate( +- struct net *net, +- const struct sctp_endpoint *ep, + const struct sctp_association *asoc, +- const union sctp_subtype type, + struct sctp_chunk *chunk) + { + struct sctp_authhdr *auth_hdr; +@@ -4209,7 +4215,7 @@ enum sctp_disposition sctp_sf_eat_auth(s + commands); + + auth_hdr = (struct sctp_authhdr *)chunk->skb->data; +- error = sctp_sf_authenticate(net, ep, asoc, type, chunk); ++ error = sctp_sf_authenticate(asoc, chunk); + switch (error) { + case SCTP_IERROR_AUTH_BAD_HMAC: + /* Generate the ERROR chunk and discard the rest diff --git a/queue-4.16/sctp-fix-the-issue-that-the-cookie-ack-with-auth-can-t-get-processed.patch b/queue-4.16/sctp-fix-the-issue-that-the-cookie-ack-with-auth-can-t-get-processed.patch new file mode 100644 index 00000000000..add0eed1534 --- /dev/null +++ b/queue-4.16/sctp-fix-the-issue-that-the-cookie-ack-with-auth-can-t-get-processed.patch @@ -0,0 +1,42 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Xin Long +Date: Wed, 2 May 2018 13:45:12 +0800 +Subject: sctp: fix the issue that the cookie-ack with auth can't get processed + +From: Xin Long + +[ Upstream commit ce402f044e4e432c296f90eaabb8dbe8f3624391 ] + +When auth is enabled for cookie-ack chunk, in sctp_inq_pop, sctp +processes auth chunk first, then continues to the next chunk in +this packet if chunk_end + chunk_hdr size < skb_tail_pointer(). +Otherwise, it will go to the next packet or discard this chunk. + +However, it missed the fact that cookie-ack chunk's size is equal +to chunk_hdr size, which couldn't match that check, and thus this +chunk would not get processed. + +This patch fixes it by changing the check to chunk_end + chunk_hdr +size <= skb_tail_pointer(). + +Fixes: 26b87c788100 ("net: sctp: fix remote memory pressure from excessive queueing") +Signed-off-by: Xin Long +Acked-by: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/inqueue.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/inqueue.c ++++ b/net/sctp/inqueue.c +@@ -217,7 +217,7 @@ new_skb: + skb_pull(chunk->skb, sizeof(*ch)); + chunk->subh.v = NULL; /* Subheader is no longer valid. */ + +- if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) { ++ if (chunk->chunk_end + sizeof(*ch) <= skb_tail_pointer(chunk->skb)) { + /* This is not a singleton */ + chunk->singleton = 0; + } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { diff --git a/queue-4.16/sctp-handle-two-v4-addrs-comparison-in-sctp_inet6_cmp_addr.patch b/queue-4.16/sctp-handle-two-v4-addrs-comparison-in-sctp_inet6_cmp_addr.patch new file mode 100644 index 00000000000..3df646a1dcb --- /dev/null +++ b/queue-4.16/sctp-handle-two-v4-addrs-comparison-in-sctp_inet6_cmp_addr.patch @@ -0,0 +1,47 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Xin Long +Date: Thu, 26 Apr 2018 14:13:57 +0800 +Subject: sctp: handle two v4 addrs comparison in sctp_inet6_cmp_addr + +From: Xin Long + +[ Upstream commit d625329b06e46bd20baf9ee40847d11982569204 ] + +Since sctp ipv6 socket also supports v4 addrs, it's possible to +compare two v4 addrs in pf v6 .cmp_addr, sctp_inet6_cmp_addr. + +However after Commit 1071ec9d453a ("sctp: do not check port in +sctp_inet6_cmp_addr"), it no longer calls af1->cmp_addr, which +in this case is sctp_v4_cmp_addr, but calls __sctp_v6_cmp_addr +where it handles them as two v6 addrs. It would cause an out of +bounds crash. + +syzbot found this crash when trying to bind two v4 addrs to a +v6 socket. + +This patch fixes it by adding the process for two v4 addrs in +sctp_inet6_cmp_addr. + +Fixes: 1071ec9d453a ("sctp: do not check port in sctp_inet6_cmp_addr") +Reported-by: syzbot+cd494c1dd681d4d93ebb@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Acked-by: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/ipv6.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/sctp/ipv6.c ++++ b/net/sctp/ipv6.c +@@ -866,6 +866,9 @@ static int sctp_inet6_cmp_addr(const uni + if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) + return 1; + ++ if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET) ++ return addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr; ++ + return __sctp_v6_cmp_addr(addr1, addr2); + } + diff --git a/queue-4.16/sctp-remove-sctp_chunk_put-from-fail_mark-err-path-in-sctp_ulpevent_make_rcvmsg.patch b/queue-4.16/sctp-remove-sctp_chunk_put-from-fail_mark-err-path-in-sctp_ulpevent_make_rcvmsg.patch new file mode 100644 index 00000000000..2a5bfe94318 --- /dev/null +++ b/queue-4.16/sctp-remove-sctp_chunk_put-from-fail_mark-err-path-in-sctp_ulpevent_make_rcvmsg.patch @@ -0,0 +1,40 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Xin Long +Date: Thu, 10 May 2018 17:34:13 +0800 +Subject: sctp: remove sctp_chunk_put from fail_mark err path in sctp_ulpevent_make_rcvmsg + +From: Xin Long + +[ Upstream commit 6910e25de2257e2c82c7a2d126e3463cd8e50810 ] + +In Commit 1f45f78f8e51 ("sctp: allow GSO frags to access the chunk too"), +it held the chunk in sctp_ulpevent_make_rcvmsg to access it safely later +in recvmsg. However, it also added sctp_chunk_put in fail_mark err path, +which is only triggered before holding the chunk. + +syzbot reported a use-after-free crash happened on this err path, where +it shouldn't call sctp_chunk_put. + +This patch simply removes this call. + +Fixes: 1f45f78f8e51 ("sctp: allow GSO frags to access the chunk too") +Reported-by: syzbot+141d898c5f24489db4aa@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Acked-by: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/ulpevent.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/net/sctp/ulpevent.c ++++ b/net/sctp/ulpevent.c +@@ -715,7 +715,6 @@ struct sctp_ulpevent *sctp_ulpevent_make + return event; + + fail_mark: +- sctp_chunk_put(chunk); + kfree_skb(skb); + fail: + return NULL; diff --git a/queue-4.16/sctp-use-the-old-asoc-when-making-the-cookie-ack-chunk-in-dupcook_d.patch b/queue-4.16/sctp-use-the-old-asoc-when-making-the-cookie-ack-chunk-in-dupcook_d.patch new file mode 100644 index 00000000000..1f04245d144 --- /dev/null +++ b/queue-4.16/sctp-use-the-old-asoc-when-making-the-cookie-ack-chunk-in-dupcook_d.patch @@ -0,0 +1,40 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Xin Long +Date: Wed, 2 May 2018 13:39:46 +0800 +Subject: sctp: use the old asoc when making the cookie-ack chunk in dupcook_d + +From: Xin Long + +[ Upstream commit 46e16d4b956867013e0bbd7f2bad206f4aa55752 ] + +When processing a duplicate cookie-echo chunk, for case 'D', sctp will +not process the param from this chunk. It means old asoc has nothing +to be updated, and the new temp asoc doesn't have the complete info. + +So there's no reason to use the new asoc when creating the cookie-ack +chunk. Otherwise, like when auth is enabled for cookie-ack, the chunk +can not be set with auth, and it will definitely be dropped by peer. + +This issue is there since very beginning, and we fix it by using the +old asoc instead. + +Signed-off-by: Xin Long +Acked-by: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/sm_statefuns.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/sm_statefuns.c ++++ b/net/sctp/sm_statefuns.c +@@ -2012,7 +2012,7 @@ static enum sctp_disposition sctp_sf_do_ + } + } + +- repl = sctp_make_cookie_ack(new_asoc, chunk); ++ repl = sctp_make_cookie_ack(asoc, chunk); + if (!repl) + goto nomem; + diff --git a/queue-4.16/tcp-ignore-fast-open-on-repair-mode.patch b/queue-4.16/tcp-ignore-fast-open-on-repair-mode.patch new file mode 100644 index 00000000000..17ab6a41da8 --- /dev/null +++ b/queue-4.16/tcp-ignore-fast-open-on-repair-mode.patch @@ -0,0 +1,46 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Yuchung Cheng +Date: Wed, 25 Apr 2018 11:33:08 -0700 +Subject: tcp: ignore Fast Open on repair mode + +From: Yuchung Cheng + +[ Upstream commit 16ae6aa1705299789f71fdea59bfb119c1fbd9c0 ] + +The TCP repair sequence of operation is to first set the socket in +repair mode, then inject the TCP stats into the socket with repair +socket options, then call connect() to re-activate the socket. The +connect syscall simply returns and set state to ESTABLISHED +mode. As a result Fast Open is meaningless for TCP repair. + +However allowing sendto() system call with MSG_FASTOPEN flag half-way +during the repair operation could unexpectedly cause data to be +sent, before the operation finishes changing the internal TCP stats +(e.g. MSS). This in turn triggers TCP warnings on inconsistent +packet accounting. + +The fix is to simply disallow Fast Open operation once the socket +is in the repair mode. + +Reported-by: syzbot +Signed-off-by: Yuchung Cheng +Reviewed-by: Neal Cardwell +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -1210,7 +1210,8 @@ int tcp_sendmsg_locked(struct sock *sk, + uarg->zerocopy = 0; + } + +- if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) { ++ if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) && ++ !tp->repair) { + err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); + if (err == -EINPROGRESS && copied_syn > 0) + goto out; diff --git a/queue-4.16/tcp-restore-autocorking.patch b/queue-4.16/tcp-restore-autocorking.patch new file mode 100644 index 00000000000..c421b9592fc --- /dev/null +++ b/queue-4.16/tcp-restore-autocorking.patch @@ -0,0 +1,86 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Eric Dumazet +Date: Wed, 2 May 2018 20:25:13 -0700 +Subject: tcp: restore autocorking + +From: Eric Dumazet + +[ Upstream commit 114f39feab360e6c7b0c4238697f223444d662a1 ] + +When adding rb-tree for TCP retransmit queue, we inadvertently broke +TCP autocorking. + +tcp_should_autocork() should really check if the rtx queue is not empty. + +Tested: + +Before the fix : +$ nstat -n;./netperf -H 10.246.7.152 -Cc -- -m 500;nstat | grep AutoCork +MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.246.7.152 () port 0 AF_INET +Recv Send Send Utilization Service Demand +Socket Socket Message Elapsed Send Recv Send Recv +Size Size Size Time Throughput local remote local remote +bytes bytes bytes secs. 
10^6bits/s % S % S us/KB us/KB + +540000 262144 500 10.00 2682.85 2.47 1.59 3.618 2.329 +TcpExtTCPAutoCorking 33 0.0 + +// Same test, but forcing TCP_NODELAY +$ nstat -n;./netperf -H 10.246.7.152 -Cc -- -D -m 500;nstat | grep AutoCork +MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.246.7.152 () port 0 AF_INET : nodelay +Recv Send Send Utilization Service Demand +Socket Socket Message Elapsed Send Recv Send Recv +Size Size Size Time Throughput local remote local remote +bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB + +540000 262144 500 10.00 1408.75 2.44 2.96 6.802 8.259 +TcpExtTCPAutoCorking 1 0.0 + +After the fix : +$ nstat -n;./netperf -H 10.246.7.152 -Cc -- -m 500;nstat | grep AutoCork +MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.246.7.152 () port 0 AF_INET +Recv Send Send Utilization Service Demand +Socket Socket Message Elapsed Send Recv Send Recv +Size Size Size Time Throughput local remote local remote +bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB + +540000 262144 500 10.00 5472.46 2.45 1.43 1.761 1.027 +TcpExtTCPAutoCorking 361293 0.0 + +// With TCP_NODELAY option +$ nstat -n;./netperf -H 10.246.7.152 -Cc -- -D -m 500;nstat | grep AutoCork +MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.246.7.152 () port 0 AF_INET : nodelay +Recv Send Send Utilization Service Demand +Socket Socket Message Elapsed Send Recv Send Recv +Size Size Size Time Throughput local remote local remote +bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB + +540000 262144 500 10.00 5454.96 2.46 1.63 1.775 1.174 +TcpExtTCPAutoCorking 315448 0.0 + +Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue") +Signed-off-by: Eric Dumazet +Reported-by: Michael Wenig +Tested-by: Michael Wenig +Signed-off-by: Eric Dumazet +Reported-by: Michael Wenig +Tested-by: Michael Wenig +Acked-by: Neal Cardwell +Acked-by: Soheil Hassas Yeganeh +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -688,7 +688,7 @@ static bool tcp_should_autocork(struct s + { + return skb->len < size_goal && + sock_net(sk)->ipv4.sysctl_tcp_autocorking && +- skb != tcp_write_queue_head(sk) && ++ !tcp_rtx_queue_empty(sk) && + refcount_read(&sk->sk_wmem_alloc) > skb->truesize; + } + diff --git a/queue-4.16/tcp_bbr-fix-to-zero-idle_restart-only-upon-s-acked-data.patch b/queue-4.16/tcp_bbr-fix-to-zero-idle_restart-only-upon-s-acked-data.patch new file mode 100644 index 00000000000..2357e2be51e --- /dev/null +++ b/queue-4.16/tcp_bbr-fix-to-zero-idle_restart-only-upon-s-acked-data.patch @@ -0,0 +1,48 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Neal Cardwell +Date: Tue, 1 May 2018 21:45:41 -0400 +Subject: tcp_bbr: fix to zero idle_restart only upon S/ACKed data + +From: Neal Cardwell + +[ Upstream commit e6e6a278b1eaffa19d42186bfacd1ffc15a50b3f ] + +Previously the bbr->idle_restart tracking was zeroing out the +bbr->idle_restart bit upon ACKs that did not SACK or ACK anything, +e.g. receiving incoming data or receiver window updates. In such +situations BBR would forget that this was a restart-from-idle +situation, and if the min_rtt had expired it would unnecessarily enter +PROBE_RTT (even though we were actually restarting from idle but had +merely forgotten that fact). + +The fix is simple: we need to remember we are restarting from idle +until we receive a S/ACK for some data (a S/ACK for the first flight +of data we send as we are restarting). + +This commit is a stable candidate for kernels back as far as 4.9. + +Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") +Signed-off-by: Neal Cardwell +Signed-off-by: Yuchung Cheng +Signed-off-by: Soheil Hassas Yeganeh +Signed-off-by: Priyaranjan Jha +Signed-off-by: Yousuk Seung +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_bbr.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/ipv4/tcp_bbr.c ++++ b/net/ipv4/tcp_bbr.c +@@ -803,7 +803,9 @@ static void bbr_update_min_rtt(struct so + } + } + } +- bbr->idle_restart = 0; ++ /* Restart after idle ends only once we process a new S/ACK for data */ ++ if (rs->delivered > 0) ++ bbr->idle_restart = 0; + } + + static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) diff --git a/queue-4.16/tg3-fix-vunmap-bug_on-triggered-from-tg3_free_consistent.patch b/queue-4.16/tg3-fix-vunmap-bug_on-triggered-from-tg3_free_consistent.patch new file mode 100644 index 00000000000..720bbe53c76 --- /dev/null +++ b/queue-4.16/tg3-fix-vunmap-bug_on-triggered-from-tg3_free_consistent.patch @@ -0,0 +1,57 @@ +From foo@baz Wed May 16 10:37:27 CEST 2018 +From: Michael Chan +Date: Thu, 3 May 2018 20:04:27 -0400 +Subject: tg3: Fix vunmap() BUG_ON() triggered from tg3_free_consistent(). + +From: Michael Chan + +[ Upstream commit d89a2adb8bfe6f8949ff389acdb9fa298b6e8e12 ] + +tg3_free_consistent() calls dma_free_coherent() to free tp->hw_stats +under spinlock and can trigger BUG_ON() in vunmap() because vunmap() +may sleep. Fix it by removing the spinlock and relying on the +TG3_FLAG_INIT_COMPLETE flag to prevent race conditions between +tg3_get_stats64() and tg3_free_consistent(). TG3_FLAG_INIT_COMPLETE +is always cleared under tp->lock before tg3_free_consistent() +and therefore tg3_get_stats64() can safely access tp->hw_stats +under tp->lock if TG3_FLAG_INIT_COMPLETE is set. + +Fixes: f5992b72ebe0 ("tg3: Fix race condition in tg3_get_stats64().") +Reported-by: Zumeng Chen +Signed-off-by: Michael Chan +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/broadcom/tg3.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/net/ethernet/broadcom/tg3.c ++++ b/drivers/net/ethernet/broadcom/tg3.c +@@ -8733,14 +8733,15 @@ static void tg3_free_consistent(struct t + tg3_mem_rx_release(tp); + tg3_mem_tx_release(tp); + +- /* Protect tg3_get_stats64() from reading freed tp->hw_stats. */ +- tg3_full_lock(tp, 0); ++ /* tp->hw_stats can be referenced safely: ++ * 1. under rtnl_lock ++ * 2. or under tp->lock if TG3_FLAG_INIT_COMPLETE is set. ++ */ + if (tp->hw_stats) { + dma_free_coherent(&tp->pdev->dev, sizeof(struct tg3_hw_stats), + tp->hw_stats, tp->stats_mapping); + tp->hw_stats = NULL; + } +- tg3_full_unlock(tp); + } + + /* +@@ -14178,7 +14179,7 @@ static void tg3_get_stats64(struct net_d + struct tg3 *tp = netdev_priv(dev); + + spin_lock_bh(&tp->lock); +- if (!tp->hw_stats) { ++ if (!tp->hw_stats || !tg3_flag(tp, INIT_COMPLETE)) { + *stats = tp->net_stats_prev; + spin_unlock_bh(&tp->lock); + return; diff --git a/queue-4.16/tipc-fix-one-byte-leak-in-tipc_sk_set_orig_addr.patch b/queue-4.16/tipc-fix-one-byte-leak-in-tipc_sk_set_orig_addr.patch new file mode 100644 index 00000000000..4910e520b72 --- /dev/null +++ b/queue-4.16/tipc-fix-one-byte-leak-in-tipc_sk_set_orig_addr.patch @@ -0,0 +1,87 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Eric Dumazet +Date: Wed, 9 May 2018 09:50:22 -0700 +Subject: tipc: fix one byte leak in tipc_sk_set_orig_addr() + +From: Eric Dumazet + +[ Upstream commit 09c8b9718a7af674036643fa2e0dbb2f09aba75e ] + +syzbot/KMSAN reported an uninit-value in recvmsg() that +I tracked down to tipc_sk_set_orig_addr(), missing +srcaddr->member.scope initialization. + +This patch moves srcaddr->sock.scope init to follow +fields order and ease future verifications.
+ +BUG: KMSAN: uninit-value in copy_to_user include/linux/uaccess.h:184 [inline] +BUG: KMSAN: uninit-value in move_addr_to_user+0x32e/0x530 net/socket.c:226 +CPU: 0 PID: 4549 Comm: syz-executor287 Not tainted 4.17.0-rc3+ #88 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:77 [inline] + dump_stack+0x185/0x1d0 lib/dump_stack.c:113 + kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 + kmsan_internal_check_memory+0x135/0x1e0 mm/kmsan/kmsan.c:1157 + kmsan_copy_to_user+0x69/0x160 mm/kmsan/kmsan.c:1199 + copy_to_user include/linux/uaccess.h:184 [inline] + move_addr_to_user+0x32e/0x530 net/socket.c:226 + ___sys_recvmsg+0x4e2/0x810 net/socket.c:2285 + __sys_recvmsg net/socket.c:2328 [inline] + __do_sys_recvmsg net/socket.c:2338 [inline] + __se_sys_recvmsg net/socket.c:2335 [inline] + __x64_sys_recvmsg+0x325/0x460 net/socket.c:2335 + do_syscall_64+0x154/0x220 arch/x86/entry/common.c:287 + entry_SYSCALL_64_after_hwframe+0x44/0xa9 +RIP: 0033:0x4455e9 +RSP: 002b:00007fe3bd36ddb8 EFLAGS: 00000246 ORIG_RAX: 000000000000002f +RAX: ffffffffffffffda RBX: 00000000006dac24 RCX: 00000000004455e9 +RDX: 0000000000002002 RSI: 0000000020000400 RDI: 0000000000000003 +RBP: 00000000006dac20 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 +R13: 00007fff98ce4b6f R14: 00007fe3bd36e9c0 R15: 0000000000000003 + +Local variable description: ----addr@___sys_recvmsg +Variable was created at: + ___sys_recvmsg+0xd5/0x810 net/socket.c:2246 + __sys_recvmsg net/socket.c:2328 [inline] + __do_sys_recvmsg net/socket.c:2338 [inline] + __se_sys_recvmsg net/socket.c:2335 [inline] + __x64_sys_recvmsg+0x325/0x460 net/socket.c:2335 + +Byte 19 of 32 is uninitialized + +Fixes: 31c82a2d9d51 ("tipc: add second source address to recvmsg()/recvfrom()") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Cc: Jon Maloy +Cc: Ying Xue +Acked-by: Jon Maloy +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/socket.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/tipc/socket.c ++++ b/net/tipc/socket.c +@@ -1518,10 +1518,10 @@ static void tipc_sk_set_orig_addr(struct + + srcaddr->sock.family = AF_TIPC; + srcaddr->sock.addrtype = TIPC_ADDR_ID; ++ srcaddr->sock.scope = 0; + srcaddr->sock.addr.id.ref = msg_origport(hdr); + srcaddr->sock.addr.id.node = msg_orignode(hdr); + srcaddr->sock.addr.name.domain = 0; +- srcaddr->sock.scope = 0; + m->msg_namelen = sizeof(struct sockaddr_tipc); + + if (!msg_in_group(hdr)) +@@ -1530,6 +1530,7 @@ static void tipc_sk_set_orig_addr(struct + /* Group message users may also want to know sending member's id */ + srcaddr->member.family = AF_TIPC; + srcaddr->member.addrtype = TIPC_ADDR_NAME; ++ srcaddr->member.scope = 0; + srcaddr->member.addr.name.name.type = msg_nametype(hdr); + srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; + srcaddr->member.addr.name.domain = 0; diff --git a/queue-4.16/udp-fix-so_bindtodevice.patch b/queue-4.16/udp-fix-so_bindtodevice.patch new file mode 100644 index 00000000000..283e9b48a6e --- /dev/null +++ b/queue-4.16/udp-fix-so_bindtodevice.patch @@ -0,0 +1,58 @@ +From foo@baz Wed May 16 10:37:28 CEST 2018 +From: Paolo Abeni +Date: Wed, 9 May 2018 12:42:34 +0200 +Subject: udp: fix SO_BINDTODEVICE + +From: Paolo Abeni + +[ Upstream commit 69678bcd4d2dedbc3e8fcd6d7d99f283d83c531a ] + +Damir reported a breakage of SO_BINDTODEVICE for UDP sockets. +In absence of VRF devices, after commit fb74c27735f0 ("net: +ipv4: add second dif to udp socket lookups") the dif mismatch +isn't fatal anymore for UDP socket lookup with non null +sk_bound_dev_if, breaking SO_BINDTODEVICE semantics. + +This changeset addresses the issue making the dif match mandatory +again in the above scenario. 
+ +Reported-by: Damir Mansurov +Fixes: fb74c27735f0 ("net: ipv4: add second dif to udp socket lookups") +Fixes: 1801b570dd2a ("net: ipv6: add second dif to udp socket lookups") +Signed-off-by: Paolo Abeni +Acked-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/udp.c | 4 ++-- + net/ipv6/udp.c | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -407,9 +407,9 @@ static int compute_score(struct sock *sk + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + +- if (exact_dif && !dev_match) ++ if (!dev_match) + return -1; +- if (sk->sk_bound_dev_if && dev_match) ++ if (sk->sk_bound_dev_if) + score += 4; + } + +--- a/net/ipv6/udp.c ++++ b/net/ipv6/udp.c +@@ -148,9 +148,9 @@ static int compute_score(struct sock *sk + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + +- if (exact_dif && !dev_match) ++ if (!dev_match) + return -1; +- if (sk->sk_bound_dev_if && dev_match) ++ if (sk->sk_bound_dev_if) + score++; + } +