From: Greg Kroah-Hartman Date: Sun, 28 Jan 2018 16:38:52 +0000 (+0100) Subject: 4.14-stable patches X-Git-Tag: v4.4.114~17 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=68910bfc71a90411f44353259e34ef697cfe6ddb;p=thirdparty%2Fkernel%2Fstable-queue.git 4.14-stable patches added patches: be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch flow_dissector-properly-cap-thoff-field.patch gso-validate-gso_type-in-gso-handlers.patch ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch lan78xx-fix-failure-in-usb-full-speed.patch mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch net-ib-mlx5-don-t-disable-local-loopback-multicast-traffic-when-needed.patch net-igmp-fix-source-address-check-for-igmpv3-reports.patch net-ipv4-make-ip-route-get-match-iif-lo-rules-again.patch net-mlx5-fix-get-vector-affinity-helper-function.patch net-mlx5e-fix-fixpoint-divide-exception-in-mlx5e_am_stats_compare.patch net-qdisc_pkt_len_init-should-be-more-robust.patch net-tcp-close-sock-if-net-namespace-is-exiting.patch net-tls-fix-inverted-error-codes-to-avoid-endless-loop.patch net-tls-only-attach-to-sockets-in-established-state.patch net-vrf-add-support-for-sends-to-local-broadcast-address.patch netlink-extack-needs-to-be-reset-each-time-through-loop.patch netlink-reset-extack-earlier-in-netlink_rcv_skb.patch nfp-use-the-correct-index-for-link-speed-table.patch ppp-unlock-all_ppp_mutex-before-registering-device.patch pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch 
r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch sctp-reinit-stream-if-stream-outcnt-has-been-change-by-sinit-in-sendmsg.patch sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch tls-fix-sw_ctx-leak.patch tls-reset-crypto_info-when-do_tls_setsockopt_tx-fails.patch tls-return-ebusy-if-crypto_info-is-already-set.patch tun-fix-a-memory-leak-for-tfile-tx_array.patch vmxnet3-repair-memory-leak.patch --- diff --git a/queue-4.14/be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch b/queue-4.14/be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch new file mode 100644 index 00000000000..5da2914ae89 --- /dev/null +++ b/queue-4.14/be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch @@ -0,0 +1,51 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Ivan Vecera +Date: Fri, 19 Jan 2018 20:23:50 +0100 +Subject: be2net: restore properly promisc mode after queues reconfiguration + +From: Ivan Vecera + + +[ Upstream commit 52acf06451930eb4cefabd5ecea56e2d46c32f76 ] + +The commit 622190669403 ("be2net: Request RSS capability of Rx interface +depending on number of Rx rings") modified be_update_queues() so the +IFACE (HW representation of the netdevice) is destroyed and then +re-created. This causes a regression because potential promiscuous mode +is not restored properly during be_open() because the driver thinks +that the HW has promiscuous mode already enabled. + +Note that Lancer is not affected by this bug because RX-filter flags are +disabled during be_close() for this chipset. + +Cc: Sathya Perla +Cc: Ajit Khaparde +Cc: Sriharsha Basavapatna +Cc: Somnath Kotur + +Fixes: 622190669403 ("be2net: Request RSS capability of Rx interface depending on number of Rx rings") +Signed-off-by: Ivan Vecera +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/emulex/benet/be_main.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -4634,6 +4634,15 @@ int be_update_queues(struct be_adapter * + + be_schedule_worker(adapter); + ++ /* ++ * The IF was destroyed and re-created. We need to clear ++ * all promiscuous flags valid for the destroyed IF. ++ * Without this promisc mode is not restored during ++ * be_open() because the driver thinks that it is ++ * already enabled in HW. ++ */ ++ adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; ++ + if (netif_running(netdev)) + status = be_open(netdev); + diff --git a/queue-4.14/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch b/queue-4.14/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch new file mode 100644 index 00000000000..f748c0a8ea7 --- /dev/null +++ b/queue-4.14/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch @@ -0,0 +1,44 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Alexey Kodanev +Date: Fri, 26 Jan 2018 15:14:16 +0300 +Subject: dccp: don't restart ccid2_hc_tx_rto_expire() if sk in closed state + +From: Alexey Kodanev + + +[ Upstream commit dd5684ecae3bd8e44b644f50e2c12c7e57fdfef5 ] + +ccid2_hc_tx_rto_expire() timer callback always restarts the timer +again and can run indefinitely (unless it is stopped outside), and after +commit 120e9dabaf55 ("dccp: defer ccid_hc_tx_delete() at dismantle time"), +which moved ccid_hc_tx_delete() (also includes sk_stop_timer()) from +dccp_destroy_sock() to sk_destruct(), this started to happen quite often. +The timer prevents releasing the socket, as a result, sk_destruct() won't +be called. + +Found with LTP/dccp_ipsec tests running on the bonding device, +which later couldn't be unloaded after the tests were completed: + + unregister_netdevice: waiting for bond0 to become free. 
Usage count = 148 + +Fixes: 2a91aa396739 ("[DCCP] CCID2: Initial CCID2 (TCP-Like) implementation") +Signed-off-by: Alexey Kodanev +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid2.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/dccp/ccids/ccid2.c ++++ b/net/dccp/ccids/ccid2.c +@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(unsig + + ccid2_pr_debug("RTO_EXPIRE\n"); + ++ if (sk->sk_state == DCCP_CLOSED) ++ goto out; ++ + /* back-off timer */ + hc->tx_rto <<= 1; + if (hc->tx_rto > DCCP_RTO_MAX) diff --git a/queue-4.14/flow_dissector-properly-cap-thoff-field.patch b/queue-4.14/flow_dissector-properly-cap-thoff-field.patch new file mode 100644 index 00000000000..198a3bf4e5f --- /dev/null +++ b/queue-4.14/flow_dissector-properly-cap-thoff-field.patch @@ -0,0 +1,95 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Eric Dumazet +Date: Wed, 17 Jan 2018 14:21:13 -0800 +Subject: flow_dissector: properly cap thoff field + +From: Eric Dumazet + + +[ Upstream commit d0c081b49137cd3200f2023c0875723be66e7ce5 ] + +syzbot reported yet another crash [1] that is caused by +insufficient validation of DODGY packets. + +Two bugs are happening here to trigger the crash. + +1) Flow dissection leaves with incorrect thoff field. + +2) skb_probe_transport_header() sets transport header to this invalid +thoff, even if pointing after skb valid data. + +3) qdisc_pkt_len_init() reads out-of-bound data because it +trusts tcp_hdrlen(skb) + +Possible fixes : + +- Full flow dissector validation before injecting bad DODGY packets in +the stack. + This approach was attempted here : https://patchwork.ozlabs.org/patch/ +861874/ + +- Have more robust functions in the core. + This might be needed anyway for stable versions. + +This patch fixes the flow dissection issue. 
+ +[1] +CPU: 1 PID: 3144 Comm: syzkaller271204 Not tainted 4.15.0-rc4-mm1+ #49 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:17 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:53 + print_address_description+0x73/0x250 mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:355 [inline] + kasan_report+0x23b/0x360 mm/kasan/report.c:413 + __asan_report_load2_noabort+0x14/0x20 mm/kasan/report.c:432 + __tcp_hdrlen include/linux/tcp.h:35 [inline] + tcp_hdrlen include/linux/tcp.h:40 [inline] + qdisc_pkt_len_init net/core/dev.c:3160 [inline] + __dev_queue_xmit+0x20d3/0x2200 net/core/dev.c:3465 + dev_queue_xmit+0x17/0x20 net/core/dev.c:3554 + packet_snd net/packet/af_packet.c:2943 [inline] + packet_sendmsg+0x3ad5/0x60a0 net/packet/af_packet.c:2968 + sock_sendmsg_nosec net/socket.c:628 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:638 + sock_write_iter+0x31a/0x5d0 net/socket.c:907 + call_write_iter include/linux/fs.h:1776 [inline] + new_sync_write fs/read_write.c:469 [inline] + __vfs_write+0x684/0x970 fs/read_write.c:482 + vfs_write+0x189/0x510 fs/read_write.c:544 + SYSC_write fs/read_write.c:589 [inline] + SyS_write+0xef/0x220 fs/read_write.c:581 + entry_SYSCALL_64_fastpath+0x1f/0x96 + +Fixes: 34fad54c2537 ("net: __skb_flow_dissect() must cap its return value") +Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Reported-by: syzbot +Acked-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/flow_dissector.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -876,8 +876,8 @@ ip_proto_again: + out_good: + ret = true; + +- key_control->thoff = (u16)nhoff; + out: ++ key_control->thoff = min_t(u16, nhoff, skb ? 
skb->len : hlen); + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + +@@ -885,7 +885,6 @@ out: + + out_bad: + ret = false; +- key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); + goto out; + } + EXPORT_SYMBOL(__skb_flow_dissect); diff --git a/queue-4.14/gso-validate-gso_type-in-gso-handlers.patch b/queue-4.14/gso-validate-gso_type-in-gso-handlers.patch new file mode 100644 index 00000000000..3ca962b5245 --- /dev/null +++ b/queue-4.14/gso-validate-gso_type-in-gso-handlers.patch @@ -0,0 +1,121 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Willem de Bruijn +Date: Fri, 19 Jan 2018 09:29:18 -0500 +Subject: gso: validate gso_type in GSO handlers + +From: Willem de Bruijn + + +[ Upstream commit 121d57af308d0cf943f08f4738d24d3966c38cd9 ] + +Validate gso_type during segmentation as SKB_GSO_DODGY sources +may pass packets where the gso_type does not match the contents. + +Syzkaller was able to enter the SCTP gso handler with a packet of +gso_type SKB_GSO_TCPV4. + +On entry of transport layer gso handlers, verify that the gso_type +matches the transport protocol. + +Fixes: 90017accff61 ("sctp: Add GSO support") +Link: http://lkml.kernel.org/r/<001a1137452496ffc305617e5fe0@google.com> +Reported-by: syzbot+fee64147a25aecd48055@syzkaller.appspotmail.com +Signed-off-by: Willem de Bruijn +Acked-by: Jason Wang +Reviewed-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/esp4_offload.c | 3 +++ + net/ipv4/tcp_offload.c | 3 +++ + net/ipv4/udp_offload.c | 3 +++ + net/ipv6/esp6_offload.c | 3 +++ + net/ipv6/tcpv6_offload.c | 3 +++ + net/ipv6/udp_offload.c | 3 +++ + net/sctp/offload.c | 3 +++ + 7 files changed, 21 insertions(+) + +--- a/net/ipv4/esp4_offload.c ++++ b/net/ipv4/esp4_offload.c +@@ -121,6 +121,9 @@ static struct sk_buff *esp4_gso_segment( + if (!xo) + goto out; + ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) ++ goto out; ++ + seq = xo->seq.low; + + x = skb->sp->xvec[skb->sp->len - 1]; +--- a/net/ipv4/tcp_offload.c ++++ b/net/ipv4/tcp_offload.c +@@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buf + static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, + netdev_features_t features) + { ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) ++ return ERR_PTR(-EINVAL); ++ + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) + return ERR_PTR(-EINVAL); + +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -203,6 +203,9 @@ static struct sk_buff *udp4_ufo_fragment + goto out; + } + ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) ++ goto out; ++ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + +--- a/net/ipv6/esp6_offload.c ++++ b/net/ipv6/esp6_offload.c +@@ -148,6 +148,9 @@ static struct sk_buff *esp6_gso_segment( + if (!xo) + goto out; + ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) ++ goto out; ++ + seq = xo->seq.low; + + x = skb->sp->xvec[skb->sp->len - 1]; +--- a/net/ipv6/tcpv6_offload.c ++++ b/net/ipv6/tcpv6_offload.c +@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment( + { + struct tcphdr *th; + ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) ++ return ERR_PTR(-EINVAL); ++ + if (!pskb_may_pull(skb, sizeof(*th))) + return ERR_PTR(-EINVAL); + +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -42,6 +42,9 @@ static struct sk_buff *udp6_ufo_fragment + const struct ipv6hdr *ipv6h; + struct udphdr *uh; 
+ ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) ++ goto out; ++ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + +--- a/net/sctp/offload.c ++++ b/net/sctp/offload.c +@@ -45,6 +45,9 @@ static struct sk_buff *sctp_gso_segment( + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct sctphdr *sh; + ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) ++ goto out; ++ + sh = sctp_hdr(skb); + if (!pskb_may_pull(skb, sizeof(*sh))) + goto out; diff --git a/queue-4.14/ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch b/queue-4.14/ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch new file mode 100644 index 00000000000..0ea4d185f13 --- /dev/null +++ b/queue-4.14/ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch @@ -0,0 +1,94 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Alexey Kodanev +Date: Thu, 18 Jan 2018 20:51:12 +0300 +Subject: ip6_gre: init dev->mtu and dev->hard_header_len correctly + +From: Alexey Kodanev + + +[ Upstream commit 128bb975dc3c25d00de04e503e2fe0a780d04459 ] + +Commit b05229f44228 ("gre6: Cleanup GREv6 transmit path, +call common GRE functions") moved dev->mtu initialization +from ip6gre_tunnel_setup() to ip6gre_tunnel_init(), as a +result, the previously set values, before ndo_init(), are +reset in the following cases: + +* rtnl_create_link() can update dev->mtu from IFLA_MTU + parameter. + +* ip6gre_tnl_link_config() is invoked before ndo_init() in + netlink and ioctl setup, so ndo_init() can reset MTU + adjustments with the lower device MTU as well, dev->mtu + and dev->hard_header_len. + + Not applicable for ip6gretap because it has one more call + to ip6gre_tnl_link_config(tunnel, 1) in ip6gre_tap_init(). + +Fix the first case by updating dev->mtu with 'tb[IFLA_MTU]' +parameter if a user sets it manually on a device creation, +and fix the second one by moving ip6gre_tnl_link_config() +call after register_netdevice(). 
+ +Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") +Fixes: db2ec95d1ba4 ("ip6_gre: Fix MTU setting") +Signed-off-by: Alexey Kodanev +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_loc + + nt->dev = dev; + nt->net = dev_net(dev); +- ip6gre_tnl_link_config(nt, 1); + + if (register_netdevice(dev) < 0) + goto failed_free; + ++ ip6gre_tnl_link_config(nt, 1); ++ + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & TUNNEL_SEQ)) + dev->features |= NETIF_F_LLTX; +@@ -1307,7 +1308,6 @@ static void ip6gre_netlink_parms(struct + + static int ip6gre_tap_init(struct net_device *dev) + { +- struct ip6_tnl *tunnel; + int ret; + + ret = ip6gre_tunnel_init_common(dev); +@@ -1316,10 +1316,6 @@ static int ip6gre_tap_init(struct net_de + + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + +- tunnel = netdev_priv(dev); +- +- ip6gre_tnl_link_config(tunnel, 1); +- + return 0; + } + +@@ -1411,12 +1407,16 @@ static int ip6gre_newlink(struct net *sr + + nt->dev = dev; + nt->net = dev_net(dev); +- ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + err = register_netdevice(dev); + if (err) + goto out; + ++ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); ++ ++ if (tb[IFLA_MTU]) ++ ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); ++ + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + diff --git a/queue-4.14/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch b/queue-4.14/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch new file mode 100644 index 00000000000..41bed520f80 --- /dev/null +++ b/queue-4.14/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch @@ -0,0 +1,58 @@ +From foo@baz Sun Jan 28 17:35:08 CET 
2018 +From: Jim Westfall +Date: Sun, 14 Jan 2018 04:18:51 -0800 +Subject: ipv4: Make neigh lookup keys for loopback/point-to-point devices be INADDR_ANY + +From: Jim Westfall + + +[ Upstream commit cd9ff4de0107c65d69d02253bb25d6db93c3dbc1 ] + +Map all lookup neigh keys to INADDR_ANY for loopback/point-to-point devices +to avoid making an entry for every remote ip the device needs to talk to. + +This used to be the old behavior but became broken in a263b3093641f +(ipv4: Make neigh lookups directly in output packet path) and later removed +in 0bb4087cbec0 (ipv4: Fix neigh lookup keying over loopback/point-to-point +devices) because it was broken. + +Signed-off-by: Jim Westfall +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/arp.h | 3 +++ + net/ipv4/arp.c | 7 ++++++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +--- a/include/net/arp.h ++++ b/include/net/arp.h +@@ -20,6 +20,9 @@ static inline u32 arp_hashfn(const void + + static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) + { ++ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ++ key = INADDR_ANY; ++ + return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); + } + +--- a/net/ipv4/arp.c ++++ b/net/ipv4/arp.c +@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neig + + static int arp_constructor(struct neighbour *neigh) + { +- __be32 addr = *(__be32 *)neigh->primary_key; ++ __be32 addr; + struct net_device *dev = neigh->dev; + struct in_device *in_dev; + struct neigh_parms *parms; ++ u32 inaddr_any = INADDR_ANY; + ++ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ++ memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len); ++ ++ addr = *(__be32 *)neigh->primary_key; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) { diff --git a/queue-4.14/ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch 
b/queue-4.14/ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch new file mode 100644 index 00000000000..25a5e338b25 --- /dev/null +++ b/queue-4.14/ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch @@ -0,0 +1,62 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Ben Hutchings +Date: Mon, 22 Jan 2018 20:06:42 +0000 +Subject: ipv6: Fix getsockopt() for sockets with default IPV6_AUTOFLOWLABEL + +From: Ben Hutchings + + +[ Upstream commit e9191ffb65d8e159680ce0ad2224e1acbde6985c ] + +Commit 513674b5a2c9 ("net: reevalulate autoflowlabel setting after +sysctl setting") removed the initialisation of +ipv6_pinfo::autoflowlabel and added a second flag to indicate +whether this field or the net namespace default should be used. + +The getsockopt() handling for this case was not updated, so it +currently returns 0 for all sockets for which IPV6_AUTOFLOWLABEL is +not explicitly enabled. Fix it to return the effective value, whether +that has been set at the socket or net namespace level. + +Fixes: 513674b5a2c9 ("net: reevalulate autoflowlabel setting after sysctl ...") +Signed-off-by: Ben Hutchings +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ipv6.h | 1 + + net/ipv6/ip6_output.c | 2 +- + net/ipv6/ipv6_sockglue.c | 2 +- + 3 files changed, 3 insertions(+), 2 deletions(-) + +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -291,6 +291,7 @@ int ipv6_flowlabel_opt_get(struct sock * + int flags); + int ip6_flowlabel_init(void); + void ip6_flowlabel_cleanup(void); ++bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); + + static inline void fl6_sock_release(struct ip6_flowlabel *fl) + { +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -166,7 +166,7 @@ int ip6_output(struct net *net, struct s + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); + } + +-static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) ++bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) + { + if (!np->autoflowlabel_set) + return ip6_default_np_autolabel(net); +--- a/net/ipv6/ipv6_sockglue.c ++++ b/net/ipv6/ipv6_sockglue.c +@@ -1324,7 +1324,7 @@ static int do_ipv6_getsockopt(struct soc + break; + + case IPV6_AUTOFLOWLABEL: +- val = np->autoflowlabel; ++ val = ip6_autoflowlabel(sock_net(sk), np); + break; + + case IPV6_RECVFRAGSIZE: diff --git a/queue-4.14/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch b/queue-4.14/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch new file mode 100644 index 00000000000..d723e10e10e --- /dev/null +++ b/queue-4.14/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch @@ -0,0 +1,94 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Mike Maloney +Date: Wed, 10 Jan 2018 12:45:10 -0500 +Subject: ipv6: fix udpv6 sendmsg crash caused by too small MTU + +From: Mike Maloney + + +[ Upstream commit 749439bfac6e1a2932c582e2699f91d329658196 ] + +The logic in __ip6_append_data() assumes that the MTU is at least large +enough for the headers. 
A device's MTU may be adjusted after being +added while sendmsg() is processing data, resulting in +__ip6_append_data() seeing any MTU. For an mtu smaller than the size of +the fragmentation header, the math results in a negative 'maxfraglen', +which causes problems when refragmenting any previous skb in the +skb_write_queue, leaving it possibly malformed. + +Instead sendmsg returns EINVAL when the mtu is calculated to be less +than IPV6_MIN_MTU. + +Found by syzkaller: +kernel BUG at ./include/linux/skbuff.h:2064! +invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 1 PID: 14216 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #2 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +task: ffff8801d0b68580 task.stack: ffff8801ac6b8000 +RIP: 0010:__skb_pull include/linux/skbuff.h:2064 [inline] +RIP: 0010:__ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 +RSP: 0018:ffff8801ac6bf570 EFLAGS: 00010216 +RAX: 0000000000010000 RBX: 0000000000000028 RCX: ffffc90003cce000 +RDX: 00000000000001b8 RSI: ffffffff839df06f RDI: ffff8801d9478ca0 +RBP: ffff8801ac6bf780 R08: ffff8801cc3f1dbc R09: 0000000000000000 +R10: ffff8801ac6bf7a0 R11: 43cb4b7b1948a9e7 R12: ffff8801cc3f1dc8 +R13: ffff8801cc3f1d40 R14: 0000000000001036 R15: dffffc0000000000 +FS: 00007f43d740c700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f7834984000 CR3: 00000001d79b9000 CR4: 00000000001406e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + ip6_finish_skb include/net/ipv6.h:911 [inline] + udp_v6_push_pending_frames+0x255/0x390 net/ipv6/udp.c:1093 + udpv6_sendmsg+0x280d/0x31a0 net/ipv6/udp.c:1363 + inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + 
SYSC_sendto+0x352/0x5a0 net/socket.c:1750 + SyS_sendto+0x40/0x50 net/socket.c:1718 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x4512e9 +RSP: 002b:00007f43d740bc08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 00000000007180a8 RCX: 00000000004512e9 +RDX: 000000000000002e RSI: 0000000020d08000 RDI: 0000000000000005 +RBP: 0000000000000086 R08: 00000000209c1000 R09: 000000000000001c +R10: 0000000000040800 R11: 0000000000000216 R12: 00000000004b9c69 +R13: 00000000ffffffff R14: 0000000000000005 R15: 00000000202c2000 +Code: 9e 01 fe e9 c5 e8 ff ff e8 7f 9e 01 fe e9 4a ea ff ff 48 89 f7 e8 52 9e 01 fe e9 aa eb ff ff e8 a8 b6 cf fd 0f 0b e8 a1 b6 cf fd <0f> 0b 49 8d 45 78 4d 8d 45 7c 48 89 85 78 fe ff ff 49 8d 85 ba +RIP: __skb_pull include/linux/skbuff.h:2064 [inline] RSP: ffff8801ac6bf570 +RIP: __ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: ffff8801ac6bf570 + +Reported-by: syzbot +Signed-off-by: Mike Maloney +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1206,14 +1206,16 @@ static int ip6_setup_cork(struct sock *s + v6_cork->tclass = ipc6->tclass; + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? +- rt->dst.dev->mtu : dst_mtu(&rt->dst); ++ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 
+- rt->dst.dev->mtu : dst_mtu(rt->dst.path); ++ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } ++ if (mtu < IPV6_MIN_MTU) ++ return -EINVAL; + cork->base.fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->base.flags |= IPCORK_ALLFRAG; diff --git a/queue-4.14/ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch b/queue-4.14/ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch new file mode 100644 index 00000000000..5301ed0a6c8 --- /dev/null +++ b/queue-4.14/ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch @@ -0,0 +1,35 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Eric Dumazet +Date: Thu, 11 Jan 2018 22:31:18 -0800 +Subject: ipv6: ip6_make_skb() needs to clear cork.base.dst + +From: Eric Dumazet + + +[ Upstream commit 95ef498d977bf44ac094778fd448b98af158a3e6 ] + +In my last patch, I missed the fact that cork.base.dst was not initialized +in ip6_make_skb() : + +If ip6_setup_cork() returns an error, we might attempt a dst_release() +on some random pointer. + +Fixes: 862c03ee1deb ("ipv6: fix possible mem leaks in ipv6_make_skb()") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1735,6 +1735,7 @@ struct sk_buff *ip6_make_skb(struct sock + cork.base.flags = 0; + cork.base.addr = 0; + cork.base.opt = NULL; ++ cork.base.dst = NULL; + v6_cork.opt = NULL; + err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); + if (err) { diff --git a/queue-4.14/lan78xx-fix-failure-in-usb-full-speed.patch b/queue-4.14/lan78xx-fix-failure-in-usb-full-speed.patch new file mode 100644 index 00000000000..71c0682416a --- /dev/null +++ b/queue-4.14/lan78xx-fix-failure-in-usb-full-speed.patch @@ -0,0 +1,31 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Yuiko Oshino +Date: Mon, 15 Jan 2018 13:24:28 -0500 +Subject: lan78xx: Fix failure in USB Full Speed + +From: Yuiko Oshino + + +[ Upstream commit a5b1379afbfabf91e3a689e82ac619a7157336b3 ] + +Fix: initialize the uninitialized tx_qlen to an appropriate value when USB +Full Speed is used. + +Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") +Signed-off-by: Yuiko Oshino +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/lan78xx.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -2396,6 +2396,7 @@ static int lan78xx_reset(struct lan78xx_ + buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE; + dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE; + dev->rx_qlen = 4; ++ dev->tx_qlen = 4; + } + + ret = lan78xx_write_reg(dev, BURST_CAP, buf); diff --git a/queue-4.14/mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch b/queue-4.14/mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch new file mode 100644 index 00000000000..0baf16499dd --- /dev/null +++ b/queue-4.14/mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch @@ -0,0 +1,60 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Yuval Mintz +Date: Wed, 24 Jan 2018 10:02:09 +0100 +Subject: mlxsw: spectrum_router: Don't log an error on missing neighbor + +From: Yuval Mintz + + +[ Upstream commit 1ecdaea02ca6bfacf2ecda500dc1af51e9780c42 ] + +Driver periodically samples all neighbors configured in device +in order to update the kernel regarding their state. When finding +an entry configured in HW that doesn't show in neigh_lookup() +driver logs an error message. +This introduces a race when removing multiple neighbors - +it's possible that a given entry would still be configured in HW +as its removal is still being processed but is already removed +from the kernel's neighbor tables. + +Simply remove the error message and gracefully accept such events. + +Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") +Fixes: 60f040ca11b9 ("mlxsw: spectrum_router: Periodically dump active IPv6 neighbours") +Signed-off-by: Yuval Mintz +Reviewed-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 10 ++-------- + 1 file changed, 2 insertions(+), 8 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +@@ -1531,11 +1531,8 @@ static void mlxsw_sp_router_neigh_ent_ip + dipn = htonl(dip); + dev = mlxsw_sp->router->rifs[rif]->dev; + n = neigh_lookup(&arp_tbl, &dipn, dev); +- if (!n) { +- netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", +- &dip); ++ if (!n) + return; +- } + + netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); + neigh_event_send(n, NULL); +@@ -1562,11 +1559,8 @@ static void mlxsw_sp_router_neigh_ent_ip + + dev = mlxsw_sp->router->rifs[rif]->dev; + n = neigh_lookup(&nd_tbl, &dip, dev); +- if (!n) { +- netdev_err(dev, "Failed to find matching neighbour for IP=%pI6c\n", +- &dip); ++ if (!n) + return; +- } + + netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip); + neigh_event_send(n, NULL); diff --git a/queue-4.14/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch b/queue-4.14/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch new file mode 100644 index 00000000000..b733973272d --- /dev/null +++ b/queue-4.14/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch @@ -0,0 +1,40 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Jim Westfall +Date: Sun, 14 Jan 2018 04:18:50 -0800 +Subject: net: Allow neigh contructor functions ability to modify the primary_key + +From: Jim Westfall + + +[ Upstream commit 096b9854c04df86f03b38a97d40b6506e5730919 ] + +Use n->primary_key instead of pkey to account for the possibility that a neigh +constructor function may have modified the primary_key value. + +Signed-off-by: Jim Westfall +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/neighbour.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -532,7 +532,7 @@ struct neighbour *__neigh_create(struct + if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) + nht = neigh_hash_grow(tbl, nht->hash_shift + 1); + +- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); ++ hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); + + if (n->parms->dead) { + rc = ERR_PTR(-EINVAL); +@@ -544,7 +544,7 @@ struct neighbour *__neigh_create(struct + n1 != NULL; + n1 = rcu_dereference_protected(n1->next, + lockdep_is_held(&tbl->lock))) { +- if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { ++ if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { + if (want_ref) + neigh_hold(n1); + rc = n1; diff --git a/queue-4.14/net-ib-mlx5-don-t-disable-local-loopback-multicast-traffic-when-needed.patch b/queue-4.14/net-ib-mlx5-don-t-disable-local-loopback-multicast-traffic-when-needed.patch new file mode 100644 index 00000000000..35707d543a3 --- /dev/null +++ b/queue-4.14/net-ib-mlx5-don-t-disable-local-loopback-multicast-traffic-when-needed.patch @@ -0,0 +1,192 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Eran Ben Elisha +Date: Tue, 9 Jan 2018 11:41:10 +0200 +Subject: {net,ib}/mlx5: Don't disable local loopback multicast traffic when needed + +From: Eran Ben Elisha + + +[ Upstream commit 8978cc921fc7fad3f4d6f91f1da01352aeeeff25 ] + +There are systems platform information management interfaces (such as +HOST2BMC) for which we cannot disable local loopback multicast traffic. + +Separate disable_local_lb_mc and disable_local_lb_uc capability bits so +driver will not disable multicast loopback traffic if not supported. +(It is expected that Firmware will not set disable_local_lb_mc if +HOST2BMC is running for example.) 
+ +Function mlx5_nic_vport_update_local_lb will do best effort to +disable/enable UC/MC loopback traffic and return success only in case it +succeeded to changed all allowed by Firmware. + +Adapt mlx5_ib and mlx5e to support the new cap bits. + +Fixes: 2c43c5a036be ("net/mlx5e: Enable local loopback in loopback selftest") +Fixes: c85023e153e3 ("IB/mlx5: Add raw ethernet local loopback support") +Fixes: bded747bb432 ("net/mlx5: Add raw ethernet local loopback firmware command") +Signed-off-by: Eran Ben Elisha +Cc: kernel-team@fb.com +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + drivers/infiniband/hw/mlx5/main.c | 9 ++++-- + drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 27 ++++++++++++------ + drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 -- + drivers/net/ethernet/mellanox/mlx5/core/vport.c | 22 ++++++++++---- + include/linux/mlx5/mlx5_ifc.h | 5 ++- + 5 files changed, 44 insertions(+), 22 deletions(-) + +--- a/drivers/infiniband/hw/mlx5/main.c ++++ b/drivers/infiniband/hw/mlx5/main.c +@@ -1276,7 +1276,8 @@ static int mlx5_ib_alloc_transport_domai + return err; + + if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || +- !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) ++ (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) && ++ !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) + return err; + + mutex_lock(&dev->lb_mutex); +@@ -1294,7 +1295,8 @@ static void mlx5_ib_dealloc_transport_do + mlx5_core_dealloc_transport_domain(dev->mdev, tdn); + + if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) || +- !MLX5_CAP_GEN(dev->mdev, disable_local_lb)) ++ (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) && ++ !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc))) + return; + + mutex_lock(&dev->lb_mutex); +@@ -4161,7 +4163,8 @@ static void *mlx5_ib_add(struct mlx5_cor + } + + if ((MLX5_CAP_GEN(mdev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && +- MLX5_CAP_GEN(mdev, disable_local_lb)) ++ (MLX5_CAP_GEN(mdev, disable_local_lb_uc) || 
++ MLX5_CAP_GEN(mdev, disable_local_lb_mc))) + mutex_init(&dev->lb_mutex); + + dev->ib_active = true; +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +@@ -238,15 +238,19 @@ static int mlx5e_test_loopback_setup(str + int err = 0; + + /* Temporarily enable local_lb */ +- if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) { +- mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb); +- if (!lbtp->local_lb) +- mlx5_nic_vport_update_local_lb(priv->mdev, true); ++ err = mlx5_nic_vport_query_local_lb(priv->mdev, &lbtp->local_lb); ++ if (err) ++ return err; ++ ++ if (!lbtp->local_lb) { ++ err = mlx5_nic_vport_update_local_lb(priv->mdev, true); ++ if (err) ++ return err; + } + + err = mlx5e_refresh_tirs(priv, true); + if (err) +- return err; ++ goto out; + + lbtp->loopback_ok = false; + init_completion(&lbtp->comp); +@@ -256,16 +260,21 @@ static int mlx5e_test_loopback_setup(str + lbtp->pt.dev = priv->netdev; + lbtp->pt.af_packet_priv = lbtp; + dev_add_pack(&lbtp->pt); ++ ++ return 0; ++ ++out: ++ if (!lbtp->local_lb) ++ mlx5_nic_vport_update_local_lb(priv->mdev, false); ++ + return err; + } + + static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv, + struct mlx5e_lbt_priv *lbtp) + { +- if (MLX5_CAP_GEN(priv->mdev, disable_local_lb)) { +- if (!lbtp->local_lb) +- mlx5_nic_vport_update_local_lb(priv->mdev, false); +- } ++ if (!lbtp->local_lb) ++ mlx5_nic_vport_update_local_lb(priv->mdev, false); + + dev_remove_pack(&lbtp->pt); + mlx5e_refresh_tirs(priv, false); +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -577,8 +577,7 @@ static int mlx5_core_set_hca_defaults(st + int ret = 0; + + /* Disable local_lb by default */ +- if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) && +- MLX5_CAP_GEN(dev, disable_local_lb)) ++ if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH) + ret = mlx5_nic_vport_update_local_lb(dev, 
false); + + return ret; +--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c +@@ -908,23 +908,33 @@ int mlx5_nic_vport_update_local_lb(struc + void *in; + int err; + +- mlx5_core_dbg(mdev, "%s local_lb\n", enable ? "enable" : "disable"); ++ if (!MLX5_CAP_GEN(mdev, disable_local_lb_mc) && ++ !MLX5_CAP_GEN(mdev, disable_local_lb_uc)) ++ return 0; ++ + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, +- field_select.disable_mc_local_lb, 1); +- MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_mc_local_lb, !enable); +- +- MLX5_SET(modify_nic_vport_context_in, in, +- field_select.disable_uc_local_lb, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.disable_uc_local_lb, !enable); + ++ if (MLX5_CAP_GEN(mdev, disable_local_lb_mc)) ++ MLX5_SET(modify_nic_vport_context_in, in, ++ field_select.disable_mc_local_lb, 1); ++ ++ if (MLX5_CAP_GEN(mdev, disable_local_lb_uc)) ++ MLX5_SET(modify_nic_vport_context_in, in, ++ field_select.disable_uc_local_lb, 1); ++ + err = mlx5_modify_nic_vport_context(mdev, in, inlen); + ++ if (!err) ++ mlx5_core_dbg(mdev, "%s local_lb\n", ++ enable ? 
"enable" : "disable"); ++ + kvfree(in); + return err; + } +--- a/include/linux/mlx5/mlx5_ifc.h ++++ b/include/linux/mlx5/mlx5_ifc.h +@@ -1023,8 +1023,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { + u8 log_max_wq_sz[0x5]; + + u8 nic_vport_change_event[0x1]; +- u8 disable_local_lb[0x1]; +- u8 reserved_at_3e2[0x9]; ++ u8 disable_local_lb_uc[0x1]; ++ u8 disable_local_lb_mc[0x1]; ++ u8 reserved_at_3e3[0x8]; + u8 log_max_vlan_list[0x5]; + u8 reserved_at_3f0[0x3]; + u8 log_max_current_mc_list[0x5]; diff --git a/queue-4.14/net-igmp-fix-source-address-check-for-igmpv3-reports.patch b/queue-4.14/net-igmp-fix-source-address-check-for-igmpv3-reports.patch new file mode 100644 index 00000000000..a97400f5a51 --- /dev/null +++ b/queue-4.14/net-igmp-fix-source-address-check-for-igmpv3-reports.patch @@ -0,0 +1,41 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Felix Fietkau +Date: Fri, 19 Jan 2018 11:50:46 +0100 +Subject: net: igmp: fix source address check for IGMPv3 reports + +From: Felix Fietkau + + +[ Upstream commit ad23b750933ea7bf962678972a286c78a8fa36aa ] + +Commit "net: igmp: Use correct source address on IGMPv3 reports" +introduced a check to validate the source address of locally generated +IGMPv3 packets. +Instead of checking the local interface address directly, it uses +inet_ifa_match(fl4->saddr, ifa), which checks if the address is on the +local subnet (or equal to the point-to-point address if used). + +This breaks for point-to-point interfaces, so check against +ifa->ifa_local directly. + +Cc: Kevin Cernekee +Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports") +Reported-by: Sebastian Gottschall +Signed-off-by: Felix Fietkau +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/igmp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct + return htonl(INADDR_ANY); + + for_ifa(in_dev) { +- if (inet_ifa_match(fl4->saddr, ifa)) ++ if (fl4->saddr == ifa->ifa_local) + return fl4->saddr; + } endfor_ifa(in_dev); + diff --git a/queue-4.14/net-ipv4-make-ip-route-get-match-iif-lo-rules-again.patch b/queue-4.14/net-ipv4-make-ip-route-get-match-iif-lo-rules-again.patch new file mode 100644 index 00000000000..8e206d48113 --- /dev/null +++ b/queue-4.14/net-ipv4-make-ip-route-get-match-iif-lo-rules-again.patch @@ -0,0 +1,47 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Lorenzo Colitti +Date: Thu, 11 Jan 2018 18:36:26 +0900 +Subject: net: ipv4: Make "ip route get" match iif lo rules again. + +From: Lorenzo Colitti + + +[ Upstream commit 6503a30440962f1e1ccb8868816b4e18201218d4 ] + +Commit 3765d35ed8b9 ("net: ipv4: Convert inet_rtm_getroute to rcu +versions of route lookup") broke "ip route get" in the presence +of rules that specify iif lo. + +Host-originated traffic always has iif lo, because +ip_route_output_key_hash and ip6_route_output_flags set the flow +iif to LOOPBACK_IFINDEX. Thus, putting "iif lo" in an ip rule is a +convenient way to select only originated traffic and not forwarded +traffic. + +inet_rtm_getroute used to match these rules correctly because +even though it sets the flow iif to 0, it called +ip_route_output_key which overwrites iif with LOOPBACK_IFINDEX. +But now that it calls ip_route_output_key_hash_rcu, the ifindex +will remain 0 and not match the iif lo in the rule. As a result, +"ip route get" will return ENETUNREACH. 
+ +Fixes: 3765d35ed8b9 ("net: ipv4: Convert inet_rtm_getroute to rcu versions of route lookup") +Tested: https://android.googlesource.com/kernel/tests/+/master/net/test/multinetwork_test.py passes again +Signed-off-by: Lorenzo Colitti +Acked-by: David Ahern +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2762,6 +2762,7 @@ static int inet_rtm_getroute(struct sk_b + if (err == 0 && rt->dst.error) + err = -rt->dst.error; + } else { ++ fl4.flowi4_iif = LOOPBACK_IFINDEX; + rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); + err = 0; + if (IS_ERR(rt)) diff --git a/queue-4.14/net-mlx5-fix-get-vector-affinity-helper-function.patch b/queue-4.14/net-mlx5-fix-get-vector-affinity-helper-function.patch new file mode 100644 index 00000000000..99dccb56a96 --- /dev/null +++ b/queue-4.14/net-mlx5-fix-get-vector-affinity-helper-function.patch @@ -0,0 +1,61 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Saeed Mahameed +Date: Thu, 4 Jan 2018 04:35:51 +0200 +Subject: net/mlx5: Fix get vector affinity helper function + +From: Saeed Mahameed + + +[ Upstream commit 05e0cc84e00c54fb152d1f4b86bc211823a83d0c ] + +mlx5_get_vector_affinity used to call pci_irq_get_affinity and after +reverting the patch that sets the device affinity via PCI_IRQ_AFFINITY +API, calling pci_irq_get_affinity becomes useless and it breaks RDMA +mlx5 users. To fix this, this patch provides an alternative way to +retrieve IRQ vector affinity using legacy IRQ API, following +smp_affinity read procfs implementation. 
+ +Fixes: 231243c82793 ("Revert mlx5: move affinity hints assignments to generic code") +Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code") +Cc: Sagi Grimberg +Signed-off-by: Saeed Mahameed +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mlx5/driver.h | 19 ++++++++++++++++++- + 1 file changed, 18 insertions(+), 1 deletion(-) + +--- a/include/linux/mlx5/driver.h ++++ b/include/linux/mlx5/driver.h +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1194,7 +1195,23 @@ enum { + static inline const struct cpumask * + mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) + { +- return pci_irq_get_affinity(dev->pdev, MLX5_EQ_VEC_COMP_BASE + vector); ++ const struct cpumask *mask; ++ struct irq_desc *desc; ++ unsigned int irq; ++ int eqn; ++ int err; ++ ++ err = mlx5_vector2eqn(dev, vector, &eqn, &irq); ++ if (err) ++ return NULL; ++ ++ desc = irq_to_desc(irq); ++#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK ++ mask = irq_data_get_effective_affinity_mask(&desc->irq_data); ++#else ++ mask = desc->irq_common_data.affinity; ++#endif ++ return mask; + } + + #endif /* MLX5_DRIVER_H */ diff --git a/queue-4.14/net-mlx5e-fix-fixpoint-divide-exception-in-mlx5e_am_stats_compare.patch b/queue-4.14/net-mlx5e-fix-fixpoint-divide-exception-in-mlx5e_am_stats_compare.patch new file mode 100644 index 00000000000..418926da108 --- /dev/null +++ b/queue-4.14/net-mlx5e-fix-fixpoint-divide-exception-in-mlx5e_am_stats_compare.patch @@ -0,0 +1,47 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Talat Batheesh +Date: Sun, 21 Jan 2018 05:30:42 +0200 +Subject: net/mlx5e: Fix fixpoint divide exception in mlx5e_am_stats_compare + +From: Talat Batheesh + + +[ Upstream commit e58edaa4863583b54409444f11b4f80dff0af1cd ] + +Helmut reported a bug about division by zero while +running traffic and doing physical cable pull test. 
+ +When the cable is unplugged the ppms become zero, so when +dividing the current ppms by the previous ppms in the +next dim iteration there is division by zero. + +This patch prevents this division for both ppms and epms. + +Fixes: c3164d2fc48f ("net/mlx5e: Added BW check for DIM decision mechanism") +Reported-by: Helmut Grauer +Signed-off-by: Talat Batheesh +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c +@@ -197,9 +197,15 @@ static int mlx5e_am_stats_compare(struct + return (curr->bpms > prev->bpms) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; + ++ if (!prev->ppms) ++ return curr->ppms ? MLX5E_AM_STATS_BETTER : ++ MLX5E_AM_STATS_SAME; ++ + if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms)) + return (curr->ppms > prev->ppms) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; ++ if (!prev->epms) ++ return MLX5E_AM_STATS_SAME; + + if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) + return (curr->epms < prev->epms) ? MLX5E_AM_STATS_BETTER : diff --git a/queue-4.14/net-qdisc_pkt_len_init-should-be-more-robust.patch b/queue-4.14/net-qdisc_pkt_len_init-should-be-more-robust.patch new file mode 100644 index 00000000000..c9cf76fe0e7 --- /dev/null +++ b/queue-4.14/net-qdisc_pkt_len_init-should-be-more-robust.patch @@ -0,0 +1,61 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Eric Dumazet +Date: Thu, 18 Jan 2018 19:59:19 -0800 +Subject: net: qdisc_pkt_len_init() should be more robust + +From: Eric Dumazet + + +[ Upstream commit 7c68d1a6b4db9012790af7ac0f0fdc0d2083422a ] + +Without proper validation of DODGY packets, we might very well +feed qdisc_pkt_len_init() with invalid GSO packets. + +tcp_hdrlen() might access out-of-bound data, so let's use +skb_header_pointer() and proper checks. 
+ +Whole story is described in commit d0c081b49137 ("flow_dissector: +properly cap thoff field") + +We have the goal of validating DODGY packets earlier in the stack, +so we might very well revert this fix in the future. + +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Cc: Jason Wang +Reported-by: syzbot+9da69ebac7dddd804552@syzkaller.appspotmail.com +Acked-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3128,10 +3128,21 @@ static void qdisc_pkt_len_init(struct sk + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + /* + transport layer */ +- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) +- hdr_len += tcp_hdrlen(skb); +- else +- hdr_len += sizeof(struct udphdr); ++ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { ++ const struct tcphdr *th; ++ struct tcphdr _tcphdr; ++ ++ th = skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_tcphdr), &_tcphdr); ++ if (likely(th)) ++ hdr_len += __tcp_hdrlen(th); ++ } else { ++ struct udphdr _udphdr; ++ ++ if (skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_udphdr), &_udphdr)) ++ hdr_len += sizeof(struct udphdr); ++ } + + if (shinfo->gso_type & SKB_GSO_DODGY) + gso_segs = DIV_ROUND_UP(skb->len - hdr_len, diff --git a/queue-4.14/net-tcp-close-sock-if-net-namespace-is-exiting.patch b/queue-4.14/net-tcp-close-sock-if-net-namespace-is-exiting.patch new file mode 100644 index 00000000000..e2bdca735e7 --- /dev/null +++ b/queue-4.14/net-tcp-close-sock-if-net-namespace-is-exiting.patch @@ -0,0 +1,120 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Dan Streetman +Date: Thu, 18 Jan 2018 16:14:26 -0500 +Subject: net: tcp: close sock if net namespace is exiting + +From: Dan Streetman + + +[ Upstream commit 4ee806d51176ba7b8ff1efd81f271d7252e03a1d ] + +When a tcp socket is closed, if 
it detects that its net namespace is +exiting, close immediately and do not wait for FIN sequence. + +For normal sockets, a reference is taken to their net namespace, so it will +never exit while the socket is open. However, kernel sockets do not take a +reference to their net namespace, so it may begin exiting while the kernel +socket is still open. In this case if the kernel socket is a tcp socket, +it will stay open trying to complete its close sequence. The sock's dst(s) +hold a reference to their interface, which are all transferred to the +namespace's loopback interface when the real interfaces are taken down. +When the namespace tries to take down its loopback interface, it hangs +waiting for all references to the loopback interface to release, which +results in messages like: + +unregister_netdevice: waiting for lo to become free. Usage count = 1 + +These messages continue until the socket finally times out and closes. +Since the net namespace cleanup holds the net_mutex while calling its +registered pernet callbacks, any new net namespace initialization is +blocked until the current net namespace finishes exiting. + +After this change, the tcp socket notices the exiting net namespace, and +closes immediately, releasing its dst(s) and their reference to the +loopback interface, which lets the net namespace continue exiting. + +Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1711407 +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=97811 +Signed-off-by: Dan Streetman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/net_namespace.h | 10 ++++++++++ + net/ipv4/tcp.c | 3 +++ + net/ipv4/tcp_timer.c | 15 +++++++++++++++ + 3 files changed, 28 insertions(+) + +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -223,6 +223,11 @@ int net_eq(const struct net *net1, const + return net1 == net2; + } + ++static inline int check_net(const struct net *net) ++{ ++ return atomic_read(&net->count) != 0; ++} ++ + void net_drop_ns(void *); + + #else +@@ -246,6 +251,11 @@ int net_eq(const struct net *net1, const + { + return 1; + } ++ ++static inline int check_net(const struct net *net) ++{ ++ return 1; ++} + + #define net_drop_ns NULL + #endif +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2273,6 +2273,9 @@ adjudge_to_death: + tcp_send_active_reset(sk, GFP_ATOMIC); + __NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPABORTONMEMORY); ++ } else if (!check_net(sock_net(sk))) { ++ /* Not possible to send reset; just close */ ++ tcp_set_state(sk, TCP_CLOSE); + } + } + +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -50,11 +50,19 @@ static void tcp_write_err(struct sock *s + * to prevent DoS attacks. It is called when a retransmission timeout + * or zero probe timeout occurs on orphaned socket. + * ++ * Also close if our net namespace is exiting; in that case there is no ++ * hope of ever communicating again since all netns interfaces are already ++ * down (or about to be down), and we need to release our dst references, ++ * which have been moved to the netns loopback interface, so the namespace ++ * can finish exiting. This condition is only possible if we are a kernel ++ * socket, as those do not hold references to the namespace. ++ * + * Criteria is still not confirmed experimentally and may change. + * We kill the socket, if: + * 1. If number of orphaned sockets exceeds an administratively configured + * limit. + * 2. If we have strong memory pressure. ++ * 3. If our net namespace is exiting. 
+ */ + static int tcp_out_of_resources(struct sock *sk, bool do_reset) + { +@@ -83,6 +91,13 @@ static int tcp_out_of_resources(struct s + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); + return 1; + } ++ ++ if (!check_net(sock_net(sk))) { ++ /* Not possible to send reset; just close */ ++ tcp_done(sk); ++ return 1; ++ } ++ + return 0; + } + diff --git a/queue-4.14/net-tls-fix-inverted-error-codes-to-avoid-endless-loop.patch b/queue-4.14/net-tls-fix-inverted-error-codes-to-avoid-endless-loop.patch new file mode 100644 index 00000000000..fd5ffc71fe6 --- /dev/null +++ b/queue-4.14/net-tls-fix-inverted-error-codes-to-avoid-endless-loop.patch @@ -0,0 +1,55 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: "r.hering@avm.de" +Date: Fri, 12 Jan 2018 15:42:06 +0100 +Subject: net/tls: Fix inverted error codes to avoid endless loop + +From: "r.hering@avm.de" + + +[ Upstream commit 30be8f8dba1bd2aff73e8447d59228471233a3d4 ] + +sendfile() calls can hang endlessly when using Kernel TLS if a socket error occurs. +Socket error codes must be inverted by Kernel TLS before returning because +they are stored with positive sign. If returned non-inverted they are +interpreted as number of bytes sent, causing endless looping of the +splice mechanic behind sendfile(). + +Signed-off-by: Robert Hering +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tls.h | 2 +- + net/tls/tls_sw.c | 4 ++-- + 2 files changed, 3 insertions(+), 3 deletions(-) + +--- a/include/net/tls.h ++++ b/include/net/tls.h +@@ -168,7 +168,7 @@ static inline bool tls_is_pending_open_r + + static inline void tls_err_abort(struct sock *sk) + { +- sk->sk_err = -EBADMSG; ++ sk->sk_err = EBADMSG; + sk->sk_error_report(sk); + } + +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -407,7 +407,7 @@ int tls_sw_sendmsg(struct sock *sk, stru + + while (msg_data_left(msg)) { + if (sk->sk_err) { +- ret = sk->sk_err; ++ ret = -sk->sk_err; + goto send_end; + } + +@@ -560,7 +560,7 @@ int tls_sw_sendpage(struct sock *sk, str + size_t copy, required_size; + + if (sk->sk_err) { +- ret = sk->sk_err; ++ ret = -sk->sk_err; + goto sendpage_end; + } + diff --git a/queue-4.14/net-tls-only-attach-to-sockets-in-established-state.patch b/queue-4.14/net-tls-only-attach-to-sockets-in-established-state.patch new file mode 100644 index 00000000000..c4d92f58fe5 --- /dev/null +++ b/queue-4.14/net-tls-only-attach-to-sockets-in-established-state.patch @@ -0,0 +1,46 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Ilya Lesokhin +Date: Tue, 16 Jan 2018 15:31:52 +0200 +Subject: net/tls: Only attach to sockets in ESTABLISHED state + +From: Ilya Lesokhin + + +[ Upstream commit d91c3e17f75f218022140dee18cf515292184a8f ] + +Calling accept on a TCP socket with a TLS ulp attached results +in two sockets that share the same ulp context. +The ulp context is freed while a socket is destroyed, so +after one of the sockets is released, the second socket will +trigger a use after free when it tries to access the ulp context +attached to it. +We restrict the TLS ulp to sockets in ESTABLISHED state +to prevent the scenario above. + +Fixes: 3c4d7559159b ("tls: kernel TLS support") +Reported-by: syzbot+904e7cd6c5c741609228@syzkaller.appspotmail.com +Signed-off-by: Ilya Lesokhin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tls/tls_main.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -444,6 +444,15 @@ static int tls_init(struct sock *sk) + struct tls_context *ctx; + int rc = 0; + ++ /* The TLS ulp is currently supported only for TCP sockets ++ * in ESTABLISHED state. ++ * Supporting sockets in LISTEN state will require us ++ * to modify the accept implementation to clone rather then ++ * share the ulp context. ++ */ ++ if (sk->sk_state != TCP_ESTABLISHED) ++ return -ENOTSUPP; ++ + /* allocate tls context */ + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { diff --git a/queue-4.14/net-vrf-add-support-for-sends-to-local-broadcast-address.patch b/queue-4.14/net-vrf-add-support-for-sends-to-local-broadcast-address.patch new file mode 100644 index 00000000000..d97e798ce19 --- /dev/null +++ b/queue-4.14/net-vrf-add-support-for-sends-to-local-broadcast-address.patch @@ -0,0 +1,43 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: David Ahern +Date: Wed, 24 Jan 2018 19:37:37 -0800 +Subject: net: vrf: Add support for sends to local broadcast address + +From: David Ahern + + +[ Upstream commit 1e19c4d689dc1e95bafd23ef68fbc0c6b9e05180 ] + +Sukumar reported that sends to the local broadcast address +(255.255.255.255) are broken. Check for the address in vrf driver +and do not redirect to the VRF device - similar to multicast +packets. + +With this change sockets can use SO_BINDTODEVICE to specify an +egress interface and receive responses. Note: the egress interface +can not be a VRF device but needs to be the enslaved device. + +https://bugzilla.kernel.org/show_bug.cgi?id=198521 + +Reported-by: Sukumar Gopalakrishnan +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vrf.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -674,8 +674,9 @@ static struct sk_buff *vrf_ip_out(struct + struct sock *sk, + struct sk_buff *skb) + { +- /* don't divert multicast */ +- if (ipv4_is_multicast(ip_hdr(skb)->daddr)) ++ /* don't divert multicast or local broadcast */ ++ if (ipv4_is_multicast(ip_hdr(skb)->daddr) || ++ ipv4_is_lbcast(ip_hdr(skb)->daddr)) + return skb; + + if (qdisc_tx_is_default(vrf_dev)) diff --git a/queue-4.14/netlink-extack-needs-to-be-reset-each-time-through-loop.patch b/queue-4.14/netlink-extack-needs-to-be-reset-each-time-through-loop.patch new file mode 100644 index 00000000000..ab0fa330a88 --- /dev/null +++ b/queue-4.14/netlink-extack-needs-to-be-reset-each-time-through-loop.patch @@ -0,0 +1,43 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: David Ahern +Date: Wed, 10 Jan 2018 13:00:39 -0800 +Subject: netlink: extack needs to be reset each time through loop + +From: David Ahern + + +[ Upstream commit cbbdf8433a5f117b1a2119ea30fc651b61ef7570 ] + +syzbot triggered the WARN_ON in netlink_ack testing the bad_attr value. +The problem is that netlink_rcv_skb loops over the skb repeatedly invoking +the callback and without resetting the extack leaving potentially stale +data. Initializing each time through avoids the WARN_ON. + +Fixes: 2d4bc93368f5a ("netlink: extended ACK reporting") +Reported-by: syzbot+315fa6766d0f7c359327@syzkaller.appspotmail.com +Signed-off-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2393,7 +2393,7 @@ int netlink_rcv_skb(struct sk_buff *skb, + struct nlmsghdr *, + struct netlink_ext_ack *)) + { +- struct netlink_ext_ack extack = {}; ++ struct netlink_ext_ack extack; + struct nlmsghdr *nlh; + int err; + +@@ -2414,6 +2414,7 @@ int netlink_rcv_skb(struct sk_buff *skb, + if (nlh->nlmsg_type < NLMSG_MIN_TYPE) + goto ack; + ++ memset(&extack, 0, sizeof(extack)); + err = cb(skb, nlh, &extack); + if (err == -EINTR) + goto skip; diff --git a/queue-4.14/netlink-reset-extack-earlier-in-netlink_rcv_skb.patch b/queue-4.14/netlink-reset-extack-earlier-in-netlink_rcv_skb.patch new file mode 100644 index 00000000000..6ae462aefd4 --- /dev/null +++ b/queue-4.14/netlink-reset-extack-earlier-in-netlink_rcv_skb.patch @@ -0,0 +1,42 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Xin Long +Date: Thu, 18 Jan 2018 14:48:03 +0800 +Subject: netlink: reset extack earlier in netlink_rcv_skb + +From: Xin Long + + +[ Upstream commit cd443f1e91ca600a092e780e8250cd6a2954b763 ] + +Move up the extack reset/initialization in netlink_rcv_skb, so that +those 'goto ack' will not skip it. Otherwise, later on netlink_ack +may use the uninitialized extack and cause kernel crash. + +Fixes: cbbdf8433a5f ("netlink: extack needs to be reset each time through loop") +Reported-by: syzbot+03bee3680a37466775e7@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Acked-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2400,6 +2400,7 @@ int netlink_rcv_skb(struct sk_buff *skb, + while (skb->len >= nlmsg_total_size(0)) { + int msglen; + ++ memset(&extack, 0, sizeof(extack)); + nlh = nlmsg_hdr(skb); + err = 0; + +@@ -2414,7 +2415,6 @@ int netlink_rcv_skb(struct sk_buff *skb, + if (nlh->nlmsg_type < NLMSG_MIN_TYPE) + goto ack; + +- memset(&extack, 0, sizeof(extack)); + err = cb(skb, nlh, &extack); + if (err == -EINTR) + goto skip; diff --git a/queue-4.14/nfp-use-the-correct-index-for-link-speed-table.patch b/queue-4.14/nfp-use-the-correct-index-for-link-speed-table.patch new file mode 100644 index 00000000000..3dcd3e10a5b --- /dev/null +++ b/queue-4.14/nfp-use-the-correct-index-for-link-speed-table.patch @@ -0,0 +1,32 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Jakub Kicinski +Date: Mon, 15 Jan 2018 11:47:53 -0800 +Subject: nfp: use the correct index for link speed table + +From: Jakub Kicinski + + +[ Upstream commit 0d9c9f0f40ca262b67fc06a702b85f3976f5e1a1 ] + +sts variable is holding link speed as well as state. We should +be using ls to index into ls_to_ethtool. + +Fixes: 265aeb511bd5 ("nfp: add support for .get_link_ksettings()") +Signed-off-by: Jakub Kicinski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c ++++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +@@ -306,7 +306,7 @@ nfp_net_get_link_ksettings(struct net_de + ls >= ARRAY_SIZE(ls_to_ethtool)) + return 0; + +- cmd->base.speed = ls_to_ethtool[sts]; ++ cmd->base.speed = ls_to_ethtool[ls]; + cmd->base.duplex = DUPLEX_FULL; + + return 0; diff --git a/queue-4.14/ppp-unlock-all_ppp_mutex-before-registering-device.patch b/queue-4.14/ppp-unlock-all_ppp_mutex-before-registering-device.patch new file mode 100644 index 00000000000..e0e0cf22e0e --- /dev/null +++ b/queue-4.14/ppp-unlock-all_ppp_mutex-before-registering-device.patch @@ -0,0 +1,58 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Guillaume Nault +Date: Wed, 10 Jan 2018 16:24:45 +0100 +Subject: ppp: unlock all_ppp_mutex before registering device + +From: Guillaume Nault + + +[ Upstream commit 0171c41835591e9aa2e384b703ef9a6ae367c610 ] + +ppp_dev_uninit(), which is the .ndo_uninit() handler of PPP devices, +needs to lock pn->all_ppp_mutex. Therefore we mustn't call +register_netdevice() with pn->all_ppp_mutex already locked, or we'd +deadlock in case register_netdevice() fails and calls .ndo_uninit(). + +Fortunately, we can unlock pn->all_ppp_mutex before calling +register_netdevice(). This lock protects pn->units_idr, which isn't +used in the device registration process. + +However, keeping pn->all_ppp_mutex locked during device registration +did ensure that no device in transient state would be published in +pn->units_idr. In practice, unlocking it before calling +register_netdevice() doesn't change this property: ppp_unit_register() +is called with 'ppp_mutex' locked and all searches done in +pn->units_idr hold this lock too. 
+ +Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion") +Reported-and-tested-by: syzbot+367889b9c9e279219175@syzkaller.appspotmail.com +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/ppp_generic.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -1003,17 +1003,18 @@ static int ppp_unit_register(struct ppp + if (!ifname_is_set) + snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index); + ++ mutex_unlock(&pn->all_ppp_mutex); ++ + ret = register_netdevice(ppp->dev); + if (ret < 0) + goto err_unit; + + atomic_inc(&ppp_unit_count); + +- mutex_unlock(&pn->all_ppp_mutex); +- + return 0; + + err_unit: ++ mutex_lock(&pn->all_ppp_mutex); + unit_put(&pn->units_idr, ppp->file.index); + err: + mutex_unlock(&pn->all_ppp_mutex); diff --git a/queue-4.14/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch b/queue-4.14/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch new file mode 100644 index 00000000000..4964e13e363 --- /dev/null +++ b/queue-4.14/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch @@ -0,0 +1,126 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Guillaume Nault +Date: Mon, 22 Jan 2018 18:06:37 +0100 +Subject: pppoe: take ->needed_headroom of lower device into account on xmit + +From: Guillaume Nault + + +[ Upstream commit 02612bb05e51df8489db5e94d0cf8d1c81f87b0c ] + +In pppoe_sendmsg(), reserving dev->hard_header_len bytes of headroom +was probably fine before the introduction of ->needed_headroom in +commit f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom"). + +But now, virtual devices typically advertise the size of their overhead +in dev->needed_headroom, so we must also take it into account in +skb_reserve(). 
+Allocation size of skb is also updated to take dev->needed_tailroom +into account and replace the arbitrary 32 bytes with the real size of +a PPPoE header. + +This issue was discovered by syzbot, who connected a pppoe socket to a +gre device which had dev->header_ops->create == ipgre_header and +dev->hard_header_len == 0. Therefore, PPPoE didn't reserve any +headroom, and dev_hard_header() crashed when ipgre_header() tried to +prepend its header to skb->data. + +skbuff: skb_under_panic: text:000000001d390b3a len:31 put:24 +head:00000000d8ed776f data:000000008150e823 tail:0x7 end:0xc0 dev:gre0 +------------[ cut here ]------------ +kernel BUG at net/core/skbuff.c:104! +invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 1 PID: 3670 Comm: syzkaller801466 Not tainted +4.15.0-rc7-next-20180115+ #97 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +RIP: 0010:skb_panic+0x162/0x1f0 net/core/skbuff.c:100 +RSP: 0018:ffff8801d9bd7840 EFLAGS: 00010282 +RAX: 0000000000000083 RBX: ffff8801d4f083c0 RCX: 0000000000000000 +RDX: 0000000000000083 RSI: 1ffff1003b37ae92 RDI: ffffed003b37aefc +RBP: ffff8801d9bd78a8 R08: 1ffff1003b37ae8a R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff86200de0 +R13: ffffffff84a981ad R14: 0000000000000018 R15: ffff8801d2d34180 +FS: 00000000019c4880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00000000208bc000 CR3: 00000001d9111001 CR4: 00000000001606e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + skb_under_panic net/core/skbuff.c:114 [inline] + skb_push+0xce/0xf0 net/core/skbuff.c:1714 + ipgre_header+0x6d/0x4e0 net/ipv4/ip_gre.c:879 + dev_hard_header include/linux/netdevice.h:2723 [inline] + pppoe_sendmsg+0x58e/0x8b0 drivers/net/ppp/pppoe.c:890 + 
sock_sendmsg_nosec net/socket.c:630 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:640 + sock_write_iter+0x31a/0x5d0 net/socket.c:909 + call_write_iter include/linux/fs.h:1775 [inline] + do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653 + do_iter_write+0x154/0x540 fs/read_write.c:932 + vfs_writev+0x18a/0x340 fs/read_write.c:977 + do_writev+0xfc/0x2a0 fs/read_write.c:1012 + SYSC_writev fs/read_write.c:1085 [inline] + SyS_writev+0x27/0x30 fs/read_write.c:1082 + entry_SYSCALL_64_fastpath+0x29/0xa0 + +Admittedly PPPoE shouldn't be allowed to run on non Ethernet-like +interfaces, but reserving space for ->needed_headroom is a more +fundamental issue that needs to be addressed first. + +Same problem exists for __pppoe_xmit(), which also needs to take +dev->needed_headroom into account in skb_cow_head(). + +Fixes: f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom") +Reported-by: syzbot+ed0838d0fa4c4f2b528e20286e6dc63effc7c14d@syzkaller.appspotmail.com +Signed-off-by: Guillaume Nault +Reviewed-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pppoe.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket * + struct pppoe_hdr *ph; + struct net_device *dev; + char *start; ++ int hlen; + + lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { +@@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket * + if (total_len > (dev->mtu + dev->hard_header_len)) + goto end; + +- +- skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, +- 0, GFP_KERNEL); ++ hlen = LL_RESERVED_SPACE(dev); ++ skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + ++ dev->needed_tailroom, 0, GFP_KERNEL); + if (!skb) { + error = -ENOMEM; + goto end; + } + + /* Reserve space for headers. 
*/ +- skb_reserve(skb, dev->hard_header_len); ++ skb_reserve(skb, hlen); + skb_reset_network_header(skb); + + skb->dev = dev; +@@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, + /* Copy the data if there is no space for the header or if it's + * read-only. + */ +- if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) ++ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) + goto abort; + + __skb_push(skb, sizeof(*ph)); diff --git a/queue-4.14/r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch b/queue-4.14/r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch new file mode 100644 index 00000000000..958272ea8ff --- /dev/null +++ b/queue-4.14/r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch @@ -0,0 +1,47 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Francois Romieu +Date: Fri, 26 Jan 2018 01:53:26 +0100 +Subject: r8169: fix memory corruption on retrieval of hardware statistics. + +From: Francois Romieu + + +[ Upstream commit a78e93661c5fd30b9e1dee464b2f62f966883ef7 ] + +Hardware statistics retrieval hurts in tight invocation loops. + +Avoid extraneous write and enforce strict ordering of writes targeted to +the tally counters dump area address registers. + +Signed-off-by: Francois Romieu +Tested-by: Oliver Freyermuth +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -2239,19 +2239,14 @@ static bool rtl8169_do_counters(struct n + void __iomem *ioaddr = tp->mmio_addr; + dma_addr_t paddr = tp->counters_phys_addr; + u32 cmd; +- bool ret; + + RTL_W32(CounterAddrHigh, (u64)paddr >> 32); ++ RTL_R32(CounterAddrHigh); + cmd = (u64)paddr & DMA_BIT_MASK(32); + RTL_W32(CounterAddrLow, cmd); + RTL_W32(CounterAddrLow, cmd | counter_cmd); + +- ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); +- +- RTL_W32(CounterAddrLow, 0); +- RTL_W32(CounterAddrHigh, 0); +- +- return ret; ++ return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); + } + + static bool rtl8169_reset_counters(struct net_device *dev) diff --git a/queue-4.14/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch b/queue-4.14/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch new file mode 100644 index 00000000000..73e081bcc17 --- /dev/null +++ b/queue-4.14/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch @@ -0,0 +1,56 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Xin Long +Date: Mon, 15 Jan 2018 17:02:00 +0800 +Subject: sctp: do not allow the v4 socket to bind a v4mapped v6 address + +From: Xin Long + + +[ Upstream commit c5006b8aa74599ce19104b31d322d2ea9ff887cc ] + +The check in sctp_sockaddr_af is not robust enough to forbid binding a +v4mapped v6 addr on a v4 socket. + +The worse thing is that v4 socket's bind_verify would not convert this +v4mapped v6 addr to a v4 addr. syzbot even reported a crash as the v4 +socket bound a v6 addr. + +This patch is to fix it by doing the common sa.sa_family check first, +then AF_INET check for v4mapped v6 addrs. 
+ +Fixes: 7dab83de50c7 ("sctp: Support ipv6only AF_INET6 sockets.") +Reported-by: syzbot+7b7b518b1228d2743963@syzkaller.appspotmail.com +Acked-by: Neil Horman +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -334,16 +334,14 @@ static struct sctp_af *sctp_sockaddr_af( + if (len < sizeof (struct sockaddr)) + return NULL; + ++ if (!opt->pf->af_supported(addr->sa.sa_family, opt)) ++ return NULL; ++ + /* V4 mapped address are really of AF_INET family */ + if (addr->sa.sa_family == AF_INET6 && +- ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { +- if (!opt->pf->af_supported(AF_INET, opt)) +- return NULL; +- } else { +- /* Does this PF support this AF? */ +- if (!opt->pf->af_supported(addr->sa.sa_family, opt)) +- return NULL; +- } ++ ipv6_addr_v4mapped(&addr->v6.sin6_addr) && ++ !opt->pf->af_supported(AF_INET, opt)) ++ return NULL; + + /* If we get this far, af is valid. */ + af = sctp_get_af_specific(addr->sa.sa_family); diff --git a/queue-4.14/sctp-reinit-stream-if-stream-outcnt-has-been-change-by-sinit-in-sendmsg.patch b/queue-4.14/sctp-reinit-stream-if-stream-outcnt-has-been-change-by-sinit-in-sendmsg.patch new file mode 100644 index 00000000000..c3f2efb8144 --- /dev/null +++ b/queue-4.14/sctp-reinit-stream-if-stream-outcnt-has-been-change-by-sinit-in-sendmsg.patch @@ -0,0 +1,50 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Xin Long +Date: Mon, 15 Jan 2018 17:01:19 +0800 +Subject: sctp: reinit stream if stream outcnt has been change by sinit in sendmsg + +From: Xin Long + + +[ Upstream commit 625637bf4afa45204bd87e4218645182a919485a ] + +After introducing sctp_stream structure, sctp uses stream->outcnt as the +out stream nums instead of c.sinit_num_ostreams. 
+ +However when users use sinit in cmsg, it only updates c.sinit_num_ostreams +in sctp_sendmsg. At that moment, stream->outcnt is still using previous +value. If its value is not updated, the sinit_num_ostreams of sinit could +not really work. + +This patch is to fix it by updating stream->outcnt and reiniting stream +if stream outcnt has been changed by sinit in sendmsg. + +Fixes: a83863174a61 ("sctp: prepare asoc stream for stream reconf") +Signed-off-by: Xin Long +Acked-by: Neil Horman +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -1880,8 +1880,14 @@ static int sctp_sendmsg(struct sock *sk, + */ + if (sinit) { + if (sinit->sinit_num_ostreams) { +- asoc->c.sinit_num_ostreams = +- sinit->sinit_num_ostreams; ++ __u16 outcnt = sinit->sinit_num_ostreams; ++ ++ asoc->c.sinit_num_ostreams = outcnt; ++ /* outcnt has been changed, so re-init stream */ ++ err = sctp_stream_init(&asoc->stream, outcnt, 0, ++ GFP_KERNEL); ++ if (err) ++ goto out_free; + } + if (sinit->sinit_max_instreams) { + asoc->c.sinit_max_instreams = diff --git a/queue-4.14/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch b/queue-4.14/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch new file mode 100644 index 00000000000..5339583d304 --- /dev/null +++ b/queue-4.14/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch @@ -0,0 +1,87 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Xin Long +Date: Mon, 15 Jan 2018 17:01:36 +0800 +Subject: sctp: return error if the asoc has been peeled off in sctp_wait_for_sndbuf + +From: Xin Long + + +[ Upstream commit a0ff660058b88d12625a783ce9e5c1371c87951f ] + +After commit cea0cc80a677 ("sctp: use the right sk after waking up from +wait_buf sleep"), it may change to lock another 
sk if the asoc has been +peeled off in sctp_wait_for_sndbuf. + +However, the asoc's new sk could be already closed elsewhere, as it's in +the sendmsg context of the old sk that can't avoid the new sk's closing. +If the sk's last one refcnt is held by this asoc, later on after putting +this asoc, the new sk will be freed, while under its own lock. + +This patch is to revert that commit, but fix the old issue by returning +error under the old sk's lock. + +Fixes: cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep") +Reported-by: syzbot+ac6ea7baa4432811eb50@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -84,7 +84,7 @@ + static int sctp_writeable(struct sock *sk); + static void sctp_wfree(struct sk_buff *skb); + static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, +- size_t msg_len, struct sock **orig_sk); ++ size_t msg_len); + static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); + static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); + static int sctp_wait_for_accept(struct sock *sk, long timeo); +@@ -1961,7 +1961,7 @@ static int sctp_sendmsg(struct sock *sk, + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + if (!sctp_wspace(asoc)) { + /* sk can be changed by peel off when waiting for buf. */ +- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); ++ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); + if (err) { + if (err == -ESRCH) { + /* asoc is already dead. */ +@@ -7825,12 +7825,12 @@ void sctp_sock_rfree(struct sk_buff *skb + + /* Helper function to wait for space in the sndbuf. 
*/ + static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, +- size_t msg_len, struct sock **orig_sk) ++ size_t msg_len) + { + struct sock *sk = asoc->base.sk; +- int err = 0; + long current_timeo = *timeo_p; + DEFINE_WAIT(wait); ++ int err = 0; + + pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, + *timeo_p, msg_len); +@@ -7859,17 +7859,13 @@ static int sctp_wait_for_sndbuf(struct s + release_sock(sk); + current_timeo = schedule_timeout(current_timeo); + lock_sock(sk); +- if (sk != asoc->base.sk) { +- release_sock(sk); +- sk = asoc->base.sk; +- lock_sock(sk); +- } ++ if (sk != asoc->base.sk) ++ goto do_error; + + *timeo_p = current_timeo; + } + + out: +- *orig_sk = sk; + finish_wait(&asoc->wait, &wait); + + /* Release the association's refcnt. */ diff --git a/queue-4.14/series b/queue-4.14/series index e3dae0fa7b2..194cce229b5 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -20,3 +20,40 @@ arm-net-bpf-fix-register-saving.patch arm-net-bpf-fix-ldx-instructions.patch arm-net-bpf-clarify-tail_call-index.patch drm-vc4-fix-null-pointer-dereference-in-vc4_save_hang_state.patch +net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch +ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch +dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch +ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch +ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch +ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch +lan78xx-fix-failure-in-usb-full-speed.patch +net-igmp-fix-source-address-check-for-igmpv3-reports.patch +net-qdisc_pkt_len_init-should-be-more-robust.patch +net-tcp-close-sock-if-net-namespace-is-exiting.patch +net-tls-fix-inverted-error-codes-to-avoid-endless-loop.patch +net-vrf-add-support-for-sends-to-local-broadcast-address.patch +pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch 
+r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch +sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch +sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch +tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch +net-ib-mlx5-don-t-disable-local-loopback-multicast-traffic-when-needed.patch +net-mlx5-fix-get-vector-affinity-helper-function.patch +ppp-unlock-all_ppp_mutex-before-registering-device.patch +be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch +ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch +gso-validate-gso_type-in-gso-handlers.patch +mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch +tun-fix-a-memory-leak-for-tfile-tx_array.patch +flow_dissector-properly-cap-thoff-field.patch +sctp-reinit-stream-if-stream-outcnt-has-been-change-by-sinit-in-sendmsg.patch +netlink-extack-needs-to-be-reset-each-time-through-loop.patch +net-mlx5e-fix-fixpoint-divide-exception-in-mlx5e_am_stats_compare.patch +nfp-use-the-correct-index-for-link-speed-table.patch +netlink-reset-extack-earlier-in-netlink_rcv_skb.patch +net-tls-only-attach-to-sockets-in-established-state.patch +tls-fix-sw_ctx-leak.patch +tls-return-ebusy-if-crypto_info-is-already-set.patch +tls-reset-crypto_info-when-do_tls_setsockopt_tx-fails.patch +net-ipv4-make-ip-route-get-match-iif-lo-rules-again.patch +vmxnet3-repair-memory-leak.patch diff --git a/queue-4.14/tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch b/queue-4.14/tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch new file mode 100644 index 00000000000..c213ec26442 --- /dev/null +++ b/queue-4.14/tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch @@ -0,0 +1,81 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Cong Wang +Date: Wed, 10 Jan 2018 12:50:25 -0800 +Subject: tipc: fix a memory leak in tipc_nl_node_get_link() + +From: Cong Wang + + +[ Upstream commit 59b36613e85fb16ebf9feaf914570879cd5c2a21 ] + +When 
tipc_node_find_by_name() fails, the nlmsg is not +freed. + +While on it, switch to a goto label to properly +free it. + +Fixes: be9c086715c ("tipc: narrow down exposure of struct tipc_node") +Reported-by: Dmitry Vyukov +Cc: Jon Maloy +Cc: Ying Xue +Signed-off-by: Cong Wang +Acked-by: Ying Xue +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/node.c | 26 ++++++++++++++------------ + 1 file changed, 14 insertions(+), 12 deletions(-) + +--- a/net/tipc/node.c ++++ b/net/tipc/node.c +@@ -1848,36 +1848,38 @@ int tipc_nl_node_get_link(struct sk_buff + + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg); +- if (err) { +- nlmsg_free(msg.skb); +- return err; +- } ++ if (err) ++ goto err_free; + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; + + node = tipc_node_find_by_name(net, name, &bearer_id); +- if (!node) +- return -EINVAL; ++ if (!node) { ++ err = -EINVAL; ++ goto err_free; ++ } + + tipc_node_read_lock(node); + link = node->links[bearer_id].link; + if (!link) { + tipc_node_read_unlock(node); +- nlmsg_free(msg.skb); +- return -EINVAL; ++ err = -EINVAL; ++ goto err_free; + } + + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_read_unlock(node); +- if (err) { +- nlmsg_free(msg.skb); +- return err; +- } ++ if (err) ++ goto err_free; + } + + return genlmsg_reply(msg.skb, info); ++ ++err_free: ++ nlmsg_free(msg.skb); ++ return err; + } + + int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) diff --git a/queue-4.14/tls-fix-sw_ctx-leak.patch b/queue-4.14/tls-fix-sw_ctx-leak.patch new file mode 100644 index 00000000000..714d107f45e --- /dev/null +++ b/queue-4.14/tls-fix-sw_ctx-leak.patch @@ -0,0 +1,66 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Sabrina Dubroca +Date: Tue, 16 Jan 2018 16:04:26 +0100 +Subject: tls: fix sw_ctx leak + +From: Sabrina Dubroca + + +[ Upstream commit cf6d43ef66f416282121f436ce1bee9a25199d52 ] + +During 
setsockopt(SOL_TCP, TLS_TX), if initialization of the software +context fails in tls_set_sw_offload(), we leak sw_ctx. We also don't +reassign ctx->priv_ctx to NULL, so we can't even do another attempt to +set it up on the same socket, as it will fail with -EEXIST. + +Fixes: 3c4d7559159b ('tls: kernel TLS support') +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tls/tls_sw.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/net/tls/tls_sw.c ++++ b/net/tls/tls_sw.c +@@ -697,18 +697,17 @@ int tls_set_sw_offload(struct sock *sk, + } + default: + rc = -EINVAL; +- goto out; ++ goto free_priv; + } + + ctx->prepend_size = TLS_HEADER_SIZE + nonce_size; + ctx->tag_size = tag_size; + ctx->overhead_size = ctx->prepend_size + ctx->tag_size; + ctx->iv_size = iv_size; +- ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, +- GFP_KERNEL); ++ ctx->iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, GFP_KERNEL); + if (!ctx->iv) { + rc = -ENOMEM; +- goto out; ++ goto free_priv; + } + memcpy(ctx->iv, gcm_128_info->salt, TLS_CIPHER_AES_GCM_128_SALT_SIZE); + memcpy(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); +@@ -756,7 +755,7 @@ int tls_set_sw_offload(struct sock *sk, + + rc = crypto_aead_setauthsize(sw_ctx->aead_send, ctx->tag_size); + if (!rc) +- goto out; ++ return 0; + + free_aead: + crypto_free_aead(sw_ctx->aead_send); +@@ -767,6 +766,9 @@ free_rec_seq: + free_iv: + kfree(ctx->iv); + ctx->iv = NULL; ++free_priv: ++ kfree(ctx->priv_ctx); ++ ctx->priv_ctx = NULL; + out: + return rc; + } diff --git a/queue-4.14/tls-reset-crypto_info-when-do_tls_setsockopt_tx-fails.patch b/queue-4.14/tls-reset-crypto_info-when-do_tls_setsockopt_tx-fails.patch new file mode 100644 index 00000000000..171b45a1ddb --- /dev/null +++ b/queue-4.14/tls-reset-crypto_info-when-do_tls_setsockopt_tx-fails.patch @@ -0,0 +1,60 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Sabrina Dubroca 
+Date: Tue, 16 Jan 2018 16:04:28 +0100 +Subject: tls: reset crypto_info when do_tls_setsockopt_tx fails + +From: Sabrina Dubroca + + +[ Upstream commit 6db959c82eb039a151d95a0f8b7dea643657327a ] + +The current code copies directly from userspace to ctx->crypto_send, but +doesn't always reinitialize it to 0 on failure. This causes any +subsequent attempt to use this setsockopt to fail because of the +TLS_CRYPTO_INFO_READY check, even though crypto_info is not actually +ready. + +This should result in a correctly set up socket after the 3rd call, but +currently it does not: + + size_t s = sizeof(struct tls12_crypto_info_aes_gcm_128); + struct tls12_crypto_info_aes_gcm_128 crypto_good = { + .info.version = TLS_1_2_VERSION, + .info.cipher_type = TLS_CIPHER_AES_GCM_128, + }; + + struct tls12_crypto_info_aes_gcm_128 crypto_bad_type = crypto_good; + crypto_bad_type.info.cipher_type = 42; + + setsockopt(sock, SOL_TLS, TLS_TX, &crypto_bad_type, s); + setsockopt(sock, SOL_TLS, TLS_TX, &crypto_good, s - 1); + setsockopt(sock, SOL_TLS, TLS_TX, &crypto_good, s); + +Fixes: 3c4d7559159b ("tls: kernel TLS support") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tls/tls_main.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -373,7 +373,7 @@ static int do_tls_setsockopt_tx(struct s + case TLS_CIPHER_AES_GCM_128: { + if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { + rc = -EINVAL; +- goto out; ++ goto err_crypto_info; + } + rc = copy_from_user( + crypto_info, +@@ -388,7 +388,7 @@ static int do_tls_setsockopt_tx(struct s + } + default: + rc = -EINVAL; +- goto out; ++ goto err_crypto_info; + } + + ctx->sk_write_space = sk->sk_write_space; diff --git a/queue-4.14/tls-return-ebusy-if-crypto_info-is-already-set.patch b/queue-4.14/tls-return-ebusy-if-crypto_info-is-already-set.patch new file mode 100644 index 00000000000..f206fce51c6 --- /dev/null +++ b/queue-4.14/tls-return-ebusy-if-crypto_info-is-already-set.patch @@ -0,0 +1,35 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Sabrina Dubroca +Date: Tue, 16 Jan 2018 16:04:27 +0100 +Subject: tls: return -EBUSY if crypto_info is already set + +From: Sabrina Dubroca + + +[ Upstream commit 877d17c79b66466942a836403773276e34fe3614 ] + +do_tls_setsockopt_tx returns 0 without doing anything when crypto_info +is already set. Silent failure is confusing for users. + +Fixes: 3c4d7559159b ("tls: kernel TLS support") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tls/tls_main.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/net/tls/tls_main.c ++++ b/net/tls/tls_main.c +@@ -364,8 +364,10 @@ static int do_tls_setsockopt_tx(struct s + crypto_info = &ctx->crypto_send; + + /* Currently we don't support set crypto info more than one time */ +- if (TLS_CRYPTO_INFO_READY(crypto_info)) ++ if (TLS_CRYPTO_INFO_READY(crypto_info)) { ++ rc = -EBUSY; + goto out; ++ } + + switch (tmp_crypto_info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: { diff --git a/queue-4.14/tun-fix-a-memory-leak-for-tfile-tx_array.patch b/queue-4.14/tun-fix-a-memory-leak-for-tfile-tx_array.patch new file mode 100644 index 00000000000..5bd140adaef --- /dev/null +++ b/queue-4.14/tun-fix-a-memory-leak-for-tfile-tx_array.patch @@ -0,0 +1,81 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Cong Wang +Date: Mon, 15 Jan 2018 11:37:29 -0800 +Subject: tun: fix a memory leak for tfile->tx_array + +From: Cong Wang + + +[ Upstream commit 4df0bfc79904b7169dc77dcce44598b1545721f9 ] + +tfile->tun could be detached before we close the tun fd, +via tun_detach_all(), so it should not be used to check for +tfile->tx_array. + +As Jason suggested, we probably have to clean it up +unconditionally both in __tun_detach() and tun_detach_all(), +but this requires checking whether it is initialized or not. +Currently skb_array_cleanup() doesn't have such a check, +so I check it in the caller and introduce a helper function, +it is a bit ugly but we can always improve it in net-next. + +Reported-by: Dmitry Vyukov +Fixes: 1576d9860599 ("tun: switch to use skb array for tx") +Cc: Jason Wang +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -534,6 +534,14 @@ static void tun_queue_purge(struct tun_f + skb_queue_purge(&tfile->sk.sk_error_queue); + } + ++static void tun_cleanup_tx_array(struct tun_file *tfile) ++{ ++ if (tfile->tx_array.ring.queue) { ++ skb_array_cleanup(&tfile->tx_array); ++ memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); ++ } ++} ++ + static void __tun_detach(struct tun_file *tfile, bool clean) + { + struct tun_file *ntfile; +@@ -575,8 +583,7 @@ static void __tun_detach(struct tun_file + tun->dev->reg_state == NETREG_REGISTERED) + unregister_netdevice(tun->dev); + } +- if (tun) +- skb_array_cleanup(&tfile->tx_array); ++ tun_cleanup_tx_array(tfile); + sock_put(&tfile->sk); + } + } +@@ -616,11 +623,13 @@ static void tun_detach_all(struct net_de + /* Drop read queue */ + tun_queue_purge(tfile); + sock_put(&tfile->sk); ++ tun_cleanup_tx_array(tfile); + } + list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_enable_queue(tfile); + tun_queue_purge(tfile); + sock_put(&tfile->sk); ++ tun_cleanup_tx_array(tfile); + } + BUG_ON(tun->numdisabled != 0); + +@@ -2624,6 +2633,8 @@ static int tun_chr_open(struct inode *in + + sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); + ++ memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); ++ + return 0; + } + diff --git a/queue-4.14/vmxnet3-repair-memory-leak.patch b/queue-4.14/vmxnet3-repair-memory-leak.patch new file mode 100644 index 00000000000..56d06410ad6 --- /dev/null +++ b/queue-4.14/vmxnet3-repair-memory-leak.patch @@ -0,0 +1,58 @@ +From foo@baz Sun Jan 28 17:35:08 CET 2018 +From: Neil Horman +Date: Mon, 22 Jan 2018 16:06:37 -0500 +Subject: vmxnet3: repair memory leak + +From: Neil Horman + + +[ Upstream commit 848b159835ddef99cc4193083f7e786c3992f580 ] + +with the introduction of commit +b0eb57cb97e7837ebb746404c2c58c6f536f23fa, it appears that 
rq->buf_info +is improperly handled. While it is heap allocated when an rx queue is +set up, and freed when torn down, an old line of code in +vmxnet3_rq_destroy was not properly removed, leading to rq->buf_info[0] +being set to NULL prior to its being freed, causing a memory leak, which +eventually exhausts the system on repeated create/destroy operations +(for example, when the mtu of a vmxnet3 interface is changed +frequently). + +Fix is pretty straightforward, just move the NULL set to after the +free. + +Tested by myself with successful results + +Applies to net, and should likely be queued for stable, please + +Signed-off-by: Neil Horman +Reported-By: boyang@redhat.com +CC: boyang@redhat.com +CC: Shrikrishna Khare +CC: "VMware, Inc." +CC: David S. Miller +Acked-by: Shrikrishna Khare +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vm + rq->rx_ring[i].basePA); + rq->rx_ring[i].base = NULL; + } +- rq->buf_info[i] = NULL; + } + + if (rq->data_ring.base) { +@@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vm + (rq->rx_ring[0].size + rq->rx_ring[1].size); + dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], + rq->buf_info_pa); ++ rq->buf_info[0] = rq->buf_info[1] = NULL; + } + } +