From: Greg Kroah-Hartman Date: Sun, 28 Jan 2018 16:39:35 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v4.4.114~16 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=266946ba02be987174181f881e1fba9408f464f0;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch flow_dissector-properly-cap-thoff-field.patch gso-validate-gso_type-in-gso-handlers.patch ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch lan78xx-fix-failure-in-usb-full-speed.patch mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch net-igmp-fix-source-address-check-for-igmpv3-reports.patch net-qdisc_pkt_len_init-should-be-more-robust.patch net-tcp-close-sock-if-net-namespace-is-exiting.patch ppp-unlock-all_ppp_mutex-before-registering-device.patch pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch tun-fix-a-memory-leak-for-tfile-tx_array.patch vmxnet3-repair-memory-leak.patch --- diff --git a/queue-4.9/be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch b/queue-4.9/be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch new file mode 100644 index 00000000000..3b5b0346d15 --- /dev/null +++ 
b/queue-4.9/be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch @@ -0,0 +1,51 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Ivan Vecera +Date: Fri, 19 Jan 2018 20:23:50 +0100 +Subject: be2net: restore properly promisc mode after queues reconfiguration + +From: Ivan Vecera + + +[ Upstream commit 52acf06451930eb4cefabd5ecea56e2d46c32f76 ] + +The commit 622190669403 ("be2net: Request RSS capability of Rx interface +depending on number of Rx rings") modified be_update_queues() so the +IFACE (HW representation of the netdevice) is destroyed and then +re-created. This causes a regression because potential promiscuous mode +is not restored properly during be_open() because the driver thinks +that the HW has promiscuous mode already enabled. + +Note that Lancer is not affected by this bug because RX-filter flags are +disabled during be_close() for this chipset. + +Cc: Sathya Perla +Cc: Ajit Khaparde +Cc: Sriharsha Basavapatna +Cc: Somnath Kotur + +Fixes: 622190669403 ("be2net: Request RSS capability of Rx interface depending on number of Rx rings") +Signed-off-by: Ivan Vecera +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/emulex/benet/be_main.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +--- a/drivers/net/ethernet/emulex/benet/be_main.c ++++ b/drivers/net/ethernet/emulex/benet/be_main.c +@@ -4733,6 +4733,15 @@ int be_update_queues(struct be_adapter * + + be_schedule_worker(adapter); + ++ /* ++ * The IF was destroyed and re-created. We need to clear ++ * all promiscuous flags valid for the destroyed IF. ++ * Without this promisc mode is not restored during ++ * be_open() because the driver thinks that it is ++ * already enabled in HW. 
++ */ ++ adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; ++ + if (netif_running(netdev)) + status = be_open(netdev); + diff --git a/queue-4.9/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch b/queue-4.9/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch new file mode 100644 index 00000000000..89ac82ce9e3 --- /dev/null +++ b/queue-4.9/dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch @@ -0,0 +1,44 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Alexey Kodanev +Date: Fri, 26 Jan 2018 15:14:16 +0300 +Subject: dccp: don't restart ccid2_hc_tx_rto_expire() if sk in closed state + +From: Alexey Kodanev + + +[ Upstream commit dd5684ecae3bd8e44b644f50e2c12c7e57fdfef5 ] + +ccid2_hc_tx_rto_expire() timer callback always restarts the timer +again and can run indefinitely (unless it is stopped outside), and after +commit 120e9dabaf55 ("dccp: defer ccid_hc_tx_delete() at dismantle time"), +which moved ccid_hc_tx_delete() (also includes sk_stop_timer()) from +dccp_destroy_sock() to sk_destruct(), this started to happen quite often. +The timer prevents releasing the socket, as a result, sk_destruct() won't +be called. + +Found with LTP/dccp_ipsec tests running on the bonding device, +which later couldn't be unloaded after the tests were completed: + + unregister_netdevice: waiting for bond0 to become free. Usage count = 148 + +Fixes: 2a91aa396739 ("[DCCP] CCID2: Initial CCID2 (TCP-Like) implementation") +Signed-off-by: Alexey Kodanev +Reviewed-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dccp/ccids/ccid2.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/dccp/ccids/ccid2.c ++++ b/net/dccp/ccids/ccid2.c +@@ -140,6 +140,9 @@ static void ccid2_hc_tx_rto_expire(unsig + + ccid2_pr_debug("RTO_EXPIRE\n"); + ++ if (sk->sk_state == DCCP_CLOSED) ++ goto out; ++ + /* back-off timer */ + hc->tx_rto <<= 1; + if (hc->tx_rto > DCCP_RTO_MAX) diff --git a/queue-4.9/flow_dissector-properly-cap-thoff-field.patch b/queue-4.9/flow_dissector-properly-cap-thoff-field.patch new file mode 100644 index 00000000000..43538ca9517 --- /dev/null +++ b/queue-4.9/flow_dissector-properly-cap-thoff-field.patch @@ -0,0 +1,95 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Eric Dumazet +Date: Wed, 17 Jan 2018 14:21:13 -0800 +Subject: flow_dissector: properly cap thoff field + +From: Eric Dumazet + + +[ Upstream commit d0c081b49137cd3200f2023c0875723be66e7ce5 ] + +syzbot reported yet another crash [1] that is caused by +insufficient validation of DODGY packets. + +Two bugs are happening here to trigger the crash. + +1) Flow dissection leaves with incorrect thoff field. + +2) skb_probe_transport_header() sets transport header to this invalid +thoff, even if pointing after skb valid data. + +3) qdisc_pkt_len_init() reads out-of-bound data because it +trusts tcp_hdrlen(skb) + +Possible fixes : + +- Full flow dissector validation before injecting bad DODGY packets in +the stack. + This approach was attempted here : https://patchwork.ozlabs.org/patch/ +861874/ + +- Have more robust functions in the core. + This might be needed anyway for stable versions. + +This patch fixes the flow dissection issue. 
+ +[1] +CPU: 1 PID: 3144 Comm: syzkaller271204 Not tainted 4.15.0-rc4-mm1+ #49 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:17 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:53 + print_address_description+0x73/0x250 mm/kasan/report.c:256 + kasan_report_error mm/kasan/report.c:355 [inline] + kasan_report+0x23b/0x360 mm/kasan/report.c:413 + __asan_report_load2_noabort+0x14/0x20 mm/kasan/report.c:432 + __tcp_hdrlen include/linux/tcp.h:35 [inline] + tcp_hdrlen include/linux/tcp.h:40 [inline] + qdisc_pkt_len_init net/core/dev.c:3160 [inline] + __dev_queue_xmit+0x20d3/0x2200 net/core/dev.c:3465 + dev_queue_xmit+0x17/0x20 net/core/dev.c:3554 + packet_snd net/packet/af_packet.c:2943 [inline] + packet_sendmsg+0x3ad5/0x60a0 net/packet/af_packet.c:2968 + sock_sendmsg_nosec net/socket.c:628 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:638 + sock_write_iter+0x31a/0x5d0 net/socket.c:907 + call_write_iter include/linux/fs.h:1776 [inline] + new_sync_write fs/read_write.c:469 [inline] + __vfs_write+0x684/0x970 fs/read_write.c:482 + vfs_write+0x189/0x510 fs/read_write.c:544 + SYSC_write fs/read_write.c:589 [inline] + SyS_write+0xef/0x220 fs/read_write.c:581 + entry_SYSCALL_64_fastpath+0x1f/0x96 + +Fixes: 34fad54c2537 ("net: __skb_flow_dissect() must cap its return value") +Fixes: a6e544b0a88b ("flow_dissector: Jump to exit code in __skb_flow_dissect") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Reported-by: syzbot +Acked-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/flow_dissector.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/core/flow_dissector.c ++++ b/net/core/flow_dissector.c +@@ -550,8 +550,8 @@ ip_proto_again: + out_good: + ret = true; + +- key_control->thoff = (u16)nhoff; + out: ++ key_control->thoff = min_t(u16, nhoff, skb ? 
skb->len : hlen); + key_basic->n_proto = proto; + key_basic->ip_proto = ip_proto; + +@@ -559,7 +559,6 @@ out: + + out_bad: + ret = false; +- key_control->thoff = min_t(u16, nhoff, skb ? skb->len : hlen); + goto out; + } + EXPORT_SYMBOL(__skb_flow_dissect); diff --git a/queue-4.9/gso-validate-gso_type-in-gso-handlers.patch b/queue-4.9/gso-validate-gso_type-in-gso-handlers.patch new file mode 100644 index 00000000000..5fb6c7c6918 --- /dev/null +++ b/queue-4.9/gso-validate-gso_type-in-gso-handlers.patch @@ -0,0 +1,95 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Willem de Bruijn +Date: Fri, 19 Jan 2018 09:29:18 -0500 +Subject: gso: validate gso_type in GSO handlers + +From: Willem de Bruijn + + +[ Upstream commit 121d57af308d0cf943f08f4738d24d3966c38cd9 ] + +Validate gso_type during segmentation as SKB_GSO_DODGY sources +may pass packets where the gso_type does not match the contents. + +Syzkaller was able to enter the SCTP gso handler with a packet of +gso_type SKB_GSO_TCPV4. + +On entry of transport layer gso handlers, verify that the gso_type +matches the transport protocol. + +Fixes: 90017accff61 ("sctp: Add GSO support") +Link: http://lkml.kernel.org/r/<001a1137452496ffc305617e5fe0@google.com> +Reported-by: syzbot+fee64147a25aecd48055@syzkaller.appspotmail.com +Signed-off-by: Willem de Bruijn +Acked-by: Jason Wang +Reviewed-by: Marcelo Ricardo Leitner +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_offload.c | 3 +++ + net/ipv4/udp_offload.c | 3 +++ + net/ipv6/tcpv6_offload.c | 3 +++ + net/ipv6/udp_offload.c | 3 +++ + net/sctp/offload.c | 3 +++ + 5 files changed, 15 insertions(+) + +--- a/net/ipv4/tcp_offload.c ++++ b/net/ipv4/tcp_offload.c +@@ -32,6 +32,9 @@ static void tcp_gso_tstamp(struct sk_buf + static struct sk_buff *tcp4_gso_segment(struct sk_buff *skb, + netdev_features_t features) + { ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)) ++ return ERR_PTR(-EINVAL); ++ + if (!pskb_may_pull(skb, sizeof(struct tcphdr))) + return ERR_PTR(-EINVAL); + +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -205,6 +205,9 @@ static struct sk_buff *udp4_ufo_fragment + goto out; + } + ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) ++ goto out; ++ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + +--- a/net/ipv6/tcpv6_offload.c ++++ b/net/ipv6/tcpv6_offload.c +@@ -46,6 +46,9 @@ static struct sk_buff *tcp6_gso_segment( + { + struct tcphdr *th; + ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)) ++ return ERR_PTR(-EINVAL); ++ + if (!pskb_may_pull(skb, sizeof(*th))) + return ERR_PTR(-EINVAL); + +--- a/net/ipv6/udp_offload.c ++++ b/net/ipv6/udp_offload.c +@@ -55,6 +55,9 @@ static struct sk_buff *udp6_ufo_fragment + const struct ipv6hdr *ipv6h; + struct udphdr *uh; + ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP)) ++ goto out; ++ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto out; + +--- a/net/sctp/offload.c ++++ b/net/sctp/offload.c +@@ -44,6 +44,9 @@ static struct sk_buff *sctp_gso_segment( + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct sctphdr *sh; + ++ if (!(skb_shinfo(skb)->gso_type & SKB_GSO_SCTP)) ++ goto out; ++ + sh = sctp_hdr(skb); + if (!pskb_may_pull(skb, sizeof(*sh))) + goto out; diff --git a/queue-4.9/ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch b/queue-4.9/ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch new file mode 
100644 index 00000000000..ddf493240fc --- /dev/null +++ b/queue-4.9/ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch @@ -0,0 +1,96 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Alexey Kodanev +Date: Thu, 18 Jan 2018 20:51:12 +0300 +Subject: ip6_gre: init dev->mtu and dev->hard_header_len correctly + +From: Alexey Kodanev + + +[ Upstream commit 128bb975dc3c25d00de04e503e2fe0a780d04459 ] + +Commit b05229f44228 ("gre6: Cleanup GREv6 transmit path, +call common GRE functions") moved dev->mtu initialization +from ip6gre_tunnel_setup() to ip6gre_tunnel_init(), as a +result, the previously set values, before ndo_init(), are +reset in the following cases: + +* rtnl_create_link() can update dev->mtu from IFLA_MTU + parameter. + +* ip6gre_tnl_link_config() is invoked before ndo_init() in + netlink and ioctl setup, so ndo_init() can reset MTU + adjustments with the lower device MTU as well, dev->mtu + and dev->hard_header_len. + + Not applicable for ip6gretap because it has one more call + to ip6gre_tnl_link_config(tunnel, 1) in ip6gre_tap_init(). + +Fix the first case by updating dev->mtu with 'tb[IFLA_MTU]' +parameter if a user sets it manually on a device creation, +and fix the second one by moving ip6gre_tnl_link_config() +call after register_netdevice(). + +Fixes: b05229f44228 ("gre6: Cleanup GREv6 transmit path, call common GRE functions") +Fixes: db2ec95d1ba4 ("ip6_gre: Fix MTU setting") +Signed-off-by: Alexey Kodanev +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -337,11 +337,12 @@ static struct ip6_tnl *ip6gre_tunnel_loc + + nt->dev = dev; + nt->net = dev_net(dev); +- ip6gre_tnl_link_config(nt, 1); + + if (register_netdevice(dev) < 0) + goto failed_free; + ++ ip6gre_tnl_link_config(nt, 1); ++ + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & TUNNEL_SEQ)) + dev->features |= NETIF_F_LLTX; +@@ -1263,7 +1264,6 @@ static void ip6gre_netlink_parms(struct + + static int ip6gre_tap_init(struct net_device *dev) + { +- struct ip6_tnl *tunnel; + int ret; + + ret = ip6gre_tunnel_init_common(dev); +@@ -1272,10 +1272,6 @@ static int ip6gre_tap_init(struct net_de + + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + +- tunnel = netdev_priv(dev); +- +- ip6gre_tnl_link_config(tunnel, 1); +- + return 0; + } + +@@ -1370,7 +1366,6 @@ static int ip6gre_newlink(struct net *sr + + nt->dev = dev; + nt->net = dev_net(dev); +- ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + dev->features |= GRE6_FEATURES; + dev->hw_features |= GRE6_FEATURES; +@@ -1396,6 +1391,11 @@ static int ip6gre_newlink(struct net *sr + if (err) + goto out; + ++ ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); ++ ++ if (tb[IFLA_MTU]) ++ ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); ++ + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + diff --git a/queue-4.9/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch b/queue-4.9/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch new file mode 100644 index 00000000000..a8cb73c9a62 --- /dev/null +++ b/queue-4.9/ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch @@ -0,0 +1,58 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Jim Westfall +Date: Sun, 14 Jan 2018 04:18:51 -0800 +Subject: ipv4: Make neigh 
lookup keys for loopback/point-to-point devices be INADDR_ANY + +From: Jim Westfall + + +[ Upstream commit cd9ff4de0107c65d69d02253bb25d6db93c3dbc1 ] + +Map all lookup neigh keys to INADDR_ANY for loopback/point-to-point devices +to avoid making an entry for every remote ip the device needs to talk to. + +This used to be the old behavior but became broken in a263b3093641f +(ipv4: Make neigh lookups directly in output packet path) and later removed +in 0bb4087cbec0 (ipv4: Fix neigh lookup keying over loopback/point-to-point +devices) because it was broken. + +Signed-off-by: Jim Westfall +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/arp.h | 3 +++ + net/ipv4/arp.c | 7 ++++++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +--- a/include/net/arp.h ++++ b/include/net/arp.h +@@ -19,6 +19,9 @@ static inline u32 arp_hashfn(const void + + static inline struct neighbour *__ipv4_neigh_lookup_noref(struct net_device *dev, u32 key) + { ++ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ++ key = INADDR_ANY; ++ + return ___neigh_lookup_noref(&arp_tbl, neigh_key_eq32, arp_hashfn, &key, dev); + } + +--- a/net/ipv4/arp.c ++++ b/net/ipv4/arp.c +@@ -223,11 +223,16 @@ static bool arp_key_eq(const struct neig + + static int arp_constructor(struct neighbour *neigh) + { +- __be32 addr = *(__be32 *)neigh->primary_key; ++ __be32 addr; + struct net_device *dev = neigh->dev; + struct in_device *in_dev; + struct neigh_parms *parms; ++ u32 inaddr_any = INADDR_ANY; + ++ if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) ++ memcpy(neigh->primary_key, &inaddr_any, arp_tbl.key_len); ++ ++ addr = *(__be32 *)neigh->primary_key; + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dev); + if (!in_dev) { diff --git a/queue-4.9/ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch b/queue-4.9/ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch new file mode 100644 index 00000000000..fba9ad0df2c --- /dev/null +++ 
b/queue-4.9/ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch @@ -0,0 +1,62 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Ben Hutchings +Date: Mon, 22 Jan 2018 20:06:42 +0000 +Subject: ipv6: Fix getsockopt() for sockets with default IPV6_AUTOFLOWLABEL + +From: Ben Hutchings + + +[ Upstream commit e9191ffb65d8e159680ce0ad2224e1acbde6985c ] + +Commit 513674b5a2c9 ("net: reevalulate autoflowlabel setting after +sysctl setting") removed the initialisation of +ipv6_pinfo::autoflowlabel and added a second flag to indicate +whether this field or the net namespace default should be used. + +The getsockopt() handling for this case was not updated, so it +currently returns 0 for all sockets for which IPV6_AUTOFLOWLABEL is +not explicitly enabled. Fix it to return the effective value, whether +that has been set at the socket or net namespace level. + +Fixes: 513674b5a2c9 ("net: reevalulate autoflowlabel setting after sysctl ...") +Signed-off-by: Ben Hutchings +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/ipv6.h | 1 + + net/ipv6/ip6_output.c | 2 +- + net/ipv6/ipv6_sockglue.c | 2 +- + 3 files changed, 3 insertions(+), 2 deletions(-) + +--- a/include/net/ipv6.h ++++ b/include/net/ipv6.h +@@ -290,6 +290,7 @@ int ipv6_flowlabel_opt_get(struct sock * + int flags); + int ip6_flowlabel_init(void); + void ip6_flowlabel_cleanup(void); ++bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np); + + static inline void fl6_sock_release(struct ip6_flowlabel *fl) + { +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -156,7 +156,7 @@ int ip6_output(struct net *net, struct s + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); + } + +-static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) ++bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np) + { + if (!np->autoflowlabel_set) + return ip6_default_np_autolabel(net); +--- a/net/ipv6/ipv6_sockglue.c ++++ b/net/ipv6/ipv6_sockglue.c +@@ -1316,7 +1316,7 @@ static int do_ipv6_getsockopt(struct soc + break; + + case IPV6_AUTOFLOWLABEL: +- val = np->autoflowlabel; ++ val = ip6_autoflowlabel(sock_net(sk), np); + break; + + default: diff --git a/queue-4.9/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch b/queue-4.9/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch new file mode 100644 index 00000000000..99de64a6cc8 --- /dev/null +++ b/queue-4.9/ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch @@ -0,0 +1,94 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Mike Maloney +Date: Wed, 10 Jan 2018 12:45:10 -0500 +Subject: ipv6: fix udpv6 sendmsg crash caused by too small MTU + +From: Mike Maloney + + +[ Upstream commit 749439bfac6e1a2932c582e2699f91d329658196 ] + +The logic in __ip6_append_data() assumes that the MTU is at least large +enough for the headers. A device's MTU may be adjusted after being +added while sendmsg() is processing data, resulting in +__ip6_append_data() seeing any MTU. 
For an mtu smaller than the size of +the fragmentation header, the math results in a negative 'maxfraglen', +which causes problems when refragmenting any previous skb in the +skb_write_queue, leaving it possibly malformed. + +Instead sendmsg returns EINVAL when the mtu is calculated to be less +than IPV6_MIN_MTU. + +Found by syzkaller: +kernel BUG at ./include/linux/skbuff.h:2064! +invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 1 PID: 14216 Comm: syz-executor5 Not tainted 4.13.0-rc4+ #2 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +task: ffff8801d0b68580 task.stack: ffff8801ac6b8000 +RIP: 0010:__skb_pull include/linux/skbuff.h:2064 [inline] +RIP: 0010:__ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 +RSP: 0018:ffff8801ac6bf570 EFLAGS: 00010216 +RAX: 0000000000010000 RBX: 0000000000000028 RCX: ffffc90003cce000 +RDX: 00000000000001b8 RSI: ffffffff839df06f RDI: ffff8801d9478ca0 +RBP: ffff8801ac6bf780 R08: ffff8801cc3f1dbc R09: 0000000000000000 +R10: ffff8801ac6bf7a0 R11: 43cb4b7b1948a9e7 R12: ffff8801cc3f1dc8 +R13: ffff8801cc3f1d40 R14: 0000000000001036 R15: dffffc0000000000 +FS: 00007f43d740c700(0000) GS:ffff8801dc100000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00007f7834984000 CR3: 00000001d79b9000 CR4: 00000000001406e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + ip6_finish_skb include/net/ipv6.h:911 [inline] + udp_v6_push_pending_frames+0x255/0x390 net/ipv6/udp.c:1093 + udpv6_sendmsg+0x280d/0x31a0 net/ipv6/udp.c:1363 + inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + SYSC_sendto+0x352/0x5a0 net/socket.c:1750 + SyS_sendto+0x40/0x50 net/socket.c:1718 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x4512e9 +RSP: 
002b:00007f43d740bc08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 00000000007180a8 RCX: 00000000004512e9 +RDX: 000000000000002e RSI: 0000000020d08000 RDI: 0000000000000005 +RBP: 0000000000000086 R08: 00000000209c1000 R09: 000000000000001c +R10: 0000000000040800 R11: 0000000000000216 R12: 00000000004b9c69 +R13: 00000000ffffffff R14: 0000000000000005 R15: 00000000202c2000 +Code: 9e 01 fe e9 c5 e8 ff ff e8 7f 9e 01 fe e9 4a ea ff ff 48 89 f7 e8 52 9e 01 fe e9 aa eb ff ff e8 a8 b6 cf fd 0f 0b e8 a1 b6 cf fd <0f> 0b 49 8d 45 78 4d 8d 45 7c 48 89 85 78 fe ff ff 49 8d 85 ba +RIP: __skb_pull include/linux/skbuff.h:2064 [inline] RSP: ffff8801ac6bf570 +RIP: __ip6_make_skb+0x18cf/0x1f70 net/ipv6/ip6_output.c:1617 RSP: ffff8801ac6bf570 + +Reported-by: syzbot +Signed-off-by: Mike Maloney +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1260,14 +1260,16 @@ static int ip6_setup_cork(struct sock *s + v6_cork->tclass = ipc6->tclass; + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? +- rt->dst.dev->mtu : dst_mtu(&rt->dst); ++ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? 
+- rt->dst.dev->mtu : dst_mtu(rt->dst.path); ++ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } ++ if (mtu < IPV6_MIN_MTU) ++ return -EINVAL; + cork->base.fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->base.flags |= IPCORK_ALLFRAG; diff --git a/queue-4.9/ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch b/queue-4.9/ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch new file mode 100644 index 00000000000..6f164126a91 --- /dev/null +++ b/queue-4.9/ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch @@ -0,0 +1,35 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Eric Dumazet +Date: Thu, 11 Jan 2018 22:31:18 -0800 +Subject: ipv6: ip6_make_skb() needs to clear cork.base.dst + +From: Eric Dumazet + + +[ Upstream commit 95ef498d977bf44ac094778fd448b98af158a3e6 ] + +In my last patch, I missed fact that cork.base.dst was not initialized +in ip6_make_skb() : + +If ip6_setup_cork() returns an error, we might attempt a dst_release() +on some random pointer. + +Fixes: 862c03ee1deb ("ipv6: fix possible mem leaks in ipv6_make_skb()") +Signed-off-by: Eric Dumazet +Reported-by: syzbot +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_output.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1800,6 +1800,7 @@ struct sk_buff *ip6_make_skb(struct sock + cork.base.flags = 0; + cork.base.addr = 0; + cork.base.opt = NULL; ++ cork.base.dst = NULL; + v6_cork.opt = NULL; + err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); + if (err) { diff --git a/queue-4.9/lan78xx-fix-failure-in-usb-full-speed.patch b/queue-4.9/lan78xx-fix-failure-in-usb-full-speed.patch new file mode 100644 index 00000000000..cdc863c6388 --- /dev/null +++ b/queue-4.9/lan78xx-fix-failure-in-usb-full-speed.patch @@ -0,0 +1,31 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Yuiko Oshino +Date: Mon, 15 Jan 2018 13:24:28 -0500 +Subject: lan78xx: Fix failure in USB Full Speed + +From: Yuiko Oshino + + +[ Upstream commit a5b1379afbfabf91e3a689e82ac619a7157336b3 ] + +Initialize the uninitialized tx_qlen to an appropriate value when USB +Full Speed is used. + +Fixes: 55d7de9de6c3 ("Microchip's LAN7800 family USB 2/3 to 10/100/1000 Ethernet device driver") +Signed-off-by: Yuiko Oshino +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/usb/lan78xx.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -2197,6 +2197,7 @@ static int lan78xx_reset(struct lan78xx_ + buf = DEFAULT_BURST_CAP_SIZE / FS_USB_PKT_SIZE; + dev->rx_urb_size = DEFAULT_BURST_CAP_SIZE; + dev->rx_qlen = 4; ++ dev->tx_qlen = 4; + } + + ret = lan78xx_write_reg(dev, BURST_CAP, buf); diff --git a/queue-4.9/mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch b/queue-4.9/mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch new file mode 100644 index 00000000000..6d4868a9c57 --- /dev/null +++ b/queue-4.9/mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch @@ -0,0 +1,47 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Yuval Mintz +Date: Wed, 24 Jan 2018 10:02:09 +0100 +Subject: mlxsw: spectrum_router: Don't log an error on missing neighbor + +From: Yuval Mintz + + +[ Upstream commit 1ecdaea02ca6bfacf2ecda500dc1af51e9780c42 ] + +Driver periodically samples all neighbors configured in device +in order to update the kernel regarding their state. When finding +an entry configured in HW that doesn't show in neigh_lookup() +driver logs an error message. +This introduces a race when removing multiple neighbors - +it's possible that a given entry would still be configured in HW +as its removal is still being processed but is already removed +from the kernel's neighbor tables. + +Simply remove the error message and gracefully accept such events. + +Fixes: c723c735fa6b ("mlxsw: spectrum_router: Periodically update the kernel's neigh table") +Fixes: 60f040ca11b9 ("mlxsw: spectrum_router: Periodically dump active IPv6 neighbours") +Signed-off-by: Yuval Mintz +Reviewed-by: Ido Schimmel +Signed-off-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c ++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +@@ -765,11 +765,8 @@ static void mlxsw_sp_router_neigh_ent_ip + dipn = htonl(dip); + dev = mlxsw_sp->rifs[rif]->dev; + n = neigh_lookup(&arp_tbl, &dipn, dev); +- if (!n) { +- netdev_err(dev, "Failed to find matching neighbour for IP=%pI4h\n", +- &dip); ++ if (!n) + return; +- } + + netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip); + neigh_event_send(n, NULL); diff --git a/queue-4.9/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch b/queue-4.9/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch new file mode 100644 index 00000000000..1ca9c29a452 --- /dev/null +++ b/queue-4.9/net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch @@ -0,0 +1,40 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Jim Westfall +Date: Sun, 14 Jan 2018 04:18:50 -0800 +Subject: net: Allow neigh contructor functions ability to modify the primary_key + +From: Jim Westfall + + +[ Upstream commit 096b9854c04df86f03b38a97d40b6506e5730919 ] + +Use n->primary_key instead of pkey to account for the possibility that a neigh +constructor function may have modified the primary_key value. + +Signed-off-by: Jim Westfall +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/neighbour.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/core/neighbour.c ++++ b/net/core/neighbour.c +@@ -496,7 +496,7 @@ struct neighbour *__neigh_create(struct + if (atomic_read(&tbl->entries) > (1 << nht->hash_shift)) + nht = neigh_hash_grow(tbl, nht->hash_shift + 1); + +- hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift); ++ hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift); + + if (n->parms->dead) { + rc = ERR_PTR(-EINVAL); +@@ -508,7 +508,7 @@ struct neighbour *__neigh_create(struct + n1 != NULL; + n1 = rcu_dereference_protected(n1->next, + lockdep_is_held(&tbl->lock))) { +- if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { ++ if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) { + if (want_ref) + neigh_hold(n1); + rc = n1; diff --git a/queue-4.9/net-igmp-fix-source-address-check-for-igmpv3-reports.patch b/queue-4.9/net-igmp-fix-source-address-check-for-igmpv3-reports.patch new file mode 100644 index 00000000000..e692e2eff39 --- /dev/null +++ b/queue-4.9/net-igmp-fix-source-address-check-for-igmpv3-reports.patch @@ -0,0 +1,41 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Felix Fietkau +Date: Fri, 19 Jan 2018 11:50:46 +0100 +Subject: net: igmp: fix source address check for IGMPv3 reports + +From: Felix Fietkau + + +[ Upstream commit ad23b750933ea7bf962678972a286c78a8fa36aa ] + +Commit "net: igmp: Use correct source address on IGMPv3 reports" +introduced a check to validate the source address of locally generated +IGMPv3 packets. +Instead of checking the local interface address directly, it uses +inet_ifa_match(fl4->saddr, ifa), which checks if the address is on the +local subnet (or equal to the point-to-point address if used). + +This breaks for point-to-point interfaces, so check against +ifa->ifa_local directly. 
+ +Cc: Kevin Cernekee +Fixes: a46182b00290 ("net: igmp: Use correct source address on IGMPv3 reports") +Reported-by: Sebastian Gottschall +Signed-off-by: Felix Fietkau +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/igmp.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/igmp.c ++++ b/net/ipv4/igmp.c +@@ -332,7 +332,7 @@ static __be32 igmpv3_get_srcaddr(struct + return htonl(INADDR_ANY); + + for_ifa(in_dev) { +- if (inet_ifa_match(fl4->saddr, ifa)) ++ if (fl4->saddr == ifa->ifa_local) + return fl4->saddr; + } endfor_ifa(in_dev); + diff --git a/queue-4.9/net-qdisc_pkt_len_init-should-be-more-robust.patch b/queue-4.9/net-qdisc_pkt_len_init-should-be-more-robust.patch new file mode 100644 index 00000000000..b700caab630 --- /dev/null +++ b/queue-4.9/net-qdisc_pkt_len_init-should-be-more-robust.patch @@ -0,0 +1,61 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Eric Dumazet +Date: Thu, 18 Jan 2018 19:59:19 -0800 +Subject: net: qdisc_pkt_len_init() should be more robust + +From: Eric Dumazet + + +[ Upstream commit 7c68d1a6b4db9012790af7ac0f0fdc0d2083422a ] + +Without proper validation of DODGY packets, we might very well +feed qdisc_pkt_len_init() with invalid GSO packets. + +tcp_hdrlen() might access out-of-bound data, so let's use +skb_header_pointer() and proper checks. + +Whole story is described in commit d0c081b49137 ("flow_dissector: +properly cap thoff field") + +We have the goal of validating DODGY packets earlier in the stack, +so we might very well revert this fix in the future. + +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Cc: Jason Wang +Reported-by: syzbot+9da69ebac7dddd804552@syzkaller.appspotmail.com +Acked-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/dev.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -3083,10 +3083,21 @@ static void qdisc_pkt_len_init(struct sk + hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + + /* + transport layer */ +- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) +- hdr_len += tcp_hdrlen(skb); +- else +- hdr_len += sizeof(struct udphdr); ++ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { ++ const struct tcphdr *th; ++ struct tcphdr _tcphdr; ++ ++ th = skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_tcphdr), &_tcphdr); ++ if (likely(th)) ++ hdr_len += __tcp_hdrlen(th); ++ } else { ++ struct udphdr _udphdr; ++ ++ if (skb_header_pointer(skb, skb_transport_offset(skb), ++ sizeof(_udphdr), &_udphdr)) ++ hdr_len += sizeof(struct udphdr); ++ } + + if (shinfo->gso_type & SKB_GSO_DODGY) + gso_segs = DIV_ROUND_UP(skb->len - hdr_len, diff --git a/queue-4.9/net-tcp-close-sock-if-net-namespace-is-exiting.patch b/queue-4.9/net-tcp-close-sock-if-net-namespace-is-exiting.patch new file mode 100644 index 00000000000..c7f3c0b0f64 --- /dev/null +++ b/queue-4.9/net-tcp-close-sock-if-net-namespace-is-exiting.patch @@ -0,0 +1,120 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Dan Streetman +Date: Thu, 18 Jan 2018 16:14:26 -0500 +Subject: net: tcp: close sock if net namespace is exiting + +From: Dan Streetman + + +[ Upstream commit 4ee806d51176ba7b8ff1efd81f271d7252e03a1d ] + +When a tcp socket is closed, if it detects that its net namespace is +exiting, close immediately and do not wait for FIN sequence. + +For normal sockets, a reference is taken to their net namespace, so it will +never exit while the socket is open. However, kernel sockets do not take a +reference to their net namespace, so it may begin exiting while the kernel +socket is still open. 
In this case if the kernel socket is a tcp socket, +it will stay open trying to complete its close sequence. The sock's dst(s) +hold a reference to their interface, which are all transferred to the +namespace's loopback interface when the real interfaces are taken down. +When the namespace tries to take down its loopback interface, it hangs +waiting for all references to the loopback interface to release, which +results in messages like: + +unregister_netdevice: waiting for lo to become free. Usage count = 1 + +These messages continue until the socket finally times out and closes. +Since the net namespace cleanup holds the net_mutex while calling its +registered pernet callbacks, any new net namespace initialization is +blocked until the current net namespace finishes exiting. + +After this change, the tcp socket notices the exiting net namespace, and +closes immediately, releasing its dst(s) and their reference to the +loopback interface, which lets the net namespace continue exiting. + +Link: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1711407 +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=97811 +Signed-off-by: Dan Streetman +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/net_namespace.h | 10 ++++++++++ + net/ipv4/tcp.c | 3 +++ + net/ipv4/tcp_timer.c | 15 +++++++++++++++ + 3 files changed, 28 insertions(+) + +--- a/include/net/net_namespace.h ++++ b/include/net/net_namespace.h +@@ -213,6 +213,11 @@ int net_eq(const struct net *net1, const + return net1 == net2; + } + ++static inline int check_net(const struct net *net) ++{ ++ return atomic_read(&net->count) != 0; ++} ++ + void net_drop_ns(void *); + + #else +@@ -236,6 +241,11 @@ int net_eq(const struct net *net1, const + { + return 1; + } ++ ++static inline int check_net(const struct net *net) ++{ ++ return 1; ++} + + #define net_drop_ns NULL + #endif +--- a/net/ipv4/tcp.c ++++ b/net/ipv4/tcp.c +@@ -2215,6 +2215,9 @@ adjudge_to_death: + tcp_send_active_reset(sk, GFP_ATOMIC); + __NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPABORTONMEMORY); ++ } else if (!check_net(sock_net(sk))) { ++ /* Not possible to send reset; just close */ ++ tcp_set_state(sk, TCP_CLOSE); + } + } + +--- a/net/ipv4/tcp_timer.c ++++ b/net/ipv4/tcp_timer.c +@@ -50,11 +50,19 @@ static void tcp_write_err(struct sock *s + * to prevent DoS attacks. It is called when a retransmission timeout + * or zero probe timeout occurs on orphaned socket. + * ++ * Also close if our net namespace is exiting; in that case there is no ++ * hope of ever communicating again since all netns interfaces are already ++ * down (or about to be down), and we need to release our dst references, ++ * which have been moved to the netns loopback interface, so the namespace ++ * can finish exiting. This condition is only possible if we are a kernel ++ * socket, as those do not hold references to the namespace. ++ * + * Criteria is still not confirmed experimentally and may change. + * We kill the socket, if: + * 1. If number of orphaned sockets exceeds an administratively configured + * limit. + * 2. If we have strong memory pressure. ++ * 3. If our net namespace is exiting. 
+ */ + static int tcp_out_of_resources(struct sock *sk, bool do_reset) + { +@@ -83,6 +91,13 @@ static int tcp_out_of_resources(struct s + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); + return 1; + } ++ ++ if (!check_net(sock_net(sk))) { ++ /* Not possible to send reset; just close */ ++ tcp_done(sk); ++ return 1; ++ } ++ + return 0; + } + diff --git a/queue-4.9/ppp-unlock-all_ppp_mutex-before-registering-device.patch b/queue-4.9/ppp-unlock-all_ppp_mutex-before-registering-device.patch new file mode 100644 index 00000000000..66ebb1db897 --- /dev/null +++ b/queue-4.9/ppp-unlock-all_ppp_mutex-before-registering-device.patch @@ -0,0 +1,58 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Guillaume Nault +Date: Wed, 10 Jan 2018 16:24:45 +0100 +Subject: ppp: unlock all_ppp_mutex before registering device + +From: Guillaume Nault + + +[ Upstream commit 0171c41835591e9aa2e384b703ef9a6ae367c610 ] + +ppp_dev_uninit(), which is the .ndo_uninit() handler of PPP devices, +needs to lock pn->all_ppp_mutex. Therefore we mustn't call +register_netdevice() with pn->all_ppp_mutex already locked, or we'd +deadlock in case register_netdevice() fails and calls .ndo_uninit(). + +Fortunately, we can unlock pn->all_ppp_mutex before calling +register_netdevice(). This lock protects pn->units_idr, which isn't +used in the device registration process. + +However, keeping pn->all_ppp_mutex locked during device registration +did ensure that no device in transient state would be published in +pn->units_idr. In practice, unlocking it before calling +register_netdevice() doesn't change this property: ppp_unit_register() +is called with 'ppp_mutex' locked and all searches done in +pn->units_idr hold this lock too. + +Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion") +Reported-and-tested-by: syzbot+367889b9c9e279219175@syzkaller.appspotmail.com +Signed-off-by: Guillaume Nault +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/ppp_generic.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -1002,17 +1002,18 @@ static int ppp_unit_register(struct ppp + if (!ifname_is_set) + snprintf(ppp->dev->name, IFNAMSIZ, "ppp%i", ppp->file.index); + ++ mutex_unlock(&pn->all_ppp_mutex); ++ + ret = register_netdevice(ppp->dev); + if (ret < 0) + goto err_unit; + + atomic_inc(&ppp_unit_count); + +- mutex_unlock(&pn->all_ppp_mutex); +- + return 0; + + err_unit: ++ mutex_lock(&pn->all_ppp_mutex); + unit_put(&pn->units_idr, ppp->file.index); + err: + mutex_unlock(&pn->all_ppp_mutex); diff --git a/queue-4.9/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch b/queue-4.9/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch new file mode 100644 index 00000000000..2a039ea4102 --- /dev/null +++ b/queue-4.9/pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch @@ -0,0 +1,126 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Guillaume Nault +Date: Mon, 22 Jan 2018 18:06:37 +0100 +Subject: pppoe: take ->needed_headroom of lower device into account on xmit + +From: Guillaume Nault + + +[ Upstream commit 02612bb05e51df8489db5e94d0cf8d1c81f87b0c ] + +In pppoe_sendmsg(), reserving dev->hard_header_len bytes of headroom +was probably fine before the introduction of ->needed_headroom in +commit f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom"). + +But now, virtual devices typically advertise the size of their overhead +in dev->needed_headroom, so we must also take it into account in +skb_reserve(). +Allocation size of skb is also updated to take dev->needed_tailroom +into account and replace the arbitrary 32 bytes with the real size of +a PPPoE header. 
+ +This issue was discovered by syzbot, who connected a pppoe socket to a +gre device which had dev->header_ops->create == ipgre_header and +dev->hard_header_len == 0. Therefore, PPPoE didn't reserve any +headroom, and dev_hard_header() crashed when ipgre_header() tried to +prepend its header to skb->data. + +skbuff: skb_under_panic: text:000000001d390b3a len:31 put:24 +head:00000000d8ed776f data:000000008150e823 tail:0x7 end:0xc0 dev:gre0 +------------[ cut here ]------------ +kernel BUG at net/core/skbuff.c:104! +invalid opcode: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 1 PID: 3670 Comm: syzkaller801466 Not tainted +4.15.0-rc7-next-20180115+ #97 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +RIP: 0010:skb_panic+0x162/0x1f0 net/core/skbuff.c:100 +RSP: 0018:ffff8801d9bd7840 EFLAGS: 00010282 +RAX: 0000000000000083 RBX: ffff8801d4f083c0 RCX: 0000000000000000 +RDX: 0000000000000083 RSI: 1ffff1003b37ae92 RDI: ffffed003b37aefc +RBP: ffff8801d9bd78a8 R08: 1ffff1003b37ae8a R09: 0000000000000000 +R10: 0000000000000001 R11: 0000000000000000 R12: ffffffff86200de0 +R13: ffffffff84a981ad R14: 0000000000000018 R15: ffff8801d2d34180 +FS: 00000000019c4880(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00000000208bc000 CR3: 00000001d9111001 CR4: 00000000001606e0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + skb_under_panic net/core/skbuff.c:114 [inline] + skb_push+0xce/0xf0 net/core/skbuff.c:1714 + ipgre_header+0x6d/0x4e0 net/ipv4/ip_gre.c:879 + dev_hard_header include/linux/netdevice.h:2723 [inline] + pppoe_sendmsg+0x58e/0x8b0 drivers/net/ppp/pppoe.c:890 + sock_sendmsg_nosec net/socket.c:630 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:640 + sock_write_iter+0x31a/0x5d0 net/socket.c:909 + call_write_iter 
include/linux/fs.h:1775 [inline] + do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653 + do_iter_write+0x154/0x540 fs/read_write.c:932 + vfs_writev+0x18a/0x340 fs/read_write.c:977 + do_writev+0xfc/0x2a0 fs/read_write.c:1012 + SYSC_writev fs/read_write.c:1085 [inline] + SyS_writev+0x27/0x30 fs/read_write.c:1082 + entry_SYSCALL_64_fastpath+0x29/0xa0 + +Admittedly PPPoE shouldn't be allowed to run on non Ethernet-like +interfaces, but reserving space for ->needed_headroom is a more +fundamental issue that needs to be addressed first. + +Same problem exists for __pppoe_xmit(), which also needs to take +dev->needed_headroom into account in skb_cow_head(). + +Fixes: f5184d267c1a ("net: Allow netdevices to specify needed head/tailroom") +Reported-by: syzbot+ed0838d0fa4c4f2b528e20286e6dc63effc7c14d@syzkaller.appspotmail.com +Signed-off-by: Guillaume Nault +Reviewed-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/pppoe.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/drivers/net/ppp/pppoe.c ++++ b/drivers/net/ppp/pppoe.c +@@ -842,6 +842,7 @@ static int pppoe_sendmsg(struct socket * + struct pppoe_hdr *ph; + struct net_device *dev; + char *start; ++ int hlen; + + lock_sock(sk); + if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) { +@@ -860,16 +861,16 @@ static int pppoe_sendmsg(struct socket * + if (total_len > (dev->mtu + dev->hard_header_len)) + goto end; + +- +- skb = sock_wmalloc(sk, total_len + dev->hard_header_len + 32, +- 0, GFP_KERNEL); ++ hlen = LL_RESERVED_SPACE(dev); ++ skb = sock_wmalloc(sk, hlen + sizeof(*ph) + total_len + ++ dev->needed_tailroom, 0, GFP_KERNEL); + if (!skb) { + error = -ENOMEM; + goto end; + } + + /* Reserve space for headers. 
*/ +- skb_reserve(skb, dev->hard_header_len); ++ skb_reserve(skb, hlen); + skb_reset_network_header(skb); + + skb->dev = dev; +@@ -930,7 +931,7 @@ static int __pppoe_xmit(struct sock *sk, + /* Copy the data if there is no space for the header or if it's + * read-only. + */ +- if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) ++ if (skb_cow_head(skb, LL_RESERVED_SPACE(dev) + sizeof(*ph))) + goto abort; + + __skb_push(skb, sizeof(*ph)); diff --git a/queue-4.9/r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch b/queue-4.9/r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch new file mode 100644 index 00000000000..3703e24eac5 --- /dev/null +++ b/queue-4.9/r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch @@ -0,0 +1,47 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Francois Romieu +Date: Fri, 26 Jan 2018 01:53:26 +0100 +Subject: r8169: fix memory corruption on retrieval of hardware statistics. + +From: Francois Romieu + + +[ Upstream commit a78e93661c5fd30b9e1dee464b2f62f966883ef7 ] + +Hardware statistics retrieval hurts in tight invocation loops. + +Avoid extraneous write and enforce strict ordering of writes targeted to +the tally counters dump area address registers. + +Signed-off-by: Francois Romieu +Tested-by: Oliver Freyermuth +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/realtek/r8169.c | 9 ++------- + 1 file changed, 2 insertions(+), 7 deletions(-) + +--- a/drivers/net/ethernet/realtek/r8169.c ++++ b/drivers/net/ethernet/realtek/r8169.c +@@ -2222,19 +2222,14 @@ static bool rtl8169_do_counters(struct n + void __iomem *ioaddr = tp->mmio_addr; + dma_addr_t paddr = tp->counters_phys_addr; + u32 cmd; +- bool ret; + + RTL_W32(CounterAddrHigh, (u64)paddr >> 32); ++ RTL_R32(CounterAddrHigh); + cmd = (u64)paddr & DMA_BIT_MASK(32); + RTL_W32(CounterAddrLow, cmd); + RTL_W32(CounterAddrLow, cmd | counter_cmd); + +- ret = rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); +- +- RTL_W32(CounterAddrLow, 0); +- RTL_W32(CounterAddrHigh, 0); +- +- return ret; ++ return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000); + } + + static bool rtl8169_reset_counters(struct net_device *dev) diff --git a/queue-4.9/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch b/queue-4.9/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch new file mode 100644 index 00000000000..8eaa4386911 --- /dev/null +++ b/queue-4.9/sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch @@ -0,0 +1,56 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Xin Long +Date: Mon, 15 Jan 2018 17:02:00 +0800 +Subject: sctp: do not allow the v4 socket to bind a v4mapped v6 address + +From: Xin Long + + +[ Upstream commit c5006b8aa74599ce19104b31d322d2ea9ff887cc ] + +The check in sctp_sockaddr_af is not robust enough to forbid binding a +v4mapped v6 addr on a v4 socket. + +The worse thing is that v4 socket's bind_verify would not convert this +v4mapped v6 addr to a v4 addr. syzbot even reported a crash as the v4 +socket bound a v6 addr. + +This patch is to fix it by doing the common sa.sa_family check first, +then AF_INET check for v4mapped v6 addrs. 
+ +Fixes: 7dab83de50c7 ("sctp: Support ipv6only AF_INET6 sockets.") +Reported-by: syzbot+7b7b518b1228d2743963@syzkaller.appspotmail.com +Acked-by: Neil Horman +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 14 ++++++-------- + 1 file changed, 6 insertions(+), 8 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -332,16 +332,14 @@ static struct sctp_af *sctp_sockaddr_af( + if (len < sizeof (struct sockaddr)) + return NULL; + ++ if (!opt->pf->af_supported(addr->sa.sa_family, opt)) ++ return NULL; ++ + /* V4 mapped address are really of AF_INET family */ + if (addr->sa.sa_family == AF_INET6 && +- ipv6_addr_v4mapped(&addr->v6.sin6_addr)) { +- if (!opt->pf->af_supported(AF_INET, opt)) +- return NULL; +- } else { +- /* Does this PF support this AF? */ +- if (!opt->pf->af_supported(addr->sa.sa_family, opt)) +- return NULL; +- } ++ ipv6_addr_v4mapped(&addr->v6.sin6_addr) && ++ !opt->pf->af_supported(AF_INET, opt)) ++ return NULL; + + /* If we get this far, af is valid. */ + af = sctp_get_af_specific(addr->sa.sa_family); diff --git a/queue-4.9/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch b/queue-4.9/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch new file mode 100644 index 00000000000..091a8dfe52a --- /dev/null +++ b/queue-4.9/sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch @@ -0,0 +1,87 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Xin Long +Date: Mon, 15 Jan 2018 17:01:36 +0800 +Subject: sctp: return error if the asoc has been peeled off in sctp_wait_for_sndbuf + +From: Xin Long + + +[ Upstream commit a0ff660058b88d12625a783ce9e5c1371c87951f ] + +After commit cea0cc80a677 ("sctp: use the right sk after waking up from +wait_buf sleep"), it may change to lock another sk if the asoc has been +peeled off in sctp_wait_for_sndbuf. 
+ +However, the asoc's new sk could be already closed elsewhere, as it's in +the sendmsg context of the old sk that can't avoid the new sk's closing. +If the sk's last one refcnt is held by this asoc, later on after putting +this asoc, the new sk will be freed, while under its own lock. + +This patch is to revert that commit, but fix the old issue by returning +error under the old sk's lock. + +Fixes: cea0cc80a677 ("sctp: use the right sk after waking up from wait_buf sleep") +Reported-by: syzbot+ac6ea7baa4432811eb50@syzkaller.appspotmail.com +Signed-off-by: Xin Long +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 16 ++++++---------- + 1 file changed, 6 insertions(+), 10 deletions(-) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -83,7 +83,7 @@ + static int sctp_writeable(struct sock *sk); + static void sctp_wfree(struct sk_buff *skb); + static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, +- size_t msg_len, struct sock **orig_sk); ++ size_t msg_len); + static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); + static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); + static int sctp_wait_for_accept(struct sock *sk, long timeo); +@@ -1956,7 +1956,7 @@ static int sctp_sendmsg(struct sock *sk, + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + if (!sctp_wspace(asoc)) { + /* sk can be changed by peel off when waiting for buf. */ +- err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); ++ err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); + if (err) { + if (err == -ESRCH) { + /* asoc is already dead. */ +@@ -7439,12 +7439,12 @@ void sctp_sock_rfree(struct sk_buff *skb + + /* Helper function to wait for space in the sndbuf.
*/ + static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, +- size_t msg_len, struct sock **orig_sk) ++ size_t msg_len) + { + struct sock *sk = asoc->base.sk; +- int err = 0; + long current_timeo = *timeo_p; + DEFINE_WAIT(wait); ++ int err = 0; + + pr_debug("%s: asoc:%p, timeo:%ld, msg_len:%zu\n", __func__, asoc, + *timeo_p, msg_len); +@@ -7473,17 +7473,13 @@ static int sctp_wait_for_sndbuf(struct s + release_sock(sk); + current_timeo = schedule_timeout(current_timeo); + lock_sock(sk); +- if (sk != asoc->base.sk) { +- release_sock(sk); +- sk = asoc->base.sk; +- lock_sock(sk); +- } ++ if (sk != asoc->base.sk) ++ goto do_error; + + *timeo_p = current_timeo; + } + + out: +- *orig_sk = sk; + finish_wait(&asoc->wait, &wait); + + /* Release the association's refcnt. */ diff --git a/queue-4.9/series b/queue-4.9/series index a9901c65aac..dfdb5ee2ab3 100644 --- a/queue-4.9/series +++ b/queue-4.9/series @@ -30,3 +30,26 @@ orangefs-fix-deadlock-do-not-write-i_size-in-read_iter.patch um-link-vmlinux-with-no-pie.patch vsyscall-fix-permissions-for-emulate-mode-with-kaiser-pti.patch eventpoll.h-add-missing-epoll-event-masks.patch +dccp-don-t-restart-ccid2_hc_tx_rto_expire-if-sk-in-closed-state.patch +ipv6-fix-getsockopt-for-sockets-with-default-ipv6_autoflowlabel.patch +ipv6-fix-udpv6-sendmsg-crash-caused-by-too-small-mtu.patch +ipv6-ip6_make_skb-needs-to-clear-cork.base.dst.patch +lan78xx-fix-failure-in-usb-full-speed.patch +net-igmp-fix-source-address-check-for-igmpv3-reports.patch +net-qdisc_pkt_len_init-should-be-more-robust.patch +net-tcp-close-sock-if-net-namespace-is-exiting.patch +pppoe-take-needed_headroom-of-lower-device-into-account-on-xmit.patch +r8169-fix-memory-corruption-on-retrieval-of-hardware-statistics.patch +sctp-do-not-allow-the-v4-socket-to-bind-a-v4mapped-v6-address.patch +sctp-return-error-if-the-asoc-has-been-peeled-off-in-sctp_wait_for_sndbuf.patch +tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch 
+vmxnet3-repair-memory-leak.patch +net-allow-neigh-contructor-functions-ability-to-modify-the-primary_key.patch +ipv4-make-neigh-lookup-keys-for-loopback-point-to-point-devices-be-inaddr_any.patch +ppp-unlock-all_ppp_mutex-before-registering-device.patch +be2net-restore-properly-promisc-mode-after-queues-reconfiguration.patch +ip6_gre-init-dev-mtu-and-dev-hard_header_len-correctly.patch +gso-validate-gso_type-in-gso-handlers.patch +mlxsw-spectrum_router-don-t-log-an-error-on-missing-neighbor.patch +tun-fix-a-memory-leak-for-tfile-tx_array.patch +flow_dissector-properly-cap-thoff-field.patch diff --git a/queue-4.9/tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch b/queue-4.9/tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch new file mode 100644 index 00000000000..8f777f02653 --- /dev/null +++ b/queue-4.9/tipc-fix-a-memory-leak-in-tipc_nl_node_get_link.patch @@ -0,0 +1,81 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Cong Wang +Date: Wed, 10 Jan 2018 12:50:25 -0800 +Subject: tipc: fix a memory leak in tipc_nl_node_get_link() + +From: Cong Wang + + +[ Upstream commit 59b36613e85fb16ebf9feaf914570879cd5c2a21 ] + +When tipc_node_find_by_name() fails, the nlmsg is not +freed. + +While on it, switch to a goto label to properly +free it. + +Fixes: be9c086715c ("tipc: narrow down exposure of struct tipc_node") +Reported-by: Dmitry Vyukov +Cc: Jon Maloy +Cc: Ying Xue +Signed-off-by: Cong Wang +Acked-by: Ying Xue +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/tipc/node.c | 26 ++++++++++++++------------ + 1 file changed, 14 insertions(+), 12 deletions(-) + +--- a/net/tipc/node.c ++++ b/net/tipc/node.c +@@ -1848,36 +1848,38 @@ int tipc_nl_node_get_link(struct sk_buff + + if (strcmp(name, tipc_bclink_name) == 0) { + err = tipc_nl_add_bc_link(net, &msg); +- if (err) { +- nlmsg_free(msg.skb); +- return err; +- } ++ if (err) ++ goto err_free; + } else { + int bearer_id; + struct tipc_node *node; + struct tipc_link *link; + + node = tipc_node_find_by_name(net, name, &bearer_id); +- if (!node) +- return -EINVAL; ++ if (!node) { ++ err = -EINVAL; ++ goto err_free; ++ } + + tipc_node_read_lock(node); + link = node->links[bearer_id].link; + if (!link) { + tipc_node_read_unlock(node); +- nlmsg_free(msg.skb); +- return -EINVAL; ++ err = -EINVAL; ++ goto err_free; + } + + err = __tipc_nl_add_link(net, &msg, link, 0); + tipc_node_read_unlock(node); +- if (err) { +- nlmsg_free(msg.skb); +- return err; +- } ++ if (err) ++ goto err_free; + } + + return genlmsg_reply(msg.skb, info); ++ ++err_free: ++ nlmsg_free(msg.skb); ++ return err; + } + + int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) diff --git a/queue-4.9/tun-fix-a-memory-leak-for-tfile-tx_array.patch b/queue-4.9/tun-fix-a-memory-leak-for-tfile-tx_array.patch new file mode 100644 index 00000000000..421acdc2598 --- /dev/null +++ b/queue-4.9/tun-fix-a-memory-leak-for-tfile-tx_array.patch @@ -0,0 +1,81 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Cong Wang +Date: Mon, 15 Jan 2018 11:37:29 -0800 +Subject: tun: fix a memory leak for tfile->tx_array + +From: Cong Wang + + +[ Upstream commit 4df0bfc79904b7169dc77dcce44598b1545721f9 ] + +tfile->tun could be detached before we close the tun fd, +via tun_detach_all(), so it should not be used to check for +tfile->tx_array. 
+ +As Jason suggested, we probably have to clean it up +unconditionally both in __tun_detach() and tun_detach_all(), +but this requires to check if it is initialized or not. +Currently skb_array_cleanup() doesn't have such a check, +so I check it in the caller and introduce a helper function, +it is a bit ugly but we can always improve it in net-next. + +Reported-by: Dmitry Vyukov +Fixes: 1576d9860599 ("tun: switch to use skb array for tx") +Cc: Jason Wang +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 15 +++++++++++++-- + 1 file changed, 13 insertions(+), 2 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -525,6 +525,14 @@ static void tun_queue_purge(struct tun_f + skb_queue_purge(&tfile->sk.sk_error_queue); + } + ++static void tun_cleanup_tx_array(struct tun_file *tfile) ++{ ++ if (tfile->tx_array.ring.queue) { ++ skb_array_cleanup(&tfile->tx_array); ++ memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); ++ } ++} ++ + static void __tun_detach(struct tun_file *tfile, bool clean) + { + struct tun_file *ntfile; +@@ -566,8 +574,7 @@ static void __tun_detach(struct tun_file + tun->dev->reg_state == NETREG_REGISTERED) + unregister_netdevice(tun->dev); + } +- if (tun) +- skb_array_cleanup(&tfile->tx_array); ++ tun_cleanup_tx_array(tfile); + sock_put(&tfile->sk); + } + } +@@ -606,11 +613,13 @@ static void tun_detach_all(struct net_de + /* Drop read queue */ + tun_queue_purge(tfile); + sock_put(&tfile->sk); ++ tun_cleanup_tx_array(tfile); + } + list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { + tun_enable_queue(tfile); + tun_queue_purge(tfile); + sock_put(&tfile->sk); ++ tun_cleanup_tx_array(tfile); + } + BUG_ON(tun->numdisabled != 0); + +@@ -2363,6 +2372,8 @@ static int tun_chr_open(struct inode *in + + sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); + ++ memset(&tfile->tx_array, 0, sizeof(tfile->tx_array)); ++ + return 0; + } + diff --git
a/queue-4.9/vmxnet3-repair-memory-leak.patch b/queue-4.9/vmxnet3-repair-memory-leak.patch new file mode 100644 index 00000000000..ec8fcc31cbb --- /dev/null +++ b/queue-4.9/vmxnet3-repair-memory-leak.patch @@ -0,0 +1,58 @@ +From foo@baz Sun Jan 28 17:37:09 CET 2018 +From: Neil Horman +Date: Mon, 22 Jan 2018 16:06:37 -0500 +Subject: vmxnet3: repair memory leak + +From: Neil Horman + + +[ Upstream commit 848b159835ddef99cc4193083f7e786c3992f580 ] + +with the introduction of commit +b0eb57cb97e7837ebb746404c2c58c6f536f23fa, it appears that rq->buf_info +is improperly handled. While it is heap allocated when an rx queue is +setup, and freed when torn down, an old line of code in +vmxnet3_rq_destroy was not properly removed, leading to rq->buf_info[0] +being set to NULL prior to its being freed, causing a memory leak, which +eventually exhausts the system on repeated create/destroy operations +(for example, when the mtu of a vmxnet3 interface is changed +frequently). + +Fix is pretty straightforward, just move the NULL set to after the +free. + +Tested by myself with successful results + +Applies to net, and should likely be queued for stable, please + +Signed-off-by: Neil Horman +Reported-By: boyang@redhat.com +CC: boyang@redhat.com +CC: Shrikrishna Khare +CC: "VMware, Inc." +CC: David S. Miller +Acked-by: Shrikrishna Khare +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -1616,7 +1616,6 @@ static void vmxnet3_rq_destroy(struct vm + rq->rx_ring[i].basePA); + rq->rx_ring[i].base = NULL; + } +- rq->buf_info[i] = NULL; + } + + if (rq->data_ring.base) { +@@ -1638,6 +1637,7 @@ static void vmxnet3_rq_destroy(struct vm + (rq->rx_ring[0].size + rq->rx_ring[1].size); + dma_free_coherent(&adapter->pdev->dev, sz, rq->buf_info[0], + rq->buf_info_pa); ++ rq->buf_info[0] = rq->buf_info[1] = NULL; + } + } +