From: Greg Kroah-Hartman Date: Wed, 15 Nov 2017 16:25:58 +0000 (+0100) Subject: 4.13-stable patches X-Git-Tag: v3.18.82~27 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=55bc16ecccf22a439097272efd63bd78e290f020;p=thirdparty%2Fkernel%2Fstable-queue.git 4.13-stable patches added patches: geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch gso-fix-payload-length-when-gso_size-is-zero.patch ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch ipv4-fix-traffic-triggered-ipsec-connections.patch ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch ipv6-fix-traffic-triggered-ipsec-connections.patch ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch l2tp-hold-tunnel-in-pppol2tp_connect.patch macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch net-bridge-fix-returning-of-vlan-range-op-errors.patch net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch net-dsa-check-master-device-before-put.patch net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch net_sched-avoid-matching-qdisc-with-zero-handle.patch netlink-do-not-set-cb_running-if-dump-s-start-errs.patch netlink-fix-netlink_ack-extack-race.patch packet-avoid-panic-in-packet_getsockopt.patch ppp-fix-race-in-ppp-device-destruction.patch sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch soreuseport-fix-initialization-race.patch tap-double-free-in-error-path-in-tap_open.patch 
tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch tcp-dccp-fix-ireq-opt-races.patch tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch tcp-fix-tcp_mtu_probe-vs-highest_sack.patch tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch tun-allow-positive-return-values-on-dev_get_valid_name-call.patch tun-call-dev_get_valid_name-before-register_netdevice.patch tun-tap-sanitize-tunsetsndbuf-input.patch --- diff --git a/queue-4.13/geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch b/queue-4.13/geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch new file mode 100644 index 00000000000..76a4cea88fa --- /dev/null +++ b/queue-4.13/geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch @@ -0,0 +1,43 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Stefano Brivio +Date: Thu, 19 Oct 2017 13:31:28 +0200 +Subject: geneve: Fix function matching VNI and tunnel ID on big-endian + +From: Stefano Brivio + + +[ Upstream commit 772e97b57a4aa00170ad505a40ffad31d987ce1d ] + +On big-endian machines, functions converting between tunnel ID +and VNI use the three LSBs of tunnel ID storage to map VNI. + +The comparison function eq_tun_id_and_vni(), on the other hand, +attempted to map the VNI from the three MSBs. Fix it by using +the same check implemented on LE, which maps VNI from the three +LSBs of tunnel ID. + +Fixes: 2e0b26e10352 ("geneve: Optimize geneve device lookup.") +Signed-off-by: Stefano Brivio +Reviewed-by: Jakub Sitnicki +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/geneve.c | 6 ------ + 1 file changed, 6 deletions(-) + +--- a/drivers/net/geneve.c ++++ b/drivers/net/geneve.c +@@ -113,13 +113,7 @@ static void tunnel_id_to_vni(__be64 tun_ + + static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni) + { +-#ifdef __BIG_ENDIAN +- return (vni[0] == tun_id[2]) && +- (vni[1] == tun_id[1]) && +- (vni[2] == tun_id[0]); +-#else + return !memcmp(vni, &tun_id[5], 3); +-#endif + } + + static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) diff --git a/queue-4.13/gso-fix-payload-length-when-gso_size-is-zero.patch b/queue-4.13/gso-fix-payload-length-when-gso_size-is-zero.patch new file mode 100644 index 00000000000..a1fa043c428 --- /dev/null +++ b/queue-4.13/gso-fix-payload-length-when-gso_size-is-zero.patch @@ -0,0 +1,62 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Alexey Kodanev +Date: Fri, 6 Oct 2017 19:02:35 +0300 +Subject: gso: fix payload length when gso_size is zero + +From: Alexey Kodanev + + +[ Upstream commit 3d0241d57c7b25bb75ac9d7a62753642264fdbce ] + +When gso_size reset to zero for the tail segment in skb_segment(), later +in ipv6_gso_segment(), __skb_udp_tunnel_segment() and gre_gso_segment() +we will get incorrect results (payload length, pcsum) for that segment. +inet_gso_segment() already has a check for gso_size before calculating +payload. + +The issue was found with LTP vxlan & gre tests over ixgbe NIC. + +Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer") +Signed-off-by: Alexey Kodanev +Acked-by: Alexander Duyck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/gre_offload.c | 2 +- + net/ipv4/udp_offload.c | 2 +- + net/ipv6/ip6_offload.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +--- a/net/ipv4/gre_offload.c ++++ b/net/ipv4/gre_offload.c +@@ -98,7 +98,7 @@ static struct sk_buff *gre_gso_segment(s + greh = (struct gre_base_hdr *)skb_transport_header(skb); + pcsum = (__sum16 *)(greh + 1); + +- if (gso_partial) { ++ if (gso_partial && skb_is_gso(skb)) { + unsigned int partial_adj; + + /* Adjust checksum to account for the fact that +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -122,7 +122,7 @@ static struct sk_buff *__skb_udp_tunnel_ + * will be using a length value equal to only one MSS sized + * segment instead of the entire frame. + */ +- if (gso_partial) { ++ if (gso_partial && skb_is_gso(skb)) { + uh->len = htons(skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)uh); +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment( + + for (skb = segs; skb; skb = skb->next) { + ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); +- if (gso_partial) ++ if (gso_partial && skb_is_gso(skb)) + payload_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)(ipv6h + 1); diff --git a/queue-4.13/ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch b/queue-4.13/ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch new file mode 100644 index 00000000000..b8750740835 --- /dev/null +++ b/queue-4.13/ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch @@ -0,0 +1,64 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Xin Long +Date: Thu, 26 Oct 2017 19:23:27 +0800 +Subject: ip6_gre: only increase err_count for some certain type icmpv6 in ip6gre_err + +From: Xin Long + + +[ Upstream commit 
f8d20b46ce55cf40afb30dcef6d9288f7ef46d9b ] + +The similar fix in patch 'ipip: only increase err_count for some +certain type icmp in ipip_err' is needed for ip6gre_err. + +In Jianlin's case, udp netperf broke even when receiving a TooBig +icmpv6 packet. + +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *s + case ICMPV6_DEST_UNREACH: + net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); +- break; ++ if (code != ICMPV6_PORT_UNREACH) ++ break; ++ return; + case ICMPV6_TIME_EXCEED: + if (code == ICMPV6_EXC_HOPLIMIT) { + net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); ++ break; + } +- break; ++ return; + case ICMPV6_PARAMPROB: + teli = 0; + if (code == ICMPV6_HDR_FIELD) +@@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *s + net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); + } +- break; ++ return; + case ICMPV6_PKT_TOOBIG: + mtu = be32_to_cpu(info) - offset - t->tun_hlen; + if (t->dev->type == ARPHRD_ETHER) +@@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *s + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + t->dev->mtu = mtu; +- break; ++ return; + } + + if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) diff --git a/queue-4.13/ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch b/queue-4.13/ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch new file mode 100644 index 00000000000..9325540c664 --- /dev/null +++ b/queue-4.13/ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch @@ -0,0 +1,68 @@ +From foo@baz 
Wed Nov 15 17:25:34 CET 2017 +From: Xin Long +Date: Thu, 26 Oct 2017 19:27:17 +0800 +Subject: ip6_gre: update dst pmtu if dev mtu has been updated by toobig in __gre6_xmit + +From: Xin Long + + +[ Upstream commit 8aec4959d832bae0889a8e2f348973b5e4abffef ] + +When receiving a Toobig icmpv6 packet, ip6gre_err would just set +tunnel dev's mtu, that's not enough. For skb_dst(skb)'s pmtu may +still be using the old value, it has no chance to be updated with +tunnel dev's mtu. + +Jianlin found this issue by reducing route's mtu while running +netperf, the performance went to 0. + +ip6ip6 and ip4ip6 tunnel can work well with this, as they lookup +the upper dst and update_pmtu it's pmtu or icmpv6_send a Toobig +to upper socket after setting tunnel dev's mtu. + +We couldn't do that for ip6_gre, as gre's inner packet could be +any protocol, it's difficult to handle them (like lookup upper +dst) in a good way. + +So this patch is to fix it by updating skb_dst(skb)'s pmtu when +dev->mtu < skb_dst(skb)'s pmtu in tx path. It's safe to do this +update there, as usually dev->mtu <= skb_dst(skb)'s pmtu and no +performance regression can be caused by this. + +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -503,8 +503,8 @@ static netdev_tx_t __gre6_xmit(struct sk + __u32 *pmtu, __be16 proto) + { + struct ip6_tnl *tunnel = netdev_priv(dev); +- __be16 protocol = (dev->type == ARPHRD_ETHER) ? +- htons(ETH_P_TEB) : proto; ++ struct dst_entry *dst = skb_dst(skb); ++ __be16 protocol; + + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; +@@ -518,9 +518,14 @@ static netdev_tx_t __gre6_xmit(struct sk + tunnel->o_seqno++; + + /* Push GRE header. */ ++ protocol = (dev->type == ARPHRD_ETHER) ? 
htons(ETH_P_TEB) : proto; + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); + ++ /* TooBig packet may have updated dst->dev's mtu */ ++ if (dst && dst_mtu(dst) > dst->dev->mtu) ++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); ++ + return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, + NEXTHDR_GRE); + } diff --git a/queue-4.13/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch b/queue-4.13/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch new file mode 100644 index 00000000000..59bee6b14d1 --- /dev/null +++ b/queue-4.13/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch @@ -0,0 +1,128 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Xin Long +Date: Thu, 26 Oct 2017 19:19:56 +0800 +Subject: ipip: only increase err_count for some certain type icmp in ipip_err + +From: Xin Long + + +[ Upstream commit f3594f0a7ea36661d7fd942facd7f31a64245f1a ] + +t->err_count is used to count the link failure on tunnel and an err +will be reported to user socket in tx path if t->err_count is not 0. +udp socket could even return EHOSTUNREACH to users. + +Since commit fd58156e456d ("IPIP: Use ip-tunneling code.") removed +the 'switch check' for icmp type in ipip_err(), err_count would be +increased by the icmp packet with ICMP_EXC_FRAGTIME code. an link +failure would be reported out due to this. + +In Jianlin's case, when receiving ICMP_EXC_FRAGTIME a icmp packet, +udp netperf failed with the err: + send_data: data send error: No route to host (errno 113) + +We expect this error reported from tunnel to socket when receiving +some certain type icmp, but not ICMP_EXC_FRAGTIME, ICMP_SR_FAILED +or ICMP_PARAMETERPROB ones. + +This patch is to bring 'switch check' for icmp type back to ipip_err +so that it only reports link failure for the right type icmp, just as +in ipgre_err() and ipip6_err(). 
+ +Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ipip.c | 59 +++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 42 insertions(+), 17 deletions(-) + +--- a/net/ipv4/ipip.c ++++ b/net/ipv4/ipip.c +@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_op + + static int ipip_err(struct sk_buff *skb, u32 info) + { +- +-/* All the routers (except for Linux) return only +- 8 bytes of packet payload. It means, that precise relaying of +- ICMP in the real Internet is absolutely infeasible. +- */ ++ /* All the routers (except for Linux) return only ++ * 8 bytes of packet payload. It means, that precise relaying of ++ * ICMP in the real Internet is absolutely infeasible. ++ */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + const struct iphdr *iph = (const struct iphdr *)skb->data; +- struct ip_tunnel *t; +- int err; + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; ++ struct ip_tunnel *t; ++ int err = 0; ++ ++ switch (type) { ++ case ICMP_DEST_UNREACH: ++ switch (code) { ++ case ICMP_SR_FAILED: ++ /* Impossible event. */ ++ goto out; ++ default: ++ /* All others are translated to HOST_UNREACH. ++ * rfc2003 contains "deep thoughts" about NET_UNREACH, ++ * I believe they are just ether pollution. 
--ANK ++ */ ++ break; ++ } ++ break; ++ ++ case ICMP_TIME_EXCEEDED: ++ if (code != ICMP_EXC_TTL) ++ goto out; ++ break; ++ ++ case ICMP_REDIRECT: ++ break; ++ ++ default: ++ goto out; ++ } + +- err = -ENOENT; + t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); +- if (!t) ++ if (!t) { ++ err = -ENOENT; + goto out; ++ } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { +- ipv4_update_pmtu(skb, dev_net(skb->dev), info, +- t->parms.link, 0, iph->protocol, 0); +- err = 0; ++ ipv4_update_pmtu(skb, net, info, t->parms.link, 0, ++ iph->protocol, 0); + goto out; + } + + if (type == ICMP_REDIRECT) { +- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, +- iph->protocol, 0); +- err = 0; ++ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0); + goto out; + } + +- if (t->parms.iph.daddr == 0) ++ if (t->parms.iph.daddr == 0) { ++ err = -ENOENT; + goto out; ++ } + +- err = 0; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) + goto out; + diff --git a/queue-4.13/ipv4-fix-traffic-triggered-ipsec-connections.patch b/queue-4.13/ipv4-fix-traffic-triggered-ipsec-connections.patch new file mode 100644 index 00000000000..20058febff7 --- /dev/null +++ b/queue-4.13/ipv4-fix-traffic-triggered-ipsec-connections.patch @@ -0,0 +1,40 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Steffen Klassert +Date: Mon, 9 Oct 2017 08:43:55 +0200 +Subject: ipv4: Fix traffic triggered IPsec connections. + +From: Steffen Klassert + + +[ Upstream commit 6c0e7284d89995877740d8a26c3e99a937312a3c ] + +A recent patch removed the dst_free() on the allocated +dst_entry in ipv4_blackhole_route(). The dst_free() marked the +dst_entry as dead and added it to the gc list. I.e. it was setup +for a one time usage. As a result we may now have a blackhole +route cached at a socket on some IPsec scenarios. This makes the +connection unusable. + +Fix this by marking the dst_entry directly at allocation time +as 'dead', so it is used only once. 
+ +Fixes: b838d5e1c5b6 ("ipv4: mark DST_NOGC and remove the operation of dst_free()") +Reported-by: Tobias Brunner +Signed-off-by: Steffen Klassert +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/route.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2507,7 +2507,7 @@ struct dst_entry *ipv4_blackhole_route(s + struct rtable *ort = (struct rtable *) dst_orig; + struct rtable *rt; + +- rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); ++ rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); + if (rt) { + struct dst_entry *new = &rt->dst; + diff --git a/queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch b/queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch new file mode 100644 index 00000000000..8bb28640bdd --- /dev/null +++ b/queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch @@ -0,0 +1,95 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Eric Dumazet +Date: Mon, 30 Oct 2017 22:47:09 -0700 +Subject: ipv6: addrconf: increment ifp refcount before ipv6_del_addr() + +From: Eric Dumazet + + +[ Upstream commit e669b86945478b3d90d2d87e3793a6eed06d332f ] + +In the (unlikely) event fixup_permanent_addr() returns a failure, +addrconf_permanent_addr() calls ipv6_del_addr() without the +mandatory call to in6_ifa_hold(), leading to a refcount error, +spotted by syzkaller : + +WARNING: CPU: 1 PID: 3142 at lib/refcount.c:227 refcount_dec+0x4c/0x50 +lib/refcount.c:227 +Kernel panic - not syncing: panic_on_warn set ... 
+ +CPU: 1 PID: 3142 Comm: ip Not tainted 4.14.0-rc4-next-20171009+ #33 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + panic+0x1e4/0x41c kernel/panic.c:181 + __warn+0x1c4/0x1e0 kernel/panic.c:544 + report_bug+0x211/0x2d0 lib/bug.c:183 + fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178 + do_trap_no_signal arch/x86/kernel/traps.c:212 [inline] + do_trap+0x260/0x390 arch/x86/kernel/traps.c:261 + do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298 + do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311 + invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905 +RIP: 0010:refcount_dec+0x4c/0x50 lib/refcount.c:227 +RSP: 0018:ffff8801ca49e680 EFLAGS: 00010286 +RAX: 000000000000002c RBX: ffff8801d07cfcdc RCX: 0000000000000000 +RDX: 000000000000002c RSI: 1ffff10039493c90 RDI: ffffed0039493cc4 +RBP: ffff8801ca49e688 R08: ffff8801ca49dd70 R09: 0000000000000000 +R10: ffff8801ca49df58 R11: 0000000000000000 R12: 1ffff10039493cd9 +R13: ffff8801ca49e6e8 R14: ffff8801ca49e7e8 R15: ffff8801d07cfcdc + __in6_ifa_put include/net/addrconf.h:369 [inline] + ipv6_del_addr+0x42b/0xb60 net/ipv6/addrconf.c:1208 + addrconf_permanent_addr net/ipv6/addrconf.c:3327 [inline] + addrconf_notify+0x1c66/0x2190 net/ipv6/addrconf.c:3393 + notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93 + __raw_notifier_call_chain kernel/notifier.c:394 [inline] + raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 + call_netdevice_notifiers_info+0x32/0x60 net/core/dev.c:1697 + call_netdevice_notifiers net/core/dev.c:1715 [inline] + __dev_notify_flags+0x15d/0x430 net/core/dev.c:6843 + dev_change_flags+0xf5/0x140 net/core/dev.c:6879 + do_setlink+0xa1b/0x38e0 net/core/rtnetlink.c:2113 + rtnl_newlink+0xf0d/0x1a40 net/core/rtnetlink.c:2661 + rtnetlink_rcv_msg+0x733/0x1090 net/core/rtnetlink.c:4301 + netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2408 + 
rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4313 + netlink_unicast_kernel net/netlink/af_netlink.c:1273 [inline] + netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1299 + netlink_sendmsg+0xa4a/0xe70 net/netlink/af_netlink.c:1862 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + ___sys_sendmsg+0x75b/0x8a0 net/socket.c:2049 + __sys_sendmsg+0xe5/0x210 net/socket.c:2083 + SYSC_sendmsg net/socket.c:2094 [inline] + SyS_sendmsg+0x2d/0x50 net/socket.c:2090 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x7fa9174d3320 +RSP: 002b:00007ffe302ae9e8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007ffe302b2ae0 RCX: 00007fa9174d3320 +RDX: 0000000000000000 RSI: 00007ffe302aea20 RDI: 0000000000000016 +RBP: 0000000000000082 R08: 0000000000000000 R09: 000000000000000f +R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffe302b32a0 +R13: 0000000000000000 R14: 00007ffe302b2ab8 R15: 00007ffe302b32b8 + +Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") +Signed-off-by: Eric Dumazet +Cc: David Ahern +Acked-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3367,6 +3367,7 @@ static void addrconf_permanent_addr(stru + if ((ifp->flags & IFA_F_PERMANENT) && + fixup_permanent_addr(idev, ifp) < 0) { + write_unlock_bh(&idev->lock); ++ in6_ifa_hold(ifp); + ipv6_del_addr(ifp); + write_lock_bh(&idev->lock); + diff --git a/queue-4.13/ipv6-fix-traffic-triggered-ipsec-connections.patch b/queue-4.13/ipv6-fix-traffic-triggered-ipsec-connections.patch new file mode 100644 index 00000000000..d44d1cfa844 --- /dev/null +++ b/queue-4.13/ipv6-fix-traffic-triggered-ipsec-connections.patch @@ -0,0 +1,40 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Steffen Klassert +Date: Mon, 9 Oct 2017 08:39:43 +0200 +Subject: ipv6: Fix traffic triggered IPsec connections. + +From: Steffen Klassert + + +[ Upstream commit 62cf27e52b8c9a39066172ca6b6134cb5eaa9450 ] + +A recent patch removed the dst_free() on the allocated +dst_entry in ipv6_blackhole_route(). The dst_free() marked +the dst_entry as dead and added it to the gc list. I.e. it +was setup for a one time usage. As a result we may now have +a blackhole route cached at a socket on some IPsec scenarios. +This makes the connection unusable. + +Fix this by marking the dst_entry directly at allocation time +as 'dead', so it is used only once. + +Fixes: 587fea741134 ("ipv6: mark DST_NOGC and remove the operation of dst_free()") +Reported-by: Tobias Brunner +Signed-off-by: Steffen Klassert +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/route.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1251,7 +1251,7 @@ struct dst_entry *ip6_blackhole_route(st + struct dst_entry *new = NULL; + + rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, +- DST_OBSOLETE_NONE, 0); ++ DST_OBSOLETE_DEAD, 0); + if (rt) { + rt6_info_init(rt); + diff --git a/queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch b/queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch new file mode 100644 index 00000000000..dd3ff4b237d --- /dev/null +++ b/queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch @@ -0,0 +1,104 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Eric Dumazet +Date: Sat, 21 Oct 2017 12:26:23 -0700 +Subject: ipv6: flowlabel: do not leave opt->tot_len with garbage + +From: Eric Dumazet + + +[ Upstream commit 864e2a1f8aac05effac6063ce316b480facb46ff ] + +When syzkaller team brought us a C repro for the crash [1] that +had been reported many times in the past, I finally could find +the root cause. + +If FlowLabel info is merged by fl6_merge_options(), we leave +part of the opt_space storage provided by udp/raw/l2tp with random value +in opt_space.tot_len, unless a control message was provided at sendmsg() +time. + +Then ip6_setup_cork() would use this random value to perform a kzalloc() +call. Undefined behavior and crashes. + +Fix is to properly set tot_len in fl6_merge_options() + +At the same time, we can also avoid consuming memory and cpu cycles +to clear it, if every option is copied via a kmemdup(). This is the +change in ip6_setup_cork(). 
+ +[1] +kasan: CONFIG_KASAN_INLINE enabled +kasan: GPF could be caused by NULL-ptr deref or user memory access +general protection fault: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 0 PID: 6613 Comm: syz-executor0 Not tainted 4.14.0-rc4+ #127 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +task: ffff8801cb64a100 task.stack: ffff8801cc350000 +RIP: 0010:ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 +RSP: 0018:ffff8801cc357550 EFLAGS: 00010203 +RAX: dffffc0000000000 RBX: ffff8801cc357748 RCX: 0000000000000010 +RDX: 0000000000000002 RSI: ffffffff842bd1d9 RDI: 0000000000000014 +RBP: ffff8801cc357620 R08: ffff8801cb17f380 R09: ffff8801cc357b10 +R10: ffff8801cb64a100 R11: 0000000000000000 R12: ffff8801cc357ab0 +R13: ffff8801cc357b10 R14: 0000000000000000 R15: ffff8801c3bbf0c0 +FS: 00007f9c5c459700(0000) GS:ffff8801db200000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000020324000 CR3: 00000001d1cf2000 CR4: 00000000001406f0 +DR0: 0000000020001010 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 +Call Trace: + ip6_make_skb+0x282/0x530 net/ipv6/ip6_output.c:1729 + udpv6_sendmsg+0x2769/0x3380 net/ipv6/udp.c:1340 + inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + SYSC_sendto+0x358/0x5a0 net/socket.c:1750 + SyS_sendto+0x40/0x50 net/socket.c:1718 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x4520a9 +RSP: 002b:00007f9c5c458c08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004520a9 +RDX: 0000000000000001 RSI: 0000000020fd1000 RDI: 0000000000000016 +RBP: 0000000000000086 R08: 0000000020e0afe4 R09: 000000000000001c +R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004bb1ee +R13: 00000000ffffffff R14: 0000000000000016 
R15: 0000000000000029 +Code: e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 ea 0f 00 00 48 8d 79 04 48 b8 00 00 00 00 00 fc ff df 45 8b 74 24 04 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 +RIP: ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 RSP: ffff8801cc357550 + +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_flowlabel.c | 1 + + net/ipv6/ip6_output.c | 4 ++-- + 2 files changed, 3 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_flowlabel.c ++++ b/net/ipv6/ip6_flowlabel.c +@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options + } + opt_space->dst1opt = fopt->dst1opt; + opt_space->opt_flen = fopt->opt_flen; ++ opt_space->tot_len = fopt->tot_len; + return opt_space; + } + EXPORT_SYMBOL_GPL(fl6_merge_options); +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1224,11 +1224,11 @@ static int ip6_setup_cork(struct sock *s + if (WARN_ON(v6_cork->opt)) + return -EINVAL; + +- v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); ++ v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); + if (unlikely(!v6_cork->opt)) + return -ENOBUFS; + +- v6_cork->opt->tot_len = opt->tot_len; ++ v6_cork->opt->tot_len = sizeof(*opt); + v6_cork->opt->opt_flen = opt->opt_flen; + v6_cork->opt->opt_nflen = opt->opt_nflen; + diff --git a/queue-4.13/l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch b/queue-4.13/l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch new file mode 100644 index 00000000000..cecd2a36b3b --- /dev/null +++ b/queue-4.13/l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch @@ -0,0 +1,36 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Guillaume Nault +Date: Fri, 13 Oct 2017 19:22:35 +0200 +Subject: l2tp: check ps->sock before running pppol2tp_session_ioctl() + +From: Guillaume Nault + + +[ Upstream commit 5903f594935a3841137c86b9d5b75143a5b7121c ] + +When 
pppol2tp_session_ioctl() is called by pppol2tp_tunnel_ioctl(), +the session may be unconnected. That is, it was created by +pppol2tp_session_create() and hasn't been connected with +pppol2tp_connect(). In this case, ps->sock is NULL, so we need to check +for this case in order to avoid dereferencing a NULL pointer. + +Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -993,6 +993,9 @@ static int pppol2tp_session_ioctl(struct + session->name, cmd, arg); + + sk = ps->sock; ++ if (!sk) ++ return -EBADR; ++ + sock_hold(sk); + + switch (cmd) { diff --git a/queue-4.13/l2tp-hold-tunnel-in-pppol2tp_connect.patch b/queue-4.13/l2tp-hold-tunnel-in-pppol2tp_connect.patch new file mode 100644 index 00000000000..8f2dd23028f --- /dev/null +++ b/queue-4.13/l2tp-hold-tunnel-in-pppol2tp_connect.patch @@ -0,0 +1,51 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Guillaume Nault +Date: Mon, 30 Oct 2017 17:58:58 +0100 +Subject: l2tp: hold tunnel in pppol2tp_connect() + +From: Guillaume Nault + + +[ Upstream commit f9e56baf03f9d36043a78f16e3e8b2cfd211e09e ] + +Use l2tp_tunnel_get() in pppol2tp_connect() to ensure the tunnel isn't +going to disappear while processing the rest of the function. + +Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -584,6 +584,7 @@ static int pppol2tp_connect(struct socke + u32 tunnel_id, peer_tunnel_id; + u32 session_id, peer_session_id; + bool drop_refcnt = false; ++ bool drop_tunnel = false; + int ver = 2; + int fd; + +@@ -652,7 +653,9 @@ static int pppol2tp_connect(struct socke + if (tunnel_id == 0) + goto end; + +- tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id); ++ tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id); ++ if (tunnel) ++ drop_tunnel = true; + + /* Special case: create tunnel context if session_id and + * peer_session_id is 0. Otherwise look up tunnel using supplied +@@ -781,6 +784,8 @@ out_no_ppp: + end: + if (drop_refcnt) + l2tp_session_dec_refcount(session); ++ if (drop_tunnel) ++ l2tp_tunnel_dec_refcount(tunnel); + release_sock(sk); + + return error; diff --git a/queue-4.13/macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch b/queue-4.13/macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch new file mode 100644 index 00000000000..e60aea4b13f --- /dev/null +++ b/queue-4.13/macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch @@ -0,0 +1,36 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Sabrina Dubroca +Date: Tue, 10 Oct 2017 17:07:12 +0200 +Subject: macsec: fix memory leaks when skb_to_sgvec fails + +From: Sabrina Dubroca + + +[ Upstream commit 5aba2ba5030b66a6f8c93049b718556f9aacd7c6 ] + +Fixes: cda7ea690350 ("macsec: check return value of skb_to_sgvec always") +Signed-off-by: Sabrina Dubroca +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macsec.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/net/macsec.c ++++ b/drivers/net/macsec.c +@@ -742,6 +742,7 @@ static struct sk_buff *macsec_encrypt(st + sg_init_table(sg, ret); + ret = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(ret < 0)) { ++ aead_request_free(req); + macsec_txsa_put(tx_sa); + kfree_skb(skb); + return ERR_PTR(ret); +@@ -954,6 +955,7 @@ static struct sk_buff *macsec_decrypt(st + sg_init_table(sg, ret); + ret = skb_to_sgvec(skb, sg, 0, skb->len); + if (unlikely(ret < 0)) { ++ aead_request_free(req); + kfree_skb(skb); + return ERR_PTR(ret); + } diff --git a/queue-4.13/net-bridge-fix-returning-of-vlan-range-op-errors.patch b/queue-4.13/net-bridge-fix-returning-of-vlan-range-op-errors.patch new file mode 100644 index 00000000000..b55fbdae193 --- /dev/null +++ b/queue-4.13/net-bridge-fix-returning-of-vlan-range-op-errors.patch @@ -0,0 +1,34 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Nikolay Aleksandrov +Date: Thu, 19 Oct 2017 20:17:32 +0300 +Subject: net: bridge: fix returning of vlan range op errors + +From: Nikolay Aleksandrov + + +[ Upstream commit 66c54517540cedf5a22911c6b7f5c7d8b5d1e1be ] + +When vlan tunnels were introduced, vlan range errors got silently +dropped and instead 0 was returned always. Restore the previous +behaviour and return errors to user-space. + +Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support") +Signed-off-by: Nikolay Aleksandrov +Acked-by: Roopa Prabhu +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_netlink.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -573,7 +573,7 @@ static int br_process_vlan_info(struct n + } + *vinfo_last = NULL; + +- return 0; ++ return err; + } + + return br_vlan_info(br, p, cmd, vinfo_curr); diff --git a/queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch b/queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch new file mode 100644 index 00000000000..ef4448bf491 --- /dev/null +++ b/queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch @@ -0,0 +1,44 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Eric Dumazet +Date: Tue, 10 Oct 2017 19:12:33 -0700 +Subject: net: call cgroup_sk_alloc() earlier in sk_clone_lock() + +From: Eric Dumazet + + +[ Upstream commit c0576e3975084d4699b7bfef578613fb8e1144f6 ] + +If for some reason, the newly allocated child need to be freed, +we will call cgroup_put() (via sk_free_unlock_clone()) while the +corresponding cgroup_get() was not yet done, and we will free memory +too soon. + +Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") +Signed-off-by: Eric Dumazet +Cc: Johannes Weiner +Cc: Tejun Heo +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1674,6 +1674,7 @@ struct sock *sk_clone_lock(const struct + newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + + sock_reset_flag(newsk, SOCK_DONE); ++ cgroup_sk_alloc(&newsk->sk_cgrp_data); + + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); +@@ -1706,8 +1707,6 @@ struct sock *sk_clone_lock(const struct + atomic64_set(&newsk->sk_cookie, 0); + + mem_cgroup_sk_alloc(newsk); +- cgroup_sk_alloc(&newsk->sk_cgrp_data); +- + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/queue-4.13/net-dsa-check-master-device-before-put.patch b/queue-4.13/net-dsa-check-master-device-before-put.patch new file mode 100644 index 00000000000..935eb5da874 --- /dev/null +++ b/queue-4.13/net-dsa-check-master-device-before-put.patch @@ -0,0 +1,43 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Vivien Didelot +Date: Tue, 24 Oct 2017 16:37:19 -0400 +Subject: net: dsa: check master device before put + +From: Vivien Didelot + + +[ Upstream commit 3eb8feeb1708c7dbfd2e97df92a2a407c116606e ] + +In the case of pdata, the dsa_cpu_parse function calls dev_put() before +making sure it isn't NULL. Fix this. + +Fixes: 71e0bbde0d88 ("net: dsa: Add support for platform data") +Signed-off-by: Vivien Didelot +Reviewed-by: Florian Fainelli +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/dsa/dsa2.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +--- a/net/dsa/dsa2.c ++++ b/net/dsa/dsa2.c +@@ -496,14 +496,15 @@ static int dsa_cpu_parse(struct dsa_port + if (!ethernet) + return -EINVAL; + ethernet_dev = of_find_net_device_by_node(ethernet); ++ if (!ethernet_dev) ++ return -EPROBE_DEFER; + } else { + ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]); ++ if (!ethernet_dev) ++ return -EPROBE_DEFER; + dev_put(ethernet_dev); + } + +- if (!ethernet_dev) +- return -EPROBE_DEFER; +- + if (!dst->cpu_dp) { + dst->cpu_dp = port; + dst->cpu_dp->netdev = ethernet_dev; diff --git a/queue-4.13/net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch b/queue-4.13/net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch new file mode 100644 index 00000000000..b452695609e --- /dev/null +++ b/queue-4.13/net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch @@ -0,0 +1,51 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Moshe Shemesh +Date: Thu, 19 Oct 2017 14:14:29 +0300 +Subject: net/mlx5: Fix health work queue spin lock to IRQ safe + +From: Moshe Shemesh + + +[ Upstream commit 6377ed0bbae6fa28853e1679d068a9106c8a8908 ] + +spin_lock/unlock of health->wq_lock should be IRQ safe. +It was changed to spin_lock_irqsave since adding commit 0179720d6be2 +("net/mlx5: Introduce trigger_health_work function") which uses +spin_lock from asynchronous event (IRQ) context. +Thus, all spin_lock/unlock of health->wq_lock should have been moved +to IRQ safe mode. 
+However, one occurrence on new code using this lock missed that +change, resulting in possible deadlock: + kernel: Possible unsafe locking scenario: + kernel: CPU0 + kernel: ---- + kernel: lock(&(&health->wq_lock)->rlock); + kernel: + kernel: lock(&(&health->wq_lock)->rlock); + kernel: #012 *** DEADLOCK *** + +Fixes: 2a0165a034ac ("net/mlx5: Cancel delayed recovery work when unloading the driver") +Signed-off-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/health.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c +@@ -356,10 +356,11 @@ void mlx5_drain_health_wq(struct mlx5_co + void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) + { + struct mlx5_core_health *health = &dev->priv.health; ++ unsigned long flags; + +- spin_lock(&health->wq_lock); ++ spin_lock_irqsave(&health->wq_lock, flags); + set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); +- spin_unlock(&health->wq_lock); ++ spin_unlock_irqrestore(&health->wq_lock, flags); + cancel_delayed_work_sync(&dev->priv.health.recover_work); + } + diff --git a/queue-4.13/net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch b/queue-4.13/net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch new file mode 100644 index 00000000000..87afee63c11 --- /dev/null +++ b/queue-4.13/net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch @@ -0,0 +1,247 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Or Gerlitz +Date: Tue, 17 Oct 2017 12:33:43 +0200 +Subject: net/mlx5e: Properly deal with encap flows add/del under neigh update + +From: Or Gerlitz + + +[ Upstream commit 3c37745ec614ff048d5dce38f976804b05d307ee ] + +Currently, the encap action offload is handled in the actions parse +function and not in mlx5e_tc_add_fdb_flow() 
where we deal with all +the other aspects of offloading actions (vlan, modify header) and +the rule itself. + +When the neigh update code (mlx5e_tc_encap_flows_add()) recreates the +encap entry and offloads the related flows, we wrongly call again into +mlx5e_tc_add_fdb_flow(), this for itself would cause us to handle +again the offloading of vlans and header re-write which puts things +in non consistent state and step on freed memory (e.g the modify +header parse buffer which is already freed). + +Since on error, mlx5e_tc_add_fdb_flow() detaches and may release the +encap entry, it causes a corruption at the neigh update code which goes +over the list of flows associated with this encap entry, or double free +when the tc flow is later deleted by user-space. + +When neigh update (mlx5e_tc_encap_flows_del()) unoffloads the flows related +to an encap entry which is now invalid, we do a partial repeat of the eswitch +flow removal code which is wrong too. + +To fix things up we do the following: + +(1) handle the encap action offload in the eswitch flow add function + mlx5e_tc_add_fdb_flow() as done for the other actions and the rule itself. + +(2) modify the neigh update code (mlx5e_tc_encap_flows_add/del) to only + deal with the encap entry and rules delete/add and not with any of + the other offloaded actions. + +Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow') +Signed-off-by: Or Gerlitz +Reviewed-by: Paul Blakey +Signed-off-by: Saeed Mahameed +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 89 ++++++++++++++---------- + 1 file changed, 54 insertions(+), 35 deletions(-) + +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +@@ -78,9 +78,11 @@ struct mlx5e_tc_flow { + }; + + struct mlx5e_tc_flow_parse_attr { ++ struct ip_tunnel_info tun_info; + struct mlx5_flow_spec spec; + int num_mod_hdr_actions; + void *mod_hdr_actions; ++ int mirred_ifindex; + }; + + enum { +@@ -322,6 +324,12 @@ static void mlx5e_tc_del_nic_flow(struct + static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + ++static int mlx5e_attach_encap(struct mlx5e_priv *priv, ++ struct ip_tunnel_info *tun_info, ++ struct net_device *mirred_dev, ++ struct net_device **encap_dev, ++ struct mlx5e_tc_flow *flow); ++ + static struct mlx5_flow_handle * + mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow_parse_attr *parse_attr, +@@ -329,9 +337,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv + { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; +- struct mlx5_flow_handle *rule; ++ struct net_device *out_dev, *encap_dev = NULL; ++ struct mlx5_flow_handle *rule = NULL; ++ struct mlx5e_rep_priv *rpriv; ++ struct mlx5e_priv *out_priv; + int err; + ++ if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) { ++ out_dev = __dev_get_by_index(dev_net(priv->netdev), ++ attr->parse_attr->mirred_ifindex); ++ err = mlx5e_attach_encap(priv, &parse_attr->tun_info, ++ out_dev, &encap_dev, flow); ++ if (err) { ++ rule = ERR_PTR(err); ++ if (err != -EAGAIN) ++ goto err_attach_encap; ++ } ++ out_priv = netdev_priv(encap_dev); ++ rpriv = out_priv->ppriv; ++ attr->out_rep = rpriv->rep; ++ } ++ + err = mlx5_eswitch_add_vlan_action(esw, attr); + if (err) { + rule = ERR_PTR(err); +@@ -347,10 +373,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv + } + } + +- rule = 
mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); +- if (IS_ERR(rule)) +- goto err_add_rule; +- ++ /* we get here if (1) there's no error (rule being null) or when ++ * (2) there's an encap action and we're on -EAGAIN (no valid neigh) ++ */ ++ if (rule != ERR_PTR(-EAGAIN)) { ++ rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr); ++ if (IS_ERR(rule)) ++ goto err_add_rule; ++ } + return rule; + + err_add_rule: +@@ -361,6 +391,7 @@ err_mod_hdr: + err_add_vlan: + if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) + mlx5e_detach_encap(priv, flow); ++err_attach_encap: + return rule; + } + +@@ -389,6 +420,8 @@ static void mlx5e_tc_del_fdb_flow(struct + void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) + { ++ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; ++ struct mlx5_esw_flow_attr *esw_attr; + struct mlx5e_tc_flow *flow; + int err; + +@@ -404,10 +437,9 @@ void mlx5e_tc_encap_flows_add(struct mlx + mlx5e_rep_queue_neigh_stats_work(priv); + + list_for_each_entry(flow, &e->flows, encap) { +- flow->esw_attr->encap_id = e->encap_id; +- flow->rule = mlx5e_tc_add_fdb_flow(priv, +- flow->esw_attr->parse_attr, +- flow); ++ esw_attr = flow->esw_attr; ++ esw_attr->encap_id = e->encap_id; ++ flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr); + if (IS_ERR(flow->rule)) { + err = PTR_ERR(flow->rule); + mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", +@@ -421,15 +453,13 @@ void mlx5e_tc_encap_flows_add(struct mlx + void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv, + struct mlx5e_encap_entry *e) + { ++ struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_tc_flow *flow; +- struct mlx5_fc *counter; + + list_for_each_entry(flow, &e->flows, encap) { + if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; +- counter = mlx5_flow_rule_counter(flow->rule); +- mlx5_del_flow_rules(flow->rule); +- 
mlx5_fc_destroy(priv->mdev, counter); ++ mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr); + } + } + +@@ -1871,7 +1901,7 @@ static int parse_tc_fdb_actions(struct m + + if (is_tcf_mirred_egress_redirect(a)) { + int ifindex = tcf_mirred_ifindex(a); +- struct net_device *out_dev, *encap_dev = NULL; ++ struct net_device *out_dev; + struct mlx5e_priv *out_priv; + + out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); +@@ -1884,17 +1914,13 @@ static int parse_tc_fdb_actions(struct m + rpriv = out_priv->ppriv; + attr->out_rep = rpriv->rep; + } else if (encap) { +- err = mlx5e_attach_encap(priv, info, +- out_dev, &encap_dev, flow); +- if (err && err != -EAGAIN) +- return err; ++ parse_attr->mirred_ifindex = ifindex; ++ parse_attr->tun_info = *info; ++ attr->parse_attr = parse_attr; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_COUNT; +- out_priv = netdev_priv(encap_dev); +- rpriv = out_priv->ppriv; +- attr->out_rep = rpriv->rep; +- attr->parse_attr = parse_attr; ++ /* attr->out_rep is resolved when we handle encap */ + } else { + pr_err("devices %s %s not on same switch HW, can't offload forwarding\n", + priv->netdev->name, out_dev->name); +@@ -1972,7 +1998,7 @@ int mlx5e_configure_flower(struct mlx5e_ + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow); + if (err < 0) +- goto err_handle_encap_flow; ++ goto err_free; + flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow); + } else { + err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow); +@@ -1983,10 +2009,13 @@ int mlx5e_configure_flower(struct mlx5e_ + + if (IS_ERR(flow->rule)) { + err = PTR_ERR(flow->rule); +- goto err_free; ++ if (err != -EAGAIN) ++ goto err_free; + } + +- flow->flags |= MLX5E_TC_FLOW_OFFLOADED; ++ if (err != -EAGAIN) ++ flow->flags |= MLX5E_TC_FLOW_OFFLOADED; ++ + err = rhashtable_insert_fast(&tc->ht, &flow->node, + tc->ht_params); + if 
(err) +@@ -2000,16 +2029,6 @@ int mlx5e_configure_flower(struct mlx5e_ + err_del_rule: + mlx5e_tc_del_flow(priv, flow); + +-err_handle_encap_flow: +- if (err == -EAGAIN) { +- err = rhashtable_insert_fast(&tc->ht, &flow->node, +- tc->ht_params); +- if (err) +- mlx5e_tc_del_flow(priv, flow); +- else +- return 0; +- } +- + err_free: + kvfree(parse_attr); + kfree(flow); diff --git a/queue-4.13/net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch b/queue-4.13/net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch new file mode 100644 index 00000000000..00b7dbe963d --- /dev/null +++ b/queue-4.13/net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch @@ -0,0 +1,39 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Andrei Vagin +Date: Wed, 25 Oct 2017 10:16:42 -0700 +Subject: net/unix: don't show information about sockets from other namespaces + +From: Andrei Vagin + + +[ Upstream commit 0f5da659d8f1810f44de14acf2c80cd6499623a0 ] + +socket_diag shows information only about sockets from a namespace where +a diag socket lives. + +But if we request information about one unix socket, the kernel don't +check that its netns is matched with a diag socket namespace, so any +user can get information about any unix socket in a system. This looks +like a bug. + +v2: add a Fixes tag + +Fixes: 51d7cccf0723 ("net: make sock diag per-namespace") +Signed-off-by: Andrei Vagin +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/diag.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/unix/diag.c ++++ b/net/unix/diag.c +@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk + err = -ENOENT; + if (sk == NULL) + goto out_nosk; ++ if (!net_eq(sock_net(sk), net)) ++ goto out; + + err = sock_diag_check_cookie(sk, req->udiag_cookie); + if (err) diff --git a/queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch b/queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch new file mode 100644 index 00000000000..5194ac5432b --- /dev/null +++ b/queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch @@ -0,0 +1,49 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Cong Wang +Date: Fri, 27 Oct 2017 22:08:56 -0700 +Subject: net_sched: avoid matching qdisc with zero handle + +From: Cong Wang + + +[ Upstream commit 50317fce2cc70a2bbbc4b42c31bbad510382a53c ] + +Davide found the following script triggers a NULL pointer +dereference: + +ip l a name eth0 type dummy +tc q a dev eth0 parent :1 handle 1: htb + +This is because for a freshly created netdevice noop_qdisc +is attached and when passing 'parent :1', kernel actually +tries to match the major handle which is 0 and noop_qdisc +has handle 0 so is matched by mistake. Commit 69012ae425d7 +tries to fix a similar bug but still misses this case. + +Handle 0 is not a valid one, should be just skipped. In +fact, kernel uses it as TC_H_UNSPEC. + +Fixes: 69012ae425d7 ("net: sched: fix handling of singleton qdiscs with qdisc_hash") +Fixes: 59cc1f61f09c ("net: sched:convert qdisc linked list to hashtable") +Reported-by: Davide Caratti +Cc: Jiri Kosina +Cc: Eric Dumazet +Cc: Jamal Hadi Salim +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_api.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -307,6 +307,8 @@ struct Qdisc *qdisc_lookup(struct net_de + { + struct Qdisc *q; + ++ if (!handle) ++ return NULL; + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; diff --git a/queue-4.13/netlink-do-not-set-cb_running-if-dump-s-start-errs.patch b/queue-4.13/netlink-do-not-set-cb_running-if-dump-s-start-errs.patch new file mode 100644 index 00000000000..095bc0e5d08 --- /dev/null +++ b/queue-4.13/netlink-do-not-set-cb_running-if-dump-s-start-errs.patch @@ -0,0 +1,61 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: "Jason A. Donenfeld" +Date: Mon, 9 Oct 2017 14:14:51 +0200 +Subject: netlink: do not set cb_running if dump's start() errs + +From: "Jason A. Donenfeld" + + +[ Upstream commit 41c87425a1ac9b633e0fcc78eb1f19640c8fb5a0 ] + +It turns out that multiple places can call netlink_dump(), which means +it's still possible to dereference partially initialized values in +dump() that were the result of a faulty returned start(). + +This fixes the issue by calling start() _before_ setting cb_running to +true, so that there's no chance at all of hitting the dump() function +through any indirect paths. + +It also moves the call to start() to be when the mutex is held. This has +the nice side effect of serializing invocations to start(), which is +likely desirable anyway. It also prevents any possible other races that +might come out of this logic. + +In testing this with several different pieces of tricky code to trigger +these issues, this commit fixes all avenues that I'm aware of. + +Signed-off-by: Jason A. Donenfeld +Cc: Johannes Berg +Reviewed-by: Johannes Berg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2258,16 +2258,17 @@ int __netlink_dump_start(struct sock *ss + cb->min_dump_alloc = control->min_dump_alloc; + cb->skb = skb; + ++ if (cb->start) { ++ ret = cb->start(cb); ++ if (ret) ++ goto error_unlock; ++ } ++ + nlk->cb_running = true; + + mutex_unlock(nlk->cb_mutex); + +- ret = 0; +- if (cb->start) +- ret = cb->start(cb); +- +- if (!ret) +- ret = netlink_dump(sk); ++ ret = netlink_dump(sk); + + sock_put(sk); + diff --git a/queue-4.13/netlink-fix-netlink_ack-extack-race.patch b/queue-4.13/netlink-fix-netlink_ack-extack-race.patch new file mode 100644 index 00000000000..1a9e5975a84 --- /dev/null +++ b/queue-4.13/netlink-fix-netlink_ack-extack-race.patch @@ -0,0 +1,67 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Johannes Berg +Date: Mon, 16 Oct 2017 17:09:53 +0200 +Subject: netlink: fix netlink_ack() extack race + +From: Johannes Berg + + +[ Upstream commit 48044eb490be71c203e14dd89e8bae87209eab52 ] + +It seems that it's possible to toggle NETLINK_F_EXT_ACK +through setsockopt() while another thread/CPU is building +a message inside netlink_ack(), which could then trigger +the WARN_ON()s I added since if it goes from being turned +off to being turned on between allocating and filling the +message, the skb could end up being too small. + +Avoid this whole situation by storing the value of this +flag in a separate variable and using that throughout the +function instead. + +Fixes: 2d4bc93368f5 ("netlink: extended ACK reporting") +Signed-off-by: Johannes Berg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2299,6 +2299,7 @@ void netlink_ack(struct sk_buff *in_skb, + size_t tlvlen = 0; + struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); + unsigned int flags = 0; ++ bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK; + + /* Error messages get the original request appened, unless the user + * requests to cap the error message, and get extra error data if +@@ -2309,7 +2310,7 @@ void netlink_ack(struct sk_buff *in_skb, + payload += nlmsg_len(nlh); + else + flags |= NLM_F_CAPPED; +- if (nlk->flags & NETLINK_F_EXT_ACK && extack) { ++ if (nlk_has_extack && extack) { + if (extack->_msg) + tlvlen += nla_total_size(strlen(extack->_msg) + 1); + if (extack->bad_attr) +@@ -2318,8 +2319,7 @@ void netlink_ack(struct sk_buff *in_skb, + } else { + flags |= NLM_F_CAPPED; + +- if (nlk->flags & NETLINK_F_EXT_ACK && +- extack && extack->cookie_len) ++ if (nlk_has_extack && extack && extack->cookie_len) + tlvlen += nla_total_size(extack->cookie_len); + } + +@@ -2347,7 +2347,7 @@ void netlink_ack(struct sk_buff *in_skb, + errmsg->error = err; + memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? 
nlh->nlmsg_len : sizeof(*nlh)); + +- if (nlk->flags & NETLINK_F_EXT_ACK && extack) { ++ if (nlk_has_extack && extack) { + if (err) { + if (extack->_msg) + WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, diff --git a/queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch b/queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch new file mode 100644 index 00000000000..4df26771541 --- /dev/null +++ b/queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch @@ -0,0 +1,86 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Eric Dumazet +Date: Wed, 18 Oct 2017 16:14:52 -0700 +Subject: packet: avoid panic in packet_getsockopt() + +From: Eric Dumazet + + +[ Upstream commit 509c7a1ecc8601f94ffba8a00889fefb239c00c6 ] + +syzkaller got crashes in packet_getsockopt() processing +PACKET_ROLLOVER_STATS command while another thread was managing +to change po->rollover + +Using RCU will fix this bug. We might later add proper RCU annotations +for sparse sake. + +In v2: I replaced kfree(rollover) in fanout_add() to kfree_rcu() +variant, as spotted by John. + +Fixes: a9b6391814d5 ("packet: rollover statistics") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Cc: John Sperbeck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -1771,7 +1771,7 @@ static int fanout_add(struct sock *sk, u + + out: + if (err && rollover) { +- kfree(rollover); ++ kfree_rcu(rollover, rcu); + po->rollover = NULL; + } + mutex_unlock(&fanout_mutex); +@@ -1798,8 +1798,10 @@ static struct packet_fanout *fanout_rele + else + f = NULL; + +- if (po->rollover) ++ if (po->rollover) { + kfree_rcu(po->rollover, rcu); ++ po->rollover = NULL; ++ } + } + mutex_unlock(&fanout_mutex); + +@@ -3853,6 +3855,7 @@ static int packet_getsockopt(struct sock + void *data = &val; + union tpacket_stats_u st; + struct tpacket_rollover_stats rstats; ++ struct packet_rollover *rollover; + + if (level != SOL_PACKET) + return -ENOPROTOOPT; +@@ -3931,13 +3934,18 @@ static int packet_getsockopt(struct sock + 0); + break; + case PACKET_ROLLOVER_STATS: +- if (!po->rollover) ++ rcu_read_lock(); ++ rollover = rcu_dereference(po->rollover); ++ if (rollover) { ++ rstats.tp_all = atomic_long_read(&rollover->num); ++ rstats.tp_huge = atomic_long_read(&rollover->num_huge); ++ rstats.tp_failed = atomic_long_read(&rollover->num_failed); ++ data = &rstats; ++ lv = sizeof(rstats); ++ } ++ rcu_read_unlock(); ++ if (!rollover) + return -EINVAL; +- rstats.tp_all = atomic_long_read(&po->rollover->num); +- rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); +- rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); +- data = &rstats; +- lv = sizeof(rstats); + break; + case PACKET_TX_HAS_OFF: + val = po->tp_tx_has_off; diff --git a/queue-4.13/ppp-fix-race-in-ppp-device-destruction.patch b/queue-4.13/ppp-fix-race-in-ppp-device-destruction.patch new file mode 100644 index 00000000000..57e25aab4fd --- /dev/null +++ b/queue-4.13/ppp-fix-race-in-ppp-device-destruction.patch @@ -0,0 +1,113 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 
+From: Guillaume Nault +Date: Fri, 6 Oct 2017 17:05:49 +0200 +Subject: ppp: fix race in ppp device destruction + +From: Guillaume Nault + + +[ Upstream commit 6151b8b37b119e8e3a8401b080d532520c95faf4 ] + +ppp_release() tries to ensure that netdevices are unregistered before +decrementing the unit refcount and running ppp_destroy_interface(). + +This is all fine as long as the the device is unregistered by +ppp_release(): the unregister_netdevice() call, followed by +rtnl_unlock(), guarantee that the unregistration process completes +before rtnl_unlock() returns. + +However, the device may be unregistered by other means (like +ppp_nl_dellink()). If this happens right before ppp_release() calling +rtnl_lock(), then ppp_release() has to wait for the concurrent +unregistration code to release the lock. +But rtnl_unlock() releases the lock before completing the device +unregistration process. This allows ppp_release() to proceed and +eventually call ppp_destroy_interface() before the unregistration +process completes. Calling free_netdev() on this partially unregistered +device will BUG(): + + ------------[ cut here ]------------ + kernel BUG at net/core/dev.c:8141! + invalid opcode: 0000 [#1] SMP + + CPU: 1 PID: 1557 Comm: pppd Not tainted 4.14.0-rc2+ #4 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014 + + Call Trace: + ppp_destroy_interface+0xd8/0xe0 [ppp_generic] + ppp_disconnect_channel+0xda/0x110 [ppp_generic] + ppp_unregister_channel+0x5e/0x110 [ppp_generic] + pppox_unbind_sock+0x23/0x30 [pppox] + pppoe_connect+0x130/0x440 [pppoe] + SYSC_connect+0x98/0x110 + ? do_fcntl+0x2c0/0x5d0 + SyS_connect+0xe/0x10 + entry_SYSCALL_64_fastpath+0x1a/0xa5 + + RIP: free_netdev+0x107/0x110 RSP: ffffc28a40573d88 + ---[ end trace ed294ff0cc40eeff ]--- + +We could set the ->needs_free_netdev flag on PPP devices and move the +ppp_destroy_interface() logic in the ->priv_destructor() callback. 
But +that'd be quite intrusive as we'd first need to unlink from the other +channels and units that depend on the device (the ones that used the +PPPIOCCONNECT and PPPIOCATTACH ioctls). + +Instead, we can just let the netdevice hold a reference on its +ppp_file. This reference is dropped in ->priv_destructor(), at the very +end of the unregistration process, so that neither ppp_release() nor +ppp_disconnect_channel() can call ppp_destroy_interface() in the interim. + +Reported-by: Beniamino Galvani +Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/ppp_generic.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -1339,7 +1339,17 @@ ppp_get_stats64(struct net_device *dev, + + static int ppp_dev_init(struct net_device *dev) + { ++ struct ppp *ppp; ++ + netdev_lockdep_set_classes(dev); ++ ++ ppp = netdev_priv(dev); ++ /* Let the netdevice take a reference on the ppp file. This ensures ++ * that ppp_destroy_interface() won't run before the device gets ++ * unregistered. 
++ */ ++ atomic_inc(&ppp->file.refcnt); ++ + return 0; + } + +@@ -1362,6 +1372,15 @@ static void ppp_dev_uninit(struct net_de + wake_up_interruptible(&ppp->file.rwait); + } + ++static void ppp_dev_priv_destructor(struct net_device *dev) ++{ ++ struct ppp *ppp; ++ ++ ppp = netdev_priv(dev); ++ if (atomic_dec_and_test(&ppp->file.refcnt)) ++ ppp_destroy_interface(ppp); ++} ++ + static const struct net_device_ops ppp_netdev_ops = { + .ndo_init = ppp_dev_init, + .ndo_uninit = ppp_dev_uninit, +@@ -1387,6 +1406,7 @@ static void ppp_setup(struct net_device + dev->tx_queue_len = 3; + dev->type = ARPHRD_PPP; + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; ++ dev->priv_destructor = ppp_dev_priv_destructor; + netif_keep_dst(dev); + } + diff --git a/queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch b/queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch new file mode 100644 index 00000000000..92f0a66d096 --- /dev/null +++ b/queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch @@ -0,0 +1,48 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Xin Long +Date: Wed, 18 Oct 2017 21:37:49 +0800 +Subject: sctp: add the missing sock_owned_by_user check in sctp_icmp_redirect + +From: Xin Long + + +[ Upstream commit 1cc276cec9ec574d41cf47dfc0f51406b6f26ab4 ] + +Now sctp processes icmp redirect packet in sctp_icmp_redirect where +it calls sctp_transport_dst_check in which tp->dst can be released. + +The problem is before calling sctp_transport_dst_check, it doesn't +check sock_owned_by_user, which means tp->dst could be freed while +a process is accessing it with owning the socket. + +An use-after-free issue could be triggered by this. + +This patch is to fix it by checking sock_owned_by_user before calling +sctp_transport_dst_check in sctp_icmp_redirect, so that it would not +release tp->dst if users still hold sock lock. 
+ +Besides, the same issue fixed in commit 45caeaa5ac0b ("dccp/tcp: fix +routing redirect race") on sctp also needs this check. + +Fixes: 55be7a9c6074 ("ipv4: Add redirect support to all protocol icmp error handlers") +Reported-by: Eric Dumazet +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/input.c ++++ b/net/sctp/input.c +@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, + { + struct dst_entry *dst; + +- if (!t) ++ if (sock_owned_by_user(sk) || !t) + return; + dst = sctp_transport_dst_check(t); + if (dst) diff --git a/queue-4.13/sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch b/queue-4.13/sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch new file mode 100644 index 00000000000..9b96de99457 --- /dev/null +++ b/queue-4.13/sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch @@ -0,0 +1,55 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Laszlo Toth +Date: Mon, 23 Oct 2017 19:19:33 +0200 +Subject: sctp: full support for ipv6 ip_nonlocal_bind & IP_FREEBIND + +From: Laszlo Toth + + +[ Upstream commit b71d21c274eff20a9db8158882b545b141b73ab8 ] + +Commit 9b9742022888 ("sctp: support ipv6 nonlocal bind") +introduced support for the above options as v4 sctp did, +so patched sctp_v6_available(). + +In the v4 implementation it's enough, because +sctp_inet_bind_verify() just returns with sctp_v4_available(). +However sctp_inet6_bind_verify() has an extra check before that +for link-local scope_id, which won't respect the above options. + +Added the checks before calling ipv6_chk_addr(), but +not before the validation of scope_id. 
+ +before (w/ both options): + ./v6test fe80::10 sctp + bind failed, errno: 99 (Cannot assign requested address) + ./v6test fe80::10 tcp + bind success, errno: 0 (Success) + +after (w/ both options): + ./v6test fe80::10 sctp + bind success, errno: 0 (Success) + +Signed-off-by: Laszlo Toth +Reviewed-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/ipv6.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/sctp/ipv6.c ++++ b/net/sctp/ipv6.c +@@ -882,8 +882,10 @@ static int sctp_inet6_bind_verify(struct + net = sock_net(&opt->inet.sk); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); +- if (!dev || +- !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) { ++ if (!dev || !(opt->inet.freebind || ++ net->ipv6.sysctl.ip_nonlocal_bind || ++ ipv6_chk_addr(net, &addr->v6.sin6_addr, ++ dev, 0))) { + rcu_read_unlock(); + return 0; + } diff --git a/queue-4.13/sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch b/queue-4.13/sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch new file mode 100644 index 00000000000..e9c31c9a17d --- /dev/null +++ b/queue-4.13/sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch @@ -0,0 +1,100 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Xin Long +Date: Sat, 28 Oct 2017 02:13:29 +0800 +Subject: sctp: reset owner sk for data chunks on out queues when migrating a sock + +From: Xin Long + + +[ Upstream commit d04adf1b355181e737b6b1e23d801b07f0b7c4c0 ] + +Now when migrating sock to another one in sctp_sock_migrate(), it only +resets owner sk for the data in receive queues, not the chunks on out +queues. + +It would cause that data chunks length on the sock is not consistent +with sk sk_wmem_alloc. When closing the sock or freeing these chunks, +the old sk would never be freed, and the new sock may crash due to +the overflow sk_wmem_alloc. 
+ +syzbot found this issue with this series: + + r0 = socket$inet_sctp() + sendto$inet(r0) + listen(r0) + accept4(r0) + close(r0) + +Although listen() should have returned error when one TCP-style socket +is in connecting (I may fix this one in another patch), it could also +be reproduced by peeling off an assoc. + +This issue is there since very beginning. + +This patch is to reset owner sk for the chunks on out queues so that +sk sk_wmem_alloc has correct value after accept one sock or peeloff +an assoc to one sock. + +Note that when resetting owner sk for chunks on outqueue, it has to +sctp_clear_owner_w/skb_orphan chunks before changing assoc->base.sk +first and then sctp_set_owner_w them after changing assoc->base.sk, +due to that sctp_wfree and it's callees are using assoc->base.sk. + +Reported-by: Dmitry Vyukov +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -169,6 +169,36 @@ static inline void sctp_set_owner_w(stru + sk_mem_charge(sk, chunk->skb->truesize); + } + ++static void sctp_clear_owner_w(struct sctp_chunk *chunk) ++{ ++ skb_orphan(chunk->skb); ++} ++ ++static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, ++ void (*cb)(struct sctp_chunk *)) ++ ++{ ++ struct sctp_outq *q = &asoc->outqueue; ++ struct sctp_transport *t; ++ struct sctp_chunk *chunk; ++ ++ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) ++ list_for_each_entry(chunk, &t->transmitted, transmitted_list) ++ cb(chunk); ++ ++ list_for_each_entry(chunk, &q->retransmit, list) ++ cb(chunk); ++ ++ list_for_each_entry(chunk, &q->sacked, list) ++ cb(chunk); ++ ++ list_for_each_entry(chunk, &q->abandoned, list) ++ cb(chunk); ++ ++ list_for_each_entry(chunk, &q->out_chunk_list, list) ++ cb(chunk); ++} ++ + /* Verify that this is a valid 
address. */ + static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, + int len) +@@ -8196,7 +8226,9 @@ static void sctp_sock_migrate(struct soc + * paths won't try to lock it and then oldsk. + */ + lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); ++ sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w); + sctp_assoc_migrate(assoc, newsk); ++ sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w); + + /* If the association on the newsk is already closed before accept() + * is called, set RCV_SHUTDOWN flag. diff --git a/queue-4.13/series b/queue-4.13/series new file mode 100644 index 00000000000..563466748f9 --- /dev/null +++ b/queue-4.13/series @@ -0,0 +1,37 @@ +ppp-fix-race-in-ppp-device-destruction.patch +gso-fix-payload-length-when-gso_size-is-zero.patch +ipv4-fix-traffic-triggered-ipsec-connections.patch +ipv6-fix-traffic-triggered-ipsec-connections.patch +netlink-do-not-set-cb_running-if-dump-s-start-errs.patch +net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch +macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch +l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch +tun-call-dev_get_valid_name-before-register_netdevice.patch +netlink-fix-netlink_ack-extack-race.patch +sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch +tcp-dccp-fix-ireq-opt-races.patch +packet-avoid-panic-in-packet_getsockopt.patch +geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch +net-bridge-fix-returning-of-vlan-range-op-errors.patch +soreuseport-fix-initialization-race.patch +ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch +sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch +tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch +tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch +net-dsa-check-master-device-before-put.patch +net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch +tap-double-free-in-error-path-in-tap_open.patch 
+net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch +net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch +ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch +ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch +ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch +tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch +tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch +tun-allow-positive-return-values-on-dev_get_valid_name-call.patch +sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch +net_sched-avoid-matching-qdisc-with-zero-handle.patch +l2tp-hold-tunnel-in-pppol2tp_connect.patch +tun-tap-sanitize-tunsetsndbuf-input.patch +ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch +tcp-fix-tcp_mtu_probe-vs-highest_sack.patch diff --git a/queue-4.13/soreuseport-fix-initialization-race.patch b/queue-4.13/soreuseport-fix-initialization-race.patch new file mode 100644 index 00000000000..bd19887e220 --- /dev/null +++ b/queue-4.13/soreuseport-fix-initialization-race.patch @@ -0,0 +1,91 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Craig Gallek +Date: Thu, 19 Oct 2017 15:00:29 -0400 +Subject: soreuseport: fix initialization race + +From: Craig Gallek + + +[ Upstream commit 1b5f962e71bfad6284574655c406597535c3ea7a ] + +Syzkaller stumbled upon a way to trigger +WARNING: CPU: 1 PID: 13881 at net/core/sock_reuseport.c:41 +reuseport_alloc+0x306/0x3b0 net/core/sock_reuseport.c:39 + +There are two initialization paths for the sock_reuseport structure in a +socket: Through the udp/tcp bind paths of SO_REUSEPORT sockets or through +SO_ATTACH_REUSEPORT_[CE]BPF before bind. The existing implementation +assumedthat the socket lock protected both of these paths when it actually +only protects the SO_ATTACH_REUSEPORT path. Syzkaller triggered this +double allocation by running these paths concurrently. 
+ +This patch moves the check for double allocation into the reuseport_alloc +function which is protected by a global spin lock. + +Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") +Fixes: c125e80b8868 ("soreuseport: fast reuseport TCP socket selection") +Signed-off-by: Craig Gallek +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock_reuseport.c | 12 +++++++++--- + net/ipv4/inet_hashtables.c | 5 +---- + net/ipv4/udp.c | 5 +---- + 3 files changed, 11 insertions(+), 11 deletions(-) + +--- a/net/core/sock_reuseport.c ++++ b/net/core/sock_reuseport.c +@@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk) + * soft irq of receive path or setsockopt from process context + */ + spin_lock_bh(&reuseport_lock); +- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, +- lockdep_is_held(&reuseport_lock)), +- "multiple allocations for the same socket"); ++ ++ /* Allocation attempts can occur concurrently via the setsockopt path ++ * and the bind/hash path. Nothing to do when we lose the race. 
++ */ ++ if (rcu_dereference_protected(sk->sk_reuseport_cb, ++ lockdep_is_held(&reuseport_lock))) ++ goto out; ++ + reuse = __reuseport_alloc(INIT_SOCKS); + if (!reuse) { + spin_unlock_bh(&reuseport_lock); +@@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk) + reuse->num_socks = 1; + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + ++out: + spin_unlock_bh(&reuseport_lock); + + return 0; +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -449,10 +449,7 @@ static int inet_reuseport_add_sock(struc + return reuseport_add_sock(sk, sk2); + } + +- /* Initial allocation may have already happened via setsockopt */ +- if (!rcu_access_pointer(sk->sk_reuseport_cb)) +- return reuseport_alloc(sk); +- return 0; ++ return reuseport_alloc(sk); + } + + int __inet_hash(struct sock *sk, struct sock *osk) +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct + } + } + +- /* Initial allocation may have already happened via setsockopt */ +- if (!rcu_access_pointer(sk->sk_reuseport_cb)) +- return reuseport_alloc(sk); +- return 0; ++ return reuseport_alloc(sk); + } + + /** diff --git a/queue-4.13/tap-double-free-in-error-path-in-tap_open.patch b/queue-4.13/tap-double-free-in-error-path-in-tap_open.patch new file mode 100644 index 00000000000..9c65862a8af --- /dev/null +++ b/queue-4.13/tap-double-free-in-error-path-in-tap_open.patch @@ -0,0 +1,66 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Girish Moodalbail +Date: Wed, 25 Oct 2017 00:23:04 -0700 +Subject: tap: double-free in error path in tap_open() + +From: Girish Moodalbail + + +[ Upstream commit 78e0ea6791d7baafb8a0ca82b1bd0c7b3453c919 ] + +Double free of skb_array in tap module is causing kernel panic. When +tap_set_queue() fails we free skb_array right away by calling +skb_array_cleanup(). However, later on skb_array_cleanup() is called +again by tap_sock_destruct through sock_put(). This patch fixes that +issue. 
+ +Fixes: 362899b8725b35e3 (macvtap: switch to use skb array) +Signed-off-by: Girish Moodalbail +Acked-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tap.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/drivers/net/tap.c ++++ b/drivers/net/tap.c +@@ -517,6 +517,10 @@ static int tap_open(struct inode *inode, + &tap_proto, 0); + if (!q) + goto err; ++ if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) { ++ sk_free(&q->sk); ++ goto err; ++ } + + RCU_INIT_POINTER(q->sock.wq, &q->wq); + init_waitqueue_head(&q->wq.wait); +@@ -540,22 +544,18 @@ static int tap_open(struct inode *inode, + if ((tap->dev->features & NETIF_F_HIGHDMA) && (tap->dev->features & NETIF_F_SG)) + sock_set_flag(&q->sk, SOCK_ZEROCOPY); + +- err = -ENOMEM; +- if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) +- goto err_array; +- + err = tap_set_queue(tap, file, q); +- if (err) +- goto err_queue; ++ if (err) { ++ /* tap_sock_destruct() will take care of freeing skb_array */ ++ goto err_put; ++ } + + dev_put(tap->dev); + + rtnl_unlock(); + return err; + +-err_queue: +- skb_array_cleanup(&q->skb_array); +-err_array: ++err_put: + sock_put(&q->sk); + err: + if (tap) diff --git a/queue-4.13/tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch b/queue-4.13/tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch new file mode 100644 index 00000000000..e341dd2e8e7 --- /dev/null +++ b/queue-4.13/tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch @@ -0,0 +1,120 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Girish Moodalbail +Date: Fri, 27 Oct 2017 00:00:16 -0700 +Subject: tap: reference to KVA of an unloaded module causes kernel panic + +From: Girish Moodalbail + + +[ Upstream commit dea6e19f4ef746aa18b4c33d1a7fed54356796ed ] + +The commit 9a393b5d5988 ("tap: tap as an independent module") created a +separate tap module that 
implements tap functionality and exports +interfaces that will be used by macvtap and ipvtap modules to create +create respective tap devices. + +However, that patch introduced a regression wherein the modules macvtap +and ipvtap can be removed (through modprobe -r) while there are +applications using the respective /dev/tapX devices. These applications +cause kernel to hold reference to /dev/tapX through 'struct cdev +macvtap_cdev' and 'struct cdev ipvtap_dev' defined in macvtap and ipvtap +modules respectively. So, when the application is later closed the +kernel panics because we are referencing KVA that is present in the +unloaded modules. + +----------8<------- Example ----------8<---------- +$ sudo ip li add name mv0 link enp7s0 type macvtap +$ sudo ip li show mv0 |grep mv0| awk -e '{print $1 $2}' + 14:mv0@enp7s0: +$ cat /dev/tap14 & +$ lsmod |egrep -i 'tap|vlan' +macvtap 16384 0 +macvlan 24576 1 macvtap +tap 24576 3 macvtap +$ sudo modprobe -r macvtap +$ fg +cat /dev/tap14 +^C + +<...system panics...> +BUG: unable to handle kernel paging request at ffffffffa038c500 +IP: cdev_put+0xf/0x30 +----------8<-----------------8<---------- + +The fix is to set cdev.owner to the module that creates the tap device +(either macvtap or ipvtap). With this set, the operations (in +fs/char_dev.c) on char device holds and releases the module through +cdev_get() and cdev_put() and will not allow the module to unload +prematurely. + +Fixes: 9a393b5d5988ea4e (tap: tap as an independent module) +Signed-off-by: Girish Moodalbail +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ipvlan/ipvtap.c | 4 ++-- + drivers/net/macvtap.c | 4 ++-- + drivers/net/tap.c | 5 +++-- + include/linux/if_tap.h | 4 ++-- + 4 files changed, 9 insertions(+), 8 deletions(-) + +--- a/drivers/net/ipvlan/ipvtap.c ++++ b/drivers/net/ipvlan/ipvtap.c +@@ -197,8 +197,8 @@ static int ipvtap_init(void) + { + int err; + +- err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap"); +- ++ err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap", ++ THIS_MODULE); + if (err) + goto out1; + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -204,8 +204,8 @@ static int macvtap_init(void) + { + int err; + +- err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap"); +- ++ err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap", ++ THIS_MODULE); + if (err) + goto out1; + +--- a/drivers/net/tap.c ++++ b/drivers/net/tap.c +@@ -1252,8 +1252,8 @@ static int tap_list_add(dev_t major, con + return 0; + } + +-int tap_create_cdev(struct cdev *tap_cdev, +- dev_t *tap_major, const char *device_name) ++int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, ++ const char *device_name, struct module *module) + { + int err; + +@@ -1262,6 +1262,7 @@ int tap_create_cdev(struct cdev *tap_cde + goto out1; + + cdev_init(tap_cdev, &tap_fops); ++ tap_cdev->owner = module; + err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS); + if (err) + goto out2; +--- a/include/linux/if_tap.h ++++ b/include/linux/if_tap.h +@@ -73,8 +73,8 @@ void tap_del_queues(struct tap_dev *tap) + int tap_get_minor(dev_t major, struct tap_dev *tap); + void tap_free_minor(dev_t major, struct tap_dev *tap); + int tap_queue_resize(struct tap_dev *tap); +-int tap_create_cdev(struct cdev *tap_cdev, +- dev_t *tap_major, const char *device_name); ++int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, ++ const char *device_name, struct module *module); + void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev); + + #endif 
/*_LINUX_IF_TAP_H_*/ diff --git a/queue-4.13/tcp-dccp-fix-ireq-opt-races.patch b/queue-4.13/tcp-dccp-fix-ireq-opt-races.patch new file mode 100644 index 00000000000..4c3c09b2f79 --- /dev/null +++ b/queue-4.13/tcp-dccp-fix-ireq-opt-races.patch @@ -0,0 +1,408 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Eric Dumazet +Date: Fri, 20 Oct 2017 09:04:13 -0700 +Subject: tcp/dccp: fix ireq->opt races + +From: Eric Dumazet + + +[ Upstream commit c92e8c02fe664155ac4234516e32544bec0f113d ] + +syzkaller found another bug in DCCP/TCP stacks [1] + +For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix +ireq->pktopts race"), we need to make sure we do not access +ireq->opt unless we own the request sock. + +Note the opt field is renamed to ireq_opt to ease grep games. + +[1] +BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 +Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295 + +CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + print_address_description+0x73/0x250 mm/kasan/report.c:252 + kasan_report_error mm/kasan/report.c:351 [inline] + kasan_report+0x25b/0x340 mm/kasan/report.c:409 + __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427 + ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 + tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135 + tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587 + tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557 + __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072 + tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline] + tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071 + tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816 + tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682 + ip_local_deliver_finish+0x2e2/0xba0 
net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:464 [inline] + ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 + __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 + netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 + netif_receive_skb+0xae/0x390 net/core/dev.c:4611 + tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 + tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 + tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 + call_write_iter include/linux/fs.h:1770 [inline] + new_sync_write fs/read_write.c:468 [inline] + __vfs_write+0x68a/0x970 fs/read_write.c:481 + vfs_write+0x18f/0x510 fs/read_write.c:543 + SYSC_write fs/read_write.c:588 [inline] + SyS_write+0xef/0x220 fs/read_write.c:580 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x40c341 +RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341 +RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015 +RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 +R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1 +R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000 + +Allocated by task 3295: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 + __do_kmalloc mm/slab.c:3725 [inline] + __kmalloc+0x162/0x760 mm/slab.c:3734 + kmalloc include/linux/slab.h:498 [inline] + tcp_v4_save_options include/net/tcp.h:1962 [inline] + tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271 + tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283 + tcp_v4_conn_request+0x157/0x210 
net/ipv4/tcp_ipv4.c:1313 + tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857 + tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482 + tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711 + ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:464 [inline] + ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 + __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 + netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 + netif_receive_skb+0xae/0x390 net/core/dev.c:4611 + tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 + tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 + tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 + call_write_iter include/linux/fs.h:1770 [inline] + new_sync_write fs/read_write.c:468 [inline] + __vfs_write+0x68a/0x970 fs/read_write.c:481 + vfs_write+0x18f/0x510 fs/read_write.c:543 + SYSC_write fs/read_write.c:588 [inline] + SyS_write+0xef/0x220 fs/read_write.c:580 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Freed by task 3306: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 + __cache_free mm/slab.c:3503 [inline] + kfree+0xca/0x250 mm/slab.c:3820 + inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157 + __sk_destruct+0xfd/0x910 net/core/sock.c:1560 + sk_destruct+0x47/0x80 net/core/sock.c:1595 + __sk_free+0x57/0x230 net/core/sock.c:1603 + sk_free+0x2a/0x40 net/core/sock.c:1614 + sock_put include/net/sock.h:1652 [inline] + inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959 + tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765 + tcp_v4_rcv+0x17f6/0x2f80 
net/ipv4/tcp_ipv4.c:1675 + ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:464 [inline] + ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 + __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 + netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 + netif_receive_skb+0xae/0x390 net/core/dev.c:4611 + tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 + tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 + tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 + call_write_iter include/linux/fs.h:1770 [inline] + new_sync_write fs/read_write.c:468 [inline] + __vfs_write+0x68a/0x970 fs/read_write.c:481 + vfs_write+0x18f/0x510 fs/read_write.c:543 + SYSC_write fs/read_write.c:588 [inline] + SyS_write+0xef/0x220 fs/read_write.c:580 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") +Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 2 +- + net/dccp/ipv4.c | 13 ++++++++----- + net/ipv4/cipso_ipv4.c | 24 +++++++----------------- + net/ipv4/inet_connection_sock.c | 8 +++----- + net/ipv4/syncookies.c | 2 +- + net/ipv4/tcp_input.c | 2 +- + net/ipv4/tcp_ipv4.c | 21 ++++++++++++--------- + 7 files changed, 33 insertions(+), 39 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -96,7 +96,7 @@ struct inet_request_sock { + kmemcheck_bitfield_end(flags); + u32 ir_mark; + union { +- struct ip_options_rcu *opt; ++ struct ip_options_rcu __rcu *ireq_opt; + #if IS_ENABLED(CONFIG_IPV6) + struct { + struct ipv6_txoptions *ipv6_opt; +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(c + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newinet->inet_saddr = ireq->ir_loc_addr; +- newinet->inet_opt = ireq->opt; +- ireq->opt = NULL; ++ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->inet_id = jiffies; +@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(c + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); +- ++ if (*own_req) ++ ireq->ireq_opt = NULL; ++ else ++ newinet->inet_opt = NULL; + return newsk; + + exit_overflow: +@@ -441,6 +443,7 @@ exit: + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); + return NULL; + put_and_exit: ++ newinet->inet_opt = NULL; + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); + goto exit; +@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const s + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq->opt); ++ rcu_dereference(ireq->ireq_opt)); + err = net_xmit_eval(err); + } + +@@ -548,7 +551,7 @@ out: + static void 
dccp_v4_reqsk_destructor(struct request_sock *req) + { + dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); +- kfree(inet_rsk(req)->opt); ++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); + } + + void dccp_syn_ack_timeout(const struct request_sock *req) +--- a/net/ipv4/cipso_ipv4.c ++++ b/net/ipv4/cipso_ipv4.c +@@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_ + buf = NULL; + + req_inet = inet_rsk(req); +- opt = xchg(&req_inet->opt, opt); ++ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); + if (opt) + kfree_rcu(opt, rcu); + +@@ -1973,11 +1973,13 @@ req_setattr_failure: + * values on failure. + * + */ +-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) ++static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) + { ++ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1); + int hdr_delta = 0; +- struct ip_options_rcu *opt = *opt_ptr; + ++ if (!opt || opt->opt.cipso == 0) ++ return 0; + if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { + u8 cipso_len; + u8 cipso_off; +@@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_opt + */ + void cipso_v4_sock_delattr(struct sock *sk) + { +- int hdr_delta; +- struct ip_options_rcu *opt; + struct inet_sock *sk_inet; ++ int hdr_delta; + + sk_inet = inet_sk(sk); +- opt = rcu_dereference_protected(sk_inet->inet_opt, 1); +- if (!opt || opt->opt.cipso == 0) +- return; + + hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); + if (sk_inet->is_icsk && hdr_delta > 0) { +@@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock * + */ + void cipso_v4_req_delattr(struct request_sock *req) + { +- struct ip_options_rcu *opt; +- struct inet_request_sock *req_inet; +- +- req_inet = inet_rsk(req); +- opt = req_inet->opt; +- if (!opt || opt->opt.cipso == 0) +- return; +- +- cipso_v4_delopt(&req_inet->opt); ++ cipso_v4_delopt(&inet_rsk(req)->ireq_opt); + } + + /** +--- a/net/ipv4/inet_connection_sock.c ++++ 
b/net/ipv4/inet_connection_sock.c +@@ -537,9 +537,10 @@ struct dst_entry *inet_csk_route_req(con + { + const struct inet_request_sock *ireq = inet_rsk(req); + struct net *net = read_pnet(&ireq->ireq_net); +- struct ip_options_rcu *opt = ireq->opt; ++ struct ip_options_rcu *opt; + struct rtable *rt; + ++ opt = rcu_dereference(ireq->ireq_opt); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), +@@ -573,10 +574,9 @@ struct dst_entry *inet_csk_route_child_s + struct flowi4 *fl4; + struct rtable *rt; + ++ opt = rcu_dereference(ireq->ireq_opt); + fl4 = &newinet->cork.fl.u.ip4; + +- rcu_read_lock(); +- opt = rcu_dereference(newinet->inet_opt); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), +@@ -589,13 +589,11 @@ struct dst_entry *inet_csk_route_child_s + goto no_route; + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) + goto route_err; +- rcu_read_unlock(); + return &rt->dst; + + route_err: + ip_rt_put(rt); + no_route: +- rcu_read_unlock(); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); + return NULL; + } +--- a/net/ipv4/syncookies.c ++++ b/net/ipv4/syncookies.c +@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock + /* We throwed the options of the initial SYN away, so we hope + * the ACK carries the same options again (see RFC1122 4.2.3.8) + */ +- ireq->opt = tcp_v4_save_options(skb); ++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb)); + + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -6235,7 +6235,7 @@ struct request_sock *inet_reqsk_alloc(co + struct inet_request_sock *ireq = inet_rsk(req); + + kmemcheck_annotate_bitfield(ireq, flags); +- ireq->opt = NULL; ++ ireq->ireq_opt = NULL; + #if IS_ENABLED(CONFIG_IPV6) + ireq->pktopts = NULL; + #endif +--- a/net/ipv4/tcp_ipv4.c ++++ 
b/net/ipv4/tcp_ipv4.c +@@ -878,7 +878,7 @@ static int tcp_v4_send_synack(const stru + + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq->opt); ++ rcu_dereference(ireq->ireq_opt)); + err = net_xmit_eval(err); + } + +@@ -890,7 +890,7 @@ static int tcp_v4_send_synack(const stru + */ + static void tcp_v4_reqsk_destructor(struct request_sock *req) + { +- kfree(inet_rsk(req)->opt); ++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); + } + + #ifdef CONFIG_TCP_MD5SIG +@@ -1269,7 +1269,7 @@ static void tcp_v4_init_req(struct reque + + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); +- ireq->opt = tcp_v4_save_options(skb); ++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb)); + } + + static struct dst_entry *tcp_v4_route_req(const struct sock *sk, +@@ -1356,10 +1356,9 @@ struct sock *tcp_v4_syn_recv_sock(const + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newsk->sk_bound_dev_if = ireq->ir_iif; +- newinet->inet_saddr = ireq->ir_loc_addr; +- inet_opt = ireq->opt; +- rcu_assign_pointer(newinet->inet_opt, inet_opt); +- ireq->opt = NULL; ++ newinet->inet_saddr = ireq->ir_loc_addr; ++ inet_opt = rcu_dereference(ireq->ireq_opt); ++ RCU_INIT_POINTER(newinet->inet_opt, inet_opt); + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->rcv_tos = ip_hdr(skb)->tos; +@@ -1404,9 +1403,12 @@ struct sock *tcp_v4_syn_recv_sock(const + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); +- if (*own_req) ++ if (likely(*own_req)) { + tcp_move_syn(newtp, req); +- ++ ireq->ireq_opt = NULL; ++ } else { ++ newinet->inet_opt = NULL; ++ } + return newsk; + + exit_overflow: +@@ -1417,6 +1419,7 @@ exit: + tcp_listendrop(sk); + return NULL; + put_and_exit: ++ newinet->inet_opt = NULL; + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + 
goto exit; diff --git a/queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch b/queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch new file mode 100644 index 00000000000..91453c14a0c --- /dev/null +++ b/queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch @@ -0,0 +1,49 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Eric Dumazet +Date: Sun, 22 Oct 2017 12:33:57 -0700 +Subject: tcp/dccp: fix lockdep splat in inet_csk_route_req() + +From: Eric Dumazet + + +[ Upstream commit a6ca7abe53633d08eea1c6756cb49c9b2d4c90bf ] + +This patch fixes the following lockdep splat in inet_csk_route_req() + + lockdep_rcu_suspicious + inet_csk_route_req + tcp_v4_send_synack + tcp_rtx_synack + inet_rtx_syn_ack + tcp_fastopen_synack_time + tcp_retransmit_timer + tcp_write_timer_handler + tcp_write_timer + call_timer_fn + +Thread running inet_csk_route_req() owns a reference on the request +socket, so we have the guarantee ireq->ireq_opt wont be changed or +freed. + +lockdep can enforce this invariant for us. + +Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_connection_sock.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -540,7 +540,8 @@ struct dst_entry *inet_csk_route_req(con + struct ip_options_rcu *opt; + struct rtable *rt; + +- opt = rcu_dereference(ireq->ireq_opt); ++ opt = rcu_dereference_protected(ireq->ireq_opt, ++ refcount_read(&req->rsk_refcnt) > 0); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), diff --git a/queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch b/queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch new file mode 100644 index 00000000000..5f41a7e9412 --- /dev/null +++ b/queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch @@ -0,0 +1,113 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Eric Dumazet +Date: Tue, 24 Oct 2017 08:20:31 -0700 +Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt + +From: Eric Dumazet + + +[ Upstream commit 06f877d613be3621604c2520ec0351d9fbdca15f ] + +In my first attempt to fix the lockdep splat, I forgot we could +enter inet_csk_route_req() with a freshly allocated request socket, +for which refcount has not yet been elevated, due to complex +SLAB_TYPESAFE_BY_RCU rules. + +We either are in rcu_read_lock() section _or_ we own a refcount on the +request. + +Correct RCU verb to use here is rcu_dereference_check(), although it is +not possible to prove we actually own a reference on a shared +refcount :/ + +In v2, I added ireq_opt_deref() helper and use in three places, to fix other +possible splats. + +[ 49.844590] lockdep_rcu_suspicious+0xea/0xf3 +[ 49.846487] inet_csk_route_req+0x53/0x14d +[ 49.848334] tcp_v4_route_req+0xe/0x10 +[ 49.850174] tcp_conn_request+0x31c/0x6a0 +[ 49.851992] ? 
__lock_acquire+0x614/0x822 +[ 49.854015] tcp_v4_conn_request+0x5a/0x79 +[ 49.855957] ? tcp_v4_conn_request+0x5a/0x79 +[ 49.858052] tcp_rcv_state_process+0x98/0xdcc +[ 49.859990] ? sk_filter_trim_cap+0x2f6/0x307 +[ 49.862085] tcp_v4_do_rcv+0xfc/0x145 +[ 49.864055] ? tcp_v4_do_rcv+0xfc/0x145 +[ 49.866173] tcp_v4_rcv+0x5ab/0xaf9 +[ 49.868029] ip_local_deliver_finish+0x1af/0x2e7 +[ 49.870064] ip_local_deliver+0x1b2/0x1c5 +[ 49.871775] ? inet_del_offload+0x45/0x45 +[ 49.873916] ip_rcv_finish+0x3f7/0x471 +[ 49.875476] ip_rcv+0x3f1/0x42f +[ 49.876991] ? ip_local_deliver_finish+0x2e7/0x2e7 +[ 49.878791] __netif_receive_skb_core+0x6d3/0x950 +[ 49.880701] ? process_backlog+0x7e/0x216 +[ 49.882589] __netif_receive_skb+0x1d/0x5e +[ 49.884122] process_backlog+0x10c/0x216 +[ 49.885812] net_rx_action+0x147/0x3df + +Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()") +Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") +Signed-off-by: Eric Dumazet +Reported-by: kernel test robot +Reported-by: Maciej Żenczykowski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 6 ++++++ + net/dccp/ipv4.c | 2 +- + net/ipv4/inet_connection_sock.c | 4 ++-- + net/ipv4/tcp_ipv4.c | 2 +- + 4 files changed, 10 insertions(+), 4 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -132,6 +132,12 @@ static inline int inet_request_bound_dev + return sk->sk_bound_dev_if; + } + ++static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) ++{ ++ return rcu_dereference_check(ireq->ireq_opt, ++ refcount_read(&ireq->req.rsk_refcnt) > 0); ++} ++ + struct inet_cork { + unsigned int flags; + __be32 addr; +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const s + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- rcu_dereference(ireq->ireq_opt)); ++ ireq_opt_deref(ireq)); + err = net_xmit_eval(err); + } + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -540,8 +540,8 @@ struct dst_entry *inet_csk_route_req(con + struct ip_options_rcu *opt; + struct rtable *rt; + +- opt = rcu_dereference_protected(ireq->ireq_opt, +- refcount_read(&req->rsk_refcnt) > 0); ++ opt = ireq_opt_deref(ireq); ++ + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -878,7 +878,7 @@ static int tcp_v4_send_synack(const stru + + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- rcu_dereference(ireq->ireq_opt)); ++ ireq_opt_deref(ireq)); + err = net_xmit_eval(err); + } + diff --git a/queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch b/queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch new file mode 100644 index 00000000000..2a18fbc1142 --- /dev/null +++ b/queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch @@ -0,0 +1,81 @@ +From foo@baz Wed Nov 
15 17:25:34 CET 2017 +From: Eric Dumazet +Date: Mon, 30 Oct 2017 23:08:20 -0700 +Subject: tcp: fix tcp_mtu_probe() vs highest_sack + +From: Eric Dumazet + + +[ Upstream commit 2b7cda9c35d3b940eb9ce74b30bbd5eb30db493d ] + +Based on SNMP values provided by Roman, Yuchung made the observation +that some crashes in tcp_sacktag_walk() might be caused by MTU probing. + +Looking at tcp_mtu_probe(), I found that when a new skb was placed +in front of the write queue, we were not updating tcp highest sack. + +If one skb is freed because all its content was copied to the new skb +(for MTU probing), then tp->highest_sack could point to a now freed skb. + +Bad things would then happen, including infinite loops. + +This patch renames tcp_highest_sack_combine() and uses it +from tcp_mtu_probe() to fix the bug. + +Note that I also removed one test against tp->sacked_out, +since we want to replace tp->highest_sack regardless of whatever +condition, since keeping a stale pointer to freed skb is a recipe +for disaster. + +Fixes: a47e5a988a57 ("[TCP]: Convert highest_sack to sk_buff to allow direct access") +Signed-off-by: Eric Dumazet +Reported-by: Alexei Starovoitov +Reported-by: Roman Gushchin +Reported-by: Oleksandr Natalenko +Acked-by: Alexei Starovoitov +Acked-by: Neal Cardwell +Acked-by: Yuchung Cheng +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 6 +++--- + net/ipv4/tcp_output.c | 3 ++- + 2 files changed, 5 insertions(+), 4 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1750,12 +1750,12 @@ static inline void tcp_highest_sack_rese + tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk); + } + +-/* Called when old skb is about to be deleted (to be combined with new skb) */ +-static inline void tcp_highest_sack_combine(struct sock *sk, ++/* Called when old skb is about to be deleted and replaced by new skb */ ++static inline void tcp_highest_sack_replace(struct sock *sk, + struct sk_buff *old, + struct sk_buff *new) + { +- if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack)) ++ if (old == tcp_highest_sack(sk)) + tcp_sk(sk)->highest_sack = new; + } + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2094,6 +2094,7 @@ static int tcp_mtu_probe(struct sock *sk + nskb->ip_summed = skb->ip_summed; + + tcp_insert_write_queue_before(nskb, skb, sk); ++ tcp_highest_sack_replace(sk, skb, nskb); + + len = 0; + tcp_for_write_queue_from_safe(skb, next, sk) { +@@ -2694,7 +2695,7 @@ static bool tcp_collapse_retrans(struct + else if (!skb_shift(skb, next_skb, next_skb_size)) + return false; + } +- tcp_highest_sack_combine(sk, next_skb, skb); ++ tcp_highest_sack_replace(sk, next_skb, skb); + + tcp_unlink_write_queue(next_skb, sk); + diff --git a/queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch b/queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch new file mode 100644 index 00000000000..00f4d28c8fc --- /dev/null +++ b/queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch @@ -0,0 +1,42 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Eric Dumazet +Date: Thu, 26 Oct 2017 21:21:40 -0700 +Subject: tcp: refresh tp timestamp before tcp_mtu_probe() + +From: Eric Dumazet + + +[ Upstream commit ee1836aec4f5a977c1699a311db4d9027ef21ac8 ] + +In the unlikely event tcp_mtu_probe() is sending 
a packet, we +want tp->tcp_mstamp being as accurate as possible. + +This means we need to call tcp_mstamp_refresh() a bit earlier in +tcp_write_xmit(). + +Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_output.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -2271,6 +2271,7 @@ static bool tcp_write_xmit(struct sock * + + sent_pkts = 0; + ++ tcp_mstamp_refresh(tp); + if (!push_one) { + /* Do MTU probing. */ + result = tcp_mtu_probe(sk); +@@ -2282,7 +2283,6 @@ static bool tcp_write_xmit(struct sock * + } + + max_segs = tcp_tso_segs(sk, mss_now); +- tcp_mstamp_refresh(tp); + while ((skb = tcp_send_head(sk))) { + unsigned int limit; + diff --git a/queue-4.13/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch b/queue-4.13/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch new file mode 100644 index 00000000000..c78a9048e2c --- /dev/null +++ b/queue-4.13/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch @@ -0,0 +1,36 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Julien Gomes +Date: Wed, 25 Oct 2017 11:50:50 -0700 +Subject: tun: allow positive return values on dev_get_valid_name() call + +From: Julien Gomes + + +[ Upstream commit 5c25f65fd1e42685f7ccd80e0621829c105785d9 ] + +If the name argument of dev_get_valid_name() contains "%d", it will try +to assign it a unit number in __dev_alloc_name() and return either the +unit number (>= 0) or an error code (< 0). +Considering positive values as error values prevents tun device creations +relying on this mechanism, therefore we should only consider negative values +as errors here. + +Signed-off-by: Julien Gomes +Acked-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1814,7 +1814,7 @@ static int tun_set_iff(struct net *net, + if (!dev) + return -ENOMEM; + err = dev_get_valid_name(net, dev, name); +- if (err) ++ if (err < 0) + goto err_free_dev; + + dev_net_set(dev, net); diff --git a/queue-4.13/tun-call-dev_get_valid_name-before-register_netdevice.patch b/queue-4.13/tun-call-dev_get_valid_name-before-register_netdevice.patch new file mode 100644 index 00000000000..a8eda781fb6 --- /dev/null +++ b/queue-4.13/tun-call-dev_get_valid_name-before-register_netdevice.patch @@ -0,0 +1,82 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Cong Wang +Date: Fri, 13 Oct 2017 11:58:53 -0700 +Subject: tun: call dev_get_valid_name() before register_netdevice() + +From: Cong Wang + + +[ Upstream commit 0ad646c81b2182f7fa67ec0c8c825e0ee165696d ] + +register_netdevice() could fail early when we have an invalid +dev name, in which case ->ndo_uninit() is not called. For tun +device, this is a problem because a timer etc. are already +initialized and it expects ->ndo_uninit() to clean them up. + +We could move these initializations into a ->ndo_init() so +that register_netdevice() knows better, however this is still +complicated due to the logic in tun_detach(). + +Therefore, I choose to just call dev_get_valid_name() before +register_netdevice(), which is quicker and much easier to audit. +And for this specific case, it is already enough. + +Fixes: 96442e42429e ("tuntap: choose the txq based on rxq") +Reported-by: Dmitry Alexeev +Cc: Jason Wang +Cc: "Michael S. Tsirkin" +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 3 +++ + include/linux/netdevice.h | 3 +++ + net/core/dev.c | 6 +++--- + 3 files changed, 9 insertions(+), 3 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1813,6 +1813,9 @@ static int tun_set_iff(struct net *net, + + if (!dev) + return -ENOMEM; ++ err = dev_get_valid_name(net, dev, name); ++ if (err) ++ goto err_free_dev; + + dev_net_set(dev, net); + dev->rtnl_link_ops = &tun_link_ops; +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3702,6 +3702,9 @@ struct net_device *alloc_netdev_mqs(int + unsigned char name_assign_type, + void (*setup)(struct net_device *), + unsigned int txqs, unsigned int rxqs); ++int dev_get_valid_name(struct net *net, struct net_device *dev, ++ const char *name); ++ + #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ + alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1146,9 +1146,8 @@ static int dev_alloc_name_ns(struct net + return ret; + } + +-static int dev_get_valid_name(struct net *net, +- struct net_device *dev, +- const char *name) ++int dev_get_valid_name(struct net *net, struct net_device *dev, ++ const char *name) + { + BUG_ON(!net); + +@@ -1164,6 +1163,7 @@ static int dev_get_valid_name(struct net + + return 0; + } ++EXPORT_SYMBOL(dev_get_valid_name); + + /** + * dev_change_name - change name of a device diff --git a/queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch b/queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch new file mode 100644 index 00000000000..4c53d4d452f --- /dev/null +++ b/queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch @@ -0,0 +1,88 @@ +From foo@baz Wed Nov 15 17:25:34 CET 2017 +From: Craig Gallek +Date: Mon, 30 Oct 2017 18:50:11 -0400 +Subject: tun/tap: sanitize TUNSETSNDBUF input + +From: Craig Gallek + + +[ Upstream commit 93161922c658c714715686cd0cf69b090cb9bf1d ] + +Syzkaller found several variants of 
the lockup below by setting negative +values with the TUNSETSNDBUF ioctl. This patch adds a sanity check +to both the tun and tap versions of this ioctl. + + watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [repro:2389] + Modules linked in: + irq event stamp: 329692056 + hardirqs last enabled at (329692055): [] _raw_spin_unlock_irqrestore+0x31/0x75 + hardirqs last disabled at (329692056): [] apic_timer_interrupt+0x98/0xb0 + softirqs last enabled at (35659740): [] __do_softirq+0x328/0x48c + softirqs last disabled at (35659731): [] irq_exit+0xbc/0xd0 + CPU: 0 PID: 2389 Comm: repro Not tainted 4.14.0-rc7 #23 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + task: ffff880009452140 task.stack: ffff880006a20000 + RIP: 0010:_raw_spin_lock_irqsave+0x11/0x80 + RSP: 0018:ffff880006a27c50 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff10 + RAX: ffff880009ac68d0 RBX: ffff880006a27ce0 RCX: 0000000000000000 + RDX: 0000000000000001 RSI: ffff880006a27ce0 RDI: ffff880009ac6900 + RBP: ffff880006a27c60 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000001 R11: 000000000063ff00 R12: ffff880009ac6900 + R13: ffff880006a27cf8 R14: 0000000000000001 R15: ffff880006a27cf8 + FS: 00007f4be4838700(0000) GS:ffff88000cc00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000020101000 CR3: 0000000009616000 CR4: 00000000000006f0 + Call Trace: + prepare_to_wait+0x26/0xc0 + sock_alloc_send_pskb+0x14e/0x270 + ? remove_wait_queue+0x60/0x60 + tun_get_user+0x2cc/0x19d0 + ? 
__tun_get+0x60/0x1b0 + tun_chr_write_iter+0x57/0x86 + __vfs_write+0x156/0x1e0 + vfs_write+0xf7/0x230 + SyS_write+0x57/0xd0 + entry_SYSCALL_64_fastpath+0x1f/0xbe + RIP: 0033:0x7f4be4356df9 + RSP: 002b:00007ffc18101c08 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 + RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f4be4356df9 + RDX: 0000000000000046 RSI: 0000000020101000 RDI: 0000000000000005 + RBP: 00007ffc18101c40 R08: 0000000000000001 R09: 0000000000000001 + R10: 0000000000000001 R11: 0000000000000293 R12: 0000559c75f64780 + R13: 00007ffc18101d30 R14: 0000000000000000 R15: 0000000000000000 + +Fixes: 33dccbb050bb ("tun: Limit amount of queued packets per device") +Fixes: 20d29d7a916a ("net: macvtap driver") +Signed-off-by: Craig Gallek +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tap.c | 2 ++ + drivers/net/tun.c | 4 ++++ + 2 files changed, 6 insertions(+) + +--- a/drivers/net/tap.c ++++ b/drivers/net/tap.c +@@ -1035,6 +1035,8 @@ static long tap_ioctl(struct file *file, + case TUNSETSNDBUF: + if (get_user(s, sp)) + return -EFAULT; ++ if (s <= 0) ++ return -EINVAL; + + q->sk.sk_sndbuf = s; + return 0; +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -2219,6 +2219,10 @@ static long __tun_chr_ioctl(struct file + ret = -EFAULT; + break; + } ++ if (sndbuf <= 0) { ++ ret = -EINVAL; ++ break; ++ } + + tun->sndbuf = sndbuf; + tun_set_sndbuf(tun);