From: Greg Kroah-Hartman Date: Wed, 15 Nov 2017 16:31:22 +0000 (+0100) Subject: 4.9-stable patches X-Git-Tag: v3.18.82~26 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=132fe773189bc6ee5aaa6991acf5a8af68f44455;p=thirdparty%2Fkernel%2Fstable-queue.git 4.9-stable patches added patches: gso-fix-payload-length-when-gso_size-is-zero.patch ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch net_sched-avoid-matching-qdisc-with-zero-handle.patch netlink-do-not-set-cb_running-if-dump-s-start-errs.patch packet-avoid-panic-in-packet_getsockopt.patch ppp-fix-race-in-ppp-device-destruction.patch sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch soreuseport-fix-initialization-race.patch tap-double-free-in-error-path-in-tap_open.patch tcp-dccp-fix-ireq-opt-races.patch tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch tcp-fix-tcp_mtu_probe-vs-highest_sack.patch tun-allow-positive-return-values-on-dev_get_valid_name-call.patch tun-call-dev_get_valid_name-before-register_netdevice.patch tun-tap-sanitize-tunsetsndbuf-input.patch --- diff --git a/queue-4.9/gso-fix-payload-length-when-gso_size-is-zero.patch b/queue-4.9/gso-fix-payload-length-when-gso_size-is-zero.patch new file mode 100644 index 00000000000..90599b62eba --- /dev/null +++ 
b/queue-4.9/gso-fix-payload-length-when-gso_size-is-zero.patch @@ -0,0 +1,62 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Alexey Kodanev +Date: Fri, 6 Oct 2017 19:02:35 +0300 +Subject: gso: fix payload length when gso_size is zero + +From: Alexey Kodanev + + +[ Upstream commit 3d0241d57c7b25bb75ac9d7a62753642264fdbce ] + +When gso_size reset to zero for the tail segment in skb_segment(), later +in ipv6_gso_segment(), __skb_udp_tunnel_segment() and gre_gso_segment() +we will get incorrect results (payload length, pcsum) for that segment. +inet_gso_segment() already has a check for gso_size before calculating +payload. + +The issue was found with LTP vxlan & gre tests over ixgbe NIC. + +Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer") +Signed-off-by: Alexey Kodanev +Acked-by: Alexander Duyck +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/gre_offload.c | 2 +- + net/ipv4/udp_offload.c | 2 +- + net/ipv6/ip6_offload.c | 2 +- + 3 files changed, 3 insertions(+), 3 deletions(-) + +--- a/net/ipv4/gre_offload.c ++++ b/net/ipv4/gre_offload.c +@@ -98,7 +98,7 @@ static struct sk_buff *gre_gso_segment(s + greh = (struct gre_base_hdr *)skb_transport_header(skb); + pcsum = (__sum16 *)(greh + 1); + +- if (gso_partial) { ++ if (gso_partial && skb_is_gso(skb)) { + unsigned int partial_adj; + + /* Adjust checksum to account for the fact that +--- a/net/ipv4/udp_offload.c ++++ b/net/ipv4/udp_offload.c +@@ -122,7 +122,7 @@ static struct sk_buff *__skb_udp_tunnel_ + * will be using a length value equal to only one MSS sized + * segment instead of the entire frame. 
+ */ +- if (gso_partial) { ++ if (gso_partial && skb_is_gso(skb)) { + uh->len = htons(skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)uh); +--- a/net/ipv6/ip6_offload.c ++++ b/net/ipv6/ip6_offload.c +@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment( + + for (skb = segs; skb; skb = skb->next) { + ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); +- if (gso_partial) ++ if (gso_partial && skb_is_gso(skb)) + payload_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)(ipv6h + 1); diff --git a/queue-4.9/ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch b/queue-4.9/ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch new file mode 100644 index 00000000000..95cd5bdbae5 --- /dev/null +++ b/queue-4.9/ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch @@ -0,0 +1,64 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Xin Long +Date: Thu, 26 Oct 2017 19:23:27 +0800 +Subject: ip6_gre: only increase err_count for some certain type icmpv6 in ip6gre_err + +From: Xin Long + + +[ Upstream commit f8d20b46ce55cf40afb30dcef6d9288f7ef46d9b ] + +The similar fix in patch 'ipip: only increase err_count for some +certain type icmp in ipip_err' is needed for ip6gre_err. + +In Jianlin's case, udp netperf broke even when receiving a TooBig +icmpv6 packet. + +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *s + case ICMPV6_DEST_UNREACH: + net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); +- break; ++ if (code != ICMPV6_PORT_UNREACH) ++ break; ++ return; + case ICMPV6_TIME_EXCEED: + if (code == ICMPV6_EXC_HOPLIMIT) { + net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); ++ break; + } +- break; ++ return; + case ICMPV6_PARAMPROB: + teli = 0; + if (code == ICMPV6_HDR_FIELD) +@@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *s + net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); + } +- break; ++ return; + case ICMPV6_PKT_TOOBIG: + mtu = be32_to_cpu(info) - offset - t->tun_hlen; + if (t->dev->type == ARPHRD_ETHER) +@@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *s + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + t->dev->mtu = mtu; +- break; ++ return; + } + + if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) diff --git a/queue-4.9/ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch b/queue-4.9/ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch new file mode 100644 index 00000000000..6999bbeeb43 --- /dev/null +++ b/queue-4.9/ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch @@ -0,0 +1,68 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Xin Long +Date: Thu, 26 Oct 2017 19:27:17 +0800 +Subject: ip6_gre: update dst pmtu if dev mtu has been updated by toobig in __gre6_xmit + +From: Xin Long + + +[ Upstream commit 8aec4959d832bae0889a8e2f348973b5e4abffef ] + +When receiving a Toobig icmpv6 packet, ip6gre_err would just set +tunnel dev's mtu, that's not enough. 
For skb_dst(skb)'s pmtu may +still be using the old value, it has no chance to be updated with +tunnel dev's mtu. + +Jianlin found this issue by reducing route's mtu while running +netperf, the performance went to 0. + +ip6ip6 and ip4ip6 tunnel can work well with this, as they lookup +the upper dst and update_pmtu it's pmtu or icmpv6_send a Toobig +to upper socket after setting tunnel dev's mtu. + +We couldn't do that for ip6_gre, as gre's inner packet could be +any protocol, it's difficult to handle them (like lookup upper +dst) in a good way. + +So this patch is to fix it by updating skb_dst(skb)'s pmtu when +dev->mtu < skb_dst(skb)'s pmtu in tx path. It's safe to do this +update there, as usually dev->mtu <= skb_dst(skb)'s pmtu and no +performance regression can be caused by this. + +Fixes: c12b395a4664 ("gre: Support GRE over IPv6") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_gre.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_gre.c ++++ b/net/ipv6/ip6_gre.c +@@ -508,8 +508,8 @@ static netdev_tx_t __gre6_xmit(struct sk + __u32 *pmtu, __be16 proto) + { + struct ip6_tnl *tunnel = netdev_priv(dev); +- __be16 protocol = (dev->type == ARPHRD_ETHER) ? +- htons(ETH_P_TEB) : proto; ++ struct dst_entry *dst = skb_dst(skb); ++ __be16 protocol; + + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; +@@ -523,9 +523,14 @@ static netdev_tx_t __gre6_xmit(struct sk + tunnel->o_seqno++; + + /* Push GRE header. */ ++ protocol = (dev->type == ARPHRD_ETHER) ? 
htons(ETH_P_TEB) : proto; + gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, + protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); + ++ /* TooBig packet may have updated dst->dev's mtu */ ++ if (dst && dst_mtu(dst) > dst->dev->mtu) ++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); ++ + return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, + NEXTHDR_GRE); + } diff --git a/queue-4.9/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch b/queue-4.9/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch new file mode 100644 index 00000000000..9024736c8f0 --- /dev/null +++ b/queue-4.9/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch @@ -0,0 +1,128 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Xin Long +Date: Thu, 26 Oct 2017 19:19:56 +0800 +Subject: ipip: only increase err_count for some certain type icmp in ipip_err + +From: Xin Long + + +[ Upstream commit f3594f0a7ea36661d7fd942facd7f31a64245f1a ] + +t->err_count is used to count the link failure on tunnel and an err +will be reported to user socket in tx path if t->err_count is not 0. +udp socket could even return EHOSTUNREACH to users. + +Since commit fd58156e456d ("IPIP: Use ip-tunneling code.") removed +the 'switch check' for icmp type in ipip_err(), err_count would be +increased by the icmp packet with ICMP_EXC_FRAGTIME code. an link +failure would be reported out due to this. + +In Jianlin's case, when receiving ICMP_EXC_FRAGTIME a icmp packet, +udp netperf failed with the err: + send_data: data send error: No route to host (errno 113) + +We expect this error reported from tunnel to socket when receiving +some certain type icmp, but not ICMP_EXC_FRAGTIME, ICMP_SR_FAILED +or ICMP_PARAMETERPROB ones. + +This patch is to bring 'switch check' for icmp type back to ipip_err +so that it only reports link failure for the right type icmp, just as +in ipgre_err() and ipip6_err(). 
+ +Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ipip.c | 59 +++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 42 insertions(+), 17 deletions(-) + +--- a/net/ipv4/ipip.c ++++ b/net/ipv4/ipip.c +@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_op + + static int ipip_err(struct sk_buff *skb, u32 info) + { +- +-/* All the routers (except for Linux) return only +- 8 bytes of packet payload. It means, that precise relaying of +- ICMP in the real Internet is absolutely infeasible. +- */ ++ /* All the routers (except for Linux) return only ++ * 8 bytes of packet payload. It means, that precise relaying of ++ * ICMP in the real Internet is absolutely infeasible. ++ */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + const struct iphdr *iph = (const struct iphdr *)skb->data; +- struct ip_tunnel *t; +- int err; + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; ++ struct ip_tunnel *t; ++ int err = 0; ++ ++ switch (type) { ++ case ICMP_DEST_UNREACH: ++ switch (code) { ++ case ICMP_SR_FAILED: ++ /* Impossible event. */ ++ goto out; ++ default: ++ /* All others are translated to HOST_UNREACH. ++ * rfc2003 contains "deep thoughts" about NET_UNREACH, ++ * I believe they are just ether pollution. 
--ANK ++ */ ++ break; ++ } ++ break; ++ ++ case ICMP_TIME_EXCEEDED: ++ if (code != ICMP_EXC_TTL) ++ goto out; ++ break; ++ ++ case ICMP_REDIRECT: ++ break; ++ ++ default: ++ goto out; ++ } + +- err = -ENOENT; + t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); +- if (!t) ++ if (!t) { ++ err = -ENOENT; + goto out; ++ } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { +- ipv4_update_pmtu(skb, dev_net(skb->dev), info, +- t->parms.link, 0, iph->protocol, 0); +- err = 0; ++ ipv4_update_pmtu(skb, net, info, t->parms.link, 0, ++ iph->protocol, 0); + goto out; + } + + if (type == ICMP_REDIRECT) { +- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, +- iph->protocol, 0); +- err = 0; ++ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0); + goto out; + } + +- if (t->parms.iph.daddr == 0) ++ if (t->parms.iph.daddr == 0) { ++ err = -ENOENT; + goto out; ++ } + +- err = 0; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) + goto out; + diff --git a/queue-4.9/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch b/queue-4.9/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch new file mode 100644 index 00000000000..4cbe11494b2 --- /dev/null +++ b/queue-4.9/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch @@ -0,0 +1,95 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Eric Dumazet +Date: Mon, 30 Oct 2017 22:47:09 -0700 +Subject: ipv6: addrconf: increment ifp refcount before ipv6_del_addr() + +From: Eric Dumazet + + +[ Upstream commit e669b86945478b3d90d2d87e3793a6eed06d332f ] + +In the (unlikely) event fixup_permanent_addr() returns a failure, +addrconf_permanent_addr() calls ipv6_del_addr() without the +mandatory call to in6_ifa_hold(), leading to a refcount error, +spotted by syzkaller : + +WARNING: CPU: 1 PID: 3142 at lib/refcount.c:227 refcount_dec+0x4c/0x50 +lib/refcount.c:227 +Kernel panic - not syncing: panic_on_warn set ... 
+ +CPU: 1 PID: 3142 Comm: ip Not tainted 4.14.0-rc4-next-20171009+ #33 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS +Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + panic+0x1e4/0x41c kernel/panic.c:181 + __warn+0x1c4/0x1e0 kernel/panic.c:544 + report_bug+0x211/0x2d0 lib/bug.c:183 + fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178 + do_trap_no_signal arch/x86/kernel/traps.c:212 [inline] + do_trap+0x260/0x390 arch/x86/kernel/traps.c:261 + do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298 + do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311 + invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905 +RIP: 0010:refcount_dec+0x4c/0x50 lib/refcount.c:227 +RSP: 0018:ffff8801ca49e680 EFLAGS: 00010286 +RAX: 000000000000002c RBX: ffff8801d07cfcdc RCX: 0000000000000000 +RDX: 000000000000002c RSI: 1ffff10039493c90 RDI: ffffed0039493cc4 +RBP: ffff8801ca49e688 R08: ffff8801ca49dd70 R09: 0000000000000000 +R10: ffff8801ca49df58 R11: 0000000000000000 R12: 1ffff10039493cd9 +R13: ffff8801ca49e6e8 R14: ffff8801ca49e7e8 R15: ffff8801d07cfcdc + __in6_ifa_put include/net/addrconf.h:369 [inline] + ipv6_del_addr+0x42b/0xb60 net/ipv6/addrconf.c:1208 + addrconf_permanent_addr net/ipv6/addrconf.c:3327 [inline] + addrconf_notify+0x1c66/0x2190 net/ipv6/addrconf.c:3393 + notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93 + __raw_notifier_call_chain kernel/notifier.c:394 [inline] + raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401 + call_netdevice_notifiers_info+0x32/0x60 net/core/dev.c:1697 + call_netdevice_notifiers net/core/dev.c:1715 [inline] + __dev_notify_flags+0x15d/0x430 net/core/dev.c:6843 + dev_change_flags+0xf5/0x140 net/core/dev.c:6879 + do_setlink+0xa1b/0x38e0 net/core/rtnetlink.c:2113 + rtnl_newlink+0xf0d/0x1a40 net/core/rtnetlink.c:2661 + rtnetlink_rcv_msg+0x733/0x1090 net/core/rtnetlink.c:4301 + netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2408 + 
rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4313 + netlink_unicast_kernel net/netlink/af_netlink.c:1273 [inline] + netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1299 + netlink_sendmsg+0xa4a/0xe70 net/netlink/af_netlink.c:1862 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + ___sys_sendmsg+0x75b/0x8a0 net/socket.c:2049 + __sys_sendmsg+0xe5/0x210 net/socket.c:2083 + SYSC_sendmsg net/socket.c:2094 [inline] + SyS_sendmsg+0x2d/0x50 net/socket.c:2090 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x7fa9174d3320 +RSP: 002b:00007ffe302ae9e8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e +RAX: ffffffffffffffda RBX: 00007ffe302b2ae0 RCX: 00007fa9174d3320 +RDX: 0000000000000000 RSI: 00007ffe302aea20 RDI: 0000000000000016 +RBP: 0000000000000082 R08: 0000000000000000 R09: 000000000000000f +R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffe302b32a0 +R13: 0000000000000000 R14: 00007ffe302b2ab8 R15: 00007ffe302b32b8 + +Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") +Signed-off-by: Eric Dumazet +Cc: David Ahern +Acked-by: David Ahern +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/addrconf.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/net/ipv6/addrconf.c ++++ b/net/ipv6/addrconf.c +@@ -3299,6 +3299,7 @@ static void addrconf_permanent_addr(stru + if ((ifp->flags & IFA_F_PERMANENT) && + fixup_permanent_addr(idev, ifp) < 0) { + write_unlock_bh(&idev->lock); ++ in6_ifa_hold(ifp); + ipv6_del_addr(ifp); + write_lock_bh(&idev->lock); + diff --git a/queue-4.9/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch b/queue-4.9/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch new file mode 100644 index 00000000000..61b0013f685 --- /dev/null +++ b/queue-4.9/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch @@ -0,0 +1,104 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Eric Dumazet +Date: Sat, 21 Oct 2017 12:26:23 -0700 +Subject: ipv6: flowlabel: do not leave opt->tot_len with garbage + +From: Eric Dumazet + + +[ Upstream commit 864e2a1f8aac05effac6063ce316b480facb46ff ] + +When syzkaller team brought us a C repro for the crash [1] that +had been reported many times in the past, I finally could find +the root cause. + +If FlowLabel info is merged by fl6_merge_options(), we leave +part of the opt_space storage provided by udp/raw/l2tp with random value +in opt_space.tot_len, unless a control message was provided at sendmsg() +time. + +Then ip6_setup_cork() would use this random value to perform a kzalloc() +call. Undefined behavior and crashes. + +Fix is to properly set tot_len in fl6_merge_options() + +At the same time, we can also avoid consuming memory and cpu cycles +to clear it, if every option is copied via a kmemdup(). This is the +change in ip6_setup_cork(). 
+ +[1] +kasan: CONFIG_KASAN_INLINE enabled +kasan: GPF could be caused by NULL-ptr deref or user memory access +general protection fault: 0000 [#1] SMP KASAN +Dumping ftrace buffer: + (ftrace buffer empty) +Modules linked in: +CPU: 0 PID: 6613 Comm: syz-executor0 Not tainted 4.14.0-rc4+ #127 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +task: ffff8801cb64a100 task.stack: ffff8801cc350000 +RIP: 0010:ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 +RSP: 0018:ffff8801cc357550 EFLAGS: 00010203 +RAX: dffffc0000000000 RBX: ffff8801cc357748 RCX: 0000000000000010 +RDX: 0000000000000002 RSI: ffffffff842bd1d9 RDI: 0000000000000014 +RBP: ffff8801cc357620 R08: ffff8801cb17f380 R09: ffff8801cc357b10 +R10: ffff8801cb64a100 R11: 0000000000000000 R12: ffff8801cc357ab0 +R13: ffff8801cc357b10 R14: 0000000000000000 R15: ffff8801c3bbf0c0 +FS: 00007f9c5c459700(0000) GS:ffff8801db200000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 0000000020324000 CR3: 00000001d1cf2000 CR4: 00000000001406f0 +DR0: 0000000020001010 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 +Call Trace: + ip6_make_skb+0x282/0x530 net/ipv6/ip6_output.c:1729 + udpv6_sendmsg+0x2769/0x3380 net/ipv6/udp.c:1340 + inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762 + sock_sendmsg_nosec net/socket.c:633 [inline] + sock_sendmsg+0xca/0x110 net/socket.c:643 + SYSC_sendto+0x358/0x5a0 net/socket.c:1750 + SyS_sendto+0x40/0x50 net/socket.c:1718 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x4520a9 +RSP: 002b:00007f9c5c458c08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c +RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004520a9 +RDX: 0000000000000001 RSI: 0000000020fd1000 RDI: 0000000000000016 +RBP: 0000000000000086 R08: 0000000020e0afe4 R09: 000000000000001c +R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004bb1ee +R13: 00000000ffffffff R14: 0000000000000016 
R15: 0000000000000029 +Code: e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 ea 0f 00 00 48 8d 79 04 48 b8 00 00 00 00 00 fc ff df 45 8b 74 24 04 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 +RIP: ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 RSP: ffff8801cc357550 + +Signed-off-by: Eric Dumazet +Reported-by: Dmitry Vyukov +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv6/ip6_flowlabel.c | 1 + + net/ipv6/ip6_output.c | 4 ++-- + 2 files changed, 3 insertions(+), 2 deletions(-) + +--- a/net/ipv6/ip6_flowlabel.c ++++ b/net/ipv6/ip6_flowlabel.c +@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options + } + opt_space->dst1opt = fopt->dst1opt; + opt_space->opt_flen = fopt->opt_flen; ++ opt_space->tot_len = fopt->tot_len; + return opt_space; + } + EXPORT_SYMBOL_GPL(fl6_merge_options); +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1215,11 +1215,11 @@ static int ip6_setup_cork(struct sock *s + if (WARN_ON(v6_cork->opt)) + return -EINVAL; + +- v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); ++ v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); + if (unlikely(!v6_cork->opt)) + return -ENOBUFS; + +- v6_cork->opt->tot_len = opt->tot_len; ++ v6_cork->opt->tot_len = sizeof(*opt); + v6_cork->opt->opt_flen = opt->opt_flen; + v6_cork->opt->opt_nflen = opt->opt_nflen; + diff --git a/queue-4.9/l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch b/queue-4.9/l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch new file mode 100644 index 00000000000..1f45649ad81 --- /dev/null +++ b/queue-4.9/l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch @@ -0,0 +1,36 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Guillaume Nault +Date: Fri, 13 Oct 2017 19:22:35 +0200 +Subject: l2tp: check ps->sock before running pppol2tp_session_ioctl() + +From: Guillaume Nault + + +[ Upstream commit 5903f594935a3841137c86b9d5b75143a5b7121c ] + +When 
pppol2tp_session_ioctl() is called by pppol2tp_tunnel_ioctl(), +the session may be unconnected. That is, it was created by +pppol2tp_session_create() and hasn't been connected with +pppol2tp_connect(). In this case, ps->sock is NULL, so we need to check +for this case in order to avoid dereferencing a NULL pointer. + +Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/l2tp/l2tp_ppp.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/net/l2tp/l2tp_ppp.c ++++ b/net/l2tp/l2tp_ppp.c +@@ -993,6 +993,9 @@ static int pppol2tp_session_ioctl(struct + session->name, cmd, arg); + + sk = ps->sock; ++ if (!sk) ++ return -EBADR; ++ + sock_hold(sk); + + switch (cmd) { diff --git a/queue-4.9/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch b/queue-4.9/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch new file mode 100644 index 00000000000..a0775f9f62d --- /dev/null +++ b/queue-4.9/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch @@ -0,0 +1,44 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Eric Dumazet +Date: Tue, 10 Oct 2017 19:12:33 -0700 +Subject: net: call cgroup_sk_alloc() earlier in sk_clone_lock() + +From: Eric Dumazet + + +[ Upstream commit c0576e3975084d4699b7bfef578613fb8e1144f6 ] + +If for some reason, the newly allocated child needs to be freed, +we will call cgroup_put() (via sk_free_unlock_clone()) while the +corresponding cgroup_get() was not yet done, and we will free memory +too soon. + +Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets") +Signed-off-by: Eric Dumazet +Cc: Johannes Weiner +Cc: Tejun Heo +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +--- a/net/core/sock.c ++++ b/net/core/sock.c +@@ -1526,6 +1526,7 @@ struct sock *sk_clone_lock(const struct + newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + + sock_reset_flag(newsk, SOCK_DONE); ++ cgroup_sk_alloc(&newsk->sk_cgrp_data); + skb_queue_head_init(&newsk->sk_error_queue); + + filter = rcu_dereference_protected(newsk->sk_filter, 1); +@@ -1560,8 +1561,6 @@ struct sock *sk_clone_lock(const struct + atomic64_set(&newsk->sk_cookie, 0); + + mem_cgroup_sk_alloc(newsk); +- cgroup_sk_alloc(&newsk->sk_cgrp_data); +- + /* + * Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/queue-4.9/net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch b/queue-4.9/net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch new file mode 100644 index 00000000000..eed98ef4f02 --- /dev/null +++ b/queue-4.9/net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch @@ -0,0 +1,39 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Andrei Vagin +Date: Wed, 25 Oct 2017 10:16:42 -0700 +Subject: net/unix: don't show information about sockets from other namespaces + +From: Andrei Vagin + + +[ Upstream commit 0f5da659d8f1810f44de14acf2c80cd6499623a0 ] + +socket_diag shows information only about sockets from a namespace where +a diag socket lives. + +But if we request information about one unix socket, the kernel doesn't +check that its netns is matched with a diag socket namespace, so any +user can get information about any unix socket in a system. This looks +like a bug. + +v2: add a Fixes tag + +Fixes: 51d7cccf0723 ("net: make sock diag per-namespace") +Signed-off-by: Andrei Vagin +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/unix/diag.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/unix/diag.c ++++ b/net/unix/diag.c +@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk + err = -ENOENT; + if (sk == NULL) + goto out_nosk; ++ if (!net_eq(sock_net(sk), net)) ++ goto out; + + err = sock_diag_check_cookie(sk, req->udiag_cookie); + if (err) diff --git a/queue-4.9/net_sched-avoid-matching-qdisc-with-zero-handle.patch b/queue-4.9/net_sched-avoid-matching-qdisc-with-zero-handle.patch new file mode 100644 index 00000000000..4bf7b236b4c --- /dev/null +++ b/queue-4.9/net_sched-avoid-matching-qdisc-with-zero-handle.patch @@ -0,0 +1,49 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Cong Wang +Date: Fri, 27 Oct 2017 22:08:56 -0700 +Subject: net_sched: avoid matching qdisc with zero handle + +From: Cong Wang + + +[ Upstream commit 50317fce2cc70a2bbbc4b42c31bbad510382a53c ] + +Davide found the following script triggers a NULL pointer +dereference: + +ip l a name eth0 type dummy +tc q a dev eth0 parent :1 handle 1: htb + +This is because for a freshly created netdevice noop_qdisc +is attached and when passing 'parent :1', kernel actually +tries to match the major handle which is 0 and noop_qdisc +has handle 0 so is matched by mistake. Commit 69012ae425d7 +tries to fix a similar bug but still misses this case. + +Handle 0 is not a valid one, should be just skipped. In +fact, kernel uses it as TC_H_UNSPEC. + +Fixes: 69012ae425d7 ("net: sched: fix handling of singleton qdiscs with qdisc_hash") +Fixes: 59cc1f61f09c ("net: sched:convert qdisc linked list to hashtable") +Reported-by: Davide Caratti +Cc: Jiri Kosina +Cc: Eric Dumazet +Cc: Jamal Hadi Salim +Signed-off-by: Cong Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sched/sch_api.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/net/sched/sch_api.c ++++ b/net/sched/sch_api.c +@@ -296,6 +296,8 @@ struct Qdisc *qdisc_lookup(struct net_de + { + struct Qdisc *q; + ++ if (!handle) ++ return NULL; + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; diff --git a/queue-4.9/netlink-do-not-set-cb_running-if-dump-s-start-errs.patch b/queue-4.9/netlink-do-not-set-cb_running-if-dump-s-start-errs.patch new file mode 100644 index 00000000000..77f77768b4e --- /dev/null +++ b/queue-4.9/netlink-do-not-set-cb_running-if-dump-s-start-errs.patch @@ -0,0 +1,61 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: "Jason A. Donenfeld" +Date: Mon, 9 Oct 2017 14:14:51 +0200 +Subject: netlink: do not set cb_running if dump's start() errs + +From: "Jason A. Donenfeld" + + +[ Upstream commit 41c87425a1ac9b633e0fcc78eb1f19640c8fb5a0 ] + +It turns out that multiple places can call netlink_dump(), which means +it's still possible to dereference partially initialized values in +dump() that were the result of a faulty returned start(). + +This fixes the issue by calling start() _before_ setting cb_running to +true, so that there's no chance at all of hitting the dump() function +through any indirect paths. + +It also moves the call to start() to be when the mutex is held. This has +the nice side effect of serializing invocations to start(), which is +likely desirable anyway. It also prevents any possible other races that +might come out of this logic. + +In testing this with several different pieces of tricky code to trigger +these issues, this commit fixes all avenues that I'm aware of. + +Signed-off-by: Jason A. Donenfeld +Cc: Johannes Berg +Reviewed-by: Johannes Berg +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/netlink/af_netlink.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +--- a/net/netlink/af_netlink.c ++++ b/net/netlink/af_netlink.c +@@ -2207,16 +2207,17 @@ int __netlink_dump_start(struct sock *ss + cb->min_dump_alloc = control->min_dump_alloc; + cb->skb = skb; + ++ if (cb->start) { ++ ret = cb->start(cb); ++ if (ret) ++ goto error_unlock; ++ } ++ + nlk->cb_running = true; + + mutex_unlock(nlk->cb_mutex); + +- ret = 0; +- if (cb->start) +- ret = cb->start(cb); +- +- if (!ret) +- ret = netlink_dump(sk); ++ ret = netlink_dump(sk); + + sock_put(sk); + diff --git a/queue-4.9/packet-avoid-panic-in-packet_getsockopt.patch b/queue-4.9/packet-avoid-panic-in-packet_getsockopt.patch new file mode 100644 index 00000000000..ff8c826d31b --- /dev/null +++ b/queue-4.9/packet-avoid-panic-in-packet_getsockopt.patch @@ -0,0 +1,86 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Eric Dumazet +Date: Wed, 18 Oct 2017 16:14:52 -0700 +Subject: packet: avoid panic in packet_getsockopt() + +From: Eric Dumazet + + +[ Upstream commit 509c7a1ecc8601f94ffba8a00889fefb239c00c6 ] + +syzkaller got crashes in packet_getsockopt() processing +PACKET_ROLLOVER_STATS command while another thread was managing +to change po->rollover + +Using RCU will fix this bug. We might later add proper RCU annotations +for sparse sake. + +In v2: I replaced kfree(rollover) in fanout_add() to kfree_rcu() +variant, as spotted by John. + +Fixes: a9b6391814d5 ("packet: rollover statistics") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Cc: John Sperbeck +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/packet/af_packet.c | 24 ++++++++++++++++-------- + 1 file changed, 16 insertions(+), 8 deletions(-) + +--- a/net/packet/af_packet.c ++++ b/net/packet/af_packet.c +@@ -1720,7 +1720,7 @@ static int fanout_add(struct sock *sk, u + + out: + if (err && rollover) { +- kfree(rollover); ++ kfree_rcu(rollover, rcu); + po->rollover = NULL; + } + mutex_unlock(&fanout_mutex); +@@ -1747,8 +1747,10 @@ static struct packet_fanout *fanout_rele + else + f = NULL; + +- if (po->rollover) ++ if (po->rollover) { + kfree_rcu(po->rollover, rcu); ++ po->rollover = NULL; ++ } + } + mutex_unlock(&fanout_mutex); + +@@ -3851,6 +3853,7 @@ static int packet_getsockopt(struct sock + void *data = &val; + union tpacket_stats_u st; + struct tpacket_rollover_stats rstats; ++ struct packet_rollover *rollover; + + if (level != SOL_PACKET) + return -ENOPROTOOPT; +@@ -3929,13 +3932,18 @@ static int packet_getsockopt(struct sock + 0); + break; + case PACKET_ROLLOVER_STATS: +- if (!po->rollover) ++ rcu_read_lock(); ++ rollover = rcu_dereference(po->rollover); ++ if (rollover) { ++ rstats.tp_all = atomic_long_read(&rollover->num); ++ rstats.tp_huge = atomic_long_read(&rollover->num_huge); ++ rstats.tp_failed = atomic_long_read(&rollover->num_failed); ++ data = &rstats; ++ lv = sizeof(rstats); ++ } ++ rcu_read_unlock(); ++ if (!rollover) + return -EINVAL; +- rstats.tp_all = atomic_long_read(&po->rollover->num); +- rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); +- rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); +- data = &rstats; +- lv = sizeof(rstats); + break; + case PACKET_TX_HAS_OFF: + val = po->tp_tx_has_off; diff --git a/queue-4.9/ppp-fix-race-in-ppp-device-destruction.patch b/queue-4.9/ppp-fix-race-in-ppp-device-destruction.patch new file mode 100644 index 00000000000..eb7923f12c2 --- /dev/null +++ b/queue-4.9/ppp-fix-race-in-ppp-device-destruction.patch @@ -0,0 +1,113 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: 
Guillaume Nault +Date: Fri, 6 Oct 2017 17:05:49 +0200 +Subject: ppp: fix race in ppp device destruction + +From: Guillaume Nault + + +[ Upstream commit 6151b8b37b119e8e3a8401b080d532520c95faf4 ] + +ppp_release() tries to ensure that netdevices are unregistered before +decrementing the unit refcount and running ppp_destroy_interface(). + +This is all fine as long as the device is unregistered by +ppp_release(): the unregister_netdevice() call, followed by +rtnl_unlock(), guarantee that the unregistration process completes +before rtnl_unlock() returns. + +However, the device may be unregistered by other means (like +ppp_nl_dellink()). If this happens right before ppp_release() calling +rtnl_lock(), then ppp_release() has to wait for the concurrent +unregistration code to release the lock. +But rtnl_unlock() releases the lock before completing the device +unregistration process. This allows ppp_release() to proceed and +eventually call ppp_destroy_interface() before the unregistration +process completes. Calling free_netdev() on this partially unregistered +device will BUG(): + + ------------[ cut here ]------------ + kernel BUG at net/core/dev.c:8141! + invalid opcode: 0000 [#1] SMP + + CPU: 1 PID: 1557 Comm: pppd Not tainted 4.14.0-rc2+ #4 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014 + + Call Trace: + ppp_destroy_interface+0xd8/0xe0 [ppp_generic] + ppp_disconnect_channel+0xda/0x110 [ppp_generic] + ppp_unregister_channel+0x5e/0x110 [ppp_generic] + pppox_unbind_sock+0x23/0x30 [pppox] + pppoe_connect+0x130/0x440 [pppoe] + SYSC_connect+0x98/0x110 + ? do_fcntl+0x2c0/0x5d0 + SyS_connect+0xe/0x10 + entry_SYSCALL_64_fastpath+0x1a/0xa5 + + RIP: free_netdev+0x107/0x110 RSP: ffffc28a40573d88 + ---[ end trace ed294ff0cc40eeff ]--- + +We could set the ->needs_free_netdev flag on PPP devices and move the +ppp_destroy_interface() logic in the ->priv_destructor() callback. 
But +that'd be quite intrusive as we'd first need to unlink from the other +channels and units that depend on the device (the ones that used the +PPPIOCCONNECT and PPPIOCATTACH ioctls). + +Instead, we can just let the netdevice hold a reference on its +ppp_file. This reference is dropped in ->priv_destructor(), at the very +end of the unregistration process, so that neither ppp_release() nor +ppp_disconnect_channel() can call ppp_destroy_interface() in the interim. + +Reported-by: Beniamino Galvani +Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion") +Signed-off-by: Guillaume Nault +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/ppp/ppp_generic.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +--- a/drivers/net/ppp/ppp_generic.c ++++ b/drivers/net/ppp/ppp_generic.c +@@ -1338,7 +1338,17 @@ ppp_get_stats64(struct net_device *dev, + + static int ppp_dev_init(struct net_device *dev) + { ++ struct ppp *ppp; ++ + netdev_lockdep_set_classes(dev); ++ ++ ppp = netdev_priv(dev); ++ /* Let the netdevice take a reference on the ppp file. This ensures ++ * that ppp_destroy_interface() won't run before the device gets ++ * unregistered. 
++ */ ++ atomic_inc(&ppp->file.refcnt); ++ + return 0; + } + +@@ -1361,6 +1371,15 @@ static void ppp_dev_uninit(struct net_de + wake_up_interruptible(&ppp->file.rwait); + } + ++static void ppp_dev_priv_destructor(struct net_device *dev) ++{ ++ struct ppp *ppp; ++ ++ ppp = netdev_priv(dev); ++ if (atomic_dec_and_test(&ppp->file.refcnt)) ++ ppp_destroy_interface(ppp); ++} ++ + static const struct net_device_ops ppp_netdev_ops = { + .ndo_init = ppp_dev_init, + .ndo_uninit = ppp_dev_uninit, +@@ -1386,6 +1405,7 @@ static void ppp_setup(struct net_device + dev->tx_queue_len = 3; + dev->type = ARPHRD_PPP; + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; ++ dev->destructor = ppp_dev_priv_destructor; + netif_keep_dst(dev); + } + diff --git a/queue-4.9/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch b/queue-4.9/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch new file mode 100644 index 00000000000..5028b92c5c7 --- /dev/null +++ b/queue-4.9/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch @@ -0,0 +1,48 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Xin Long +Date: Wed, 18 Oct 2017 21:37:49 +0800 +Subject: sctp: add the missing sock_owned_by_user check in sctp_icmp_redirect + +From: Xin Long + + +[ Upstream commit 1cc276cec9ec574d41cf47dfc0f51406b6f26ab4 ] + +Now sctp processes icmp redirect packet in sctp_icmp_redirect where +it calls sctp_transport_dst_check in which tp->dst can be released. + +The problem is before calling sctp_transport_dst_check, it doesn't +check sock_owned_by_user, which means tp->dst could be freed while +a process is accessing it with owning the socket. + +An use-after-free issue could be triggered by this. + +This patch is to fix it by checking sock_owned_by_user before calling +sctp_transport_dst_check in sctp_icmp_redirect, so that it would not +release tp->dst if users still hold sock lock. 
+ +Besides, the same issue fixed in commit 45caeaa5ac0b ("dccp/tcp: fix +routing redirect race") on sctp also needs this check. + +Fixes: 55be7a9c6074 ("ipv4: Add redirect support to all protocol icmp error handlers") +Reported-by: Eric Dumazet +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Acked-by: Neil Horman +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/sctp/input.c ++++ b/net/sctp/input.c +@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, + { + struct dst_entry *dst; + +- if (!t) ++ if (sock_owned_by_user(sk) || !t) + return; + dst = sctp_transport_dst_check(t); + if (dst) diff --git a/queue-4.9/sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch b/queue-4.9/sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch new file mode 100644 index 00000000000..c090bfe3571 --- /dev/null +++ b/queue-4.9/sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch @@ -0,0 +1,55 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Laszlo Toth +Date: Mon, 23 Oct 2017 19:19:33 +0200 +Subject: sctp: full support for ipv6 ip_nonlocal_bind & IP_FREEBIND + +From: Laszlo Toth + + +[ Upstream commit b71d21c274eff20a9db8158882b545b141b73ab8 ] + +Commit 9b9742022888 ("sctp: support ipv6 nonlocal bind") +introduced support for the above options as v4 sctp did, +so patched sctp_v6_available(). + +In the v4 implementation it's enough, because +sctp_inet_bind_verify() just returns with sctp_v4_available(). +However sctp_inet6_bind_verify() has an extra check before that +for link-local scope_id, which won't respect the above options. + +Added the checks before calling ipv6_chk_addr(), but +not before the validation of scope_id. 
+ +before (w/ both options): + ./v6test fe80::10 sctp + bind failed, errno: 99 (Cannot assign requested address) + ./v6test fe80::10 tcp + bind success, errno: 0 (Success) + +after (w/ both options): + ./v6test fe80::10 sctp + bind success, errno: 0 (Success) + +Signed-off-by: Laszlo Toth +Reviewed-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/ipv6.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +--- a/net/sctp/ipv6.c ++++ b/net/sctp/ipv6.c +@@ -881,8 +881,10 @@ static int sctp_inet6_bind_verify(struct + net = sock_net(&opt->inet.sk); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); +- if (!dev || +- !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) { ++ if (!dev || !(opt->inet.freebind || ++ net->ipv6.sysctl.ip_nonlocal_bind || ++ ipv6_chk_addr(net, &addr->v6.sin6_addr, ++ dev, 0))) { + rcu_read_unlock(); + return 0; + } diff --git a/queue-4.9/sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch b/queue-4.9/sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch new file mode 100644 index 00000000000..60f863c900a --- /dev/null +++ b/queue-4.9/sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch @@ -0,0 +1,100 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Xin Long +Date: Sat, 28 Oct 2017 02:13:29 +0800 +Subject: sctp: reset owner sk for data chunks on out queues when migrating a sock + +From: Xin Long + + +[ Upstream commit d04adf1b355181e737b6b1e23d801b07f0b7c4c0 ] + +Now when migrating sock to another one in sctp_sock_migrate(), it only +resets owner sk for the data in receive queues, not the chunks on out +queues. + +It would cause that data chunks length on the sock is not consistent +with sk sk_wmem_alloc. When closing the sock or freeing these chunks, +the old sk would never be freed, and the new sock may crash due to +the overflow sk_wmem_alloc. 
+ +syzbot found this issue with this series: + + r0 = socket$inet_sctp() + sendto$inet(r0) + listen(r0) + accept4(r0) + close(r0) + +Although listen() should have returned error when one TCP-style socket +is in connecting (I may fix this one in another patch), it could also +be reproduced by peeling off an assoc. + +This issue is there since very beginning. + +This patch is to reset owner sk for the chunks on out queues so that +sk sk_wmem_alloc has correct value after accept one sock or peeloff +an assoc to one sock. + +Note that when resetting owner sk for chunks on outqueue, it has to +sctp_clear_owner_w/skb_orphan chunks before changing assoc->base.sk +first and then sctp_set_owner_w them after changing assoc->base.sk, +due to that sctp_wfree and it's callees are using assoc->base.sk. + +Reported-by: Dmitry Vyukov +Signed-off-by: Xin Long +Acked-by: Marcelo Ricardo Leitner +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/sctp/socket.c | 32 ++++++++++++++++++++++++++++++++ + 1 file changed, 32 insertions(+) + +--- a/net/sctp/socket.c ++++ b/net/sctp/socket.c +@@ -168,6 +168,36 @@ static inline void sctp_set_owner_w(stru + sk_mem_charge(sk, chunk->skb->truesize); + } + ++static void sctp_clear_owner_w(struct sctp_chunk *chunk) ++{ ++ skb_orphan(chunk->skb); ++} ++ ++static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, ++ void (*cb)(struct sctp_chunk *)) ++ ++{ ++ struct sctp_outq *q = &asoc->outqueue; ++ struct sctp_transport *t; ++ struct sctp_chunk *chunk; ++ ++ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) ++ list_for_each_entry(chunk, &t->transmitted, transmitted_list) ++ cb(chunk); ++ ++ list_for_each_entry(chunk, &q->retransmit, list) ++ cb(chunk); ++ ++ list_for_each_entry(chunk, &q->sacked, list) ++ cb(chunk); ++ ++ list_for_each_entry(chunk, &q->abandoned, list) ++ cb(chunk); ++ ++ list_for_each_entry(chunk, &q->out_chunk_list, list) ++ cb(chunk); ++} ++ + /* Verify that this is a valid 
address. */ + static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, + int len) +@@ -7826,7 +7856,9 @@ static void sctp_sock_migrate(struct soc + * paths won't try to lock it and then oldsk. + */ + lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); ++ sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w); + sctp_assoc_migrate(assoc, newsk); ++ sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w); + + /* If the association on the newsk is already closed before accept() + * is called, set RCV_SHUTDOWN flag. diff --git a/queue-4.9/series b/queue-4.9/series new file mode 100644 index 00000000000..bf4e3af695e --- /dev/null +++ b/queue-4.9/series @@ -0,0 +1,25 @@ +gso-fix-payload-length-when-gso_size-is-zero.patch +tun-tap-sanitize-tunsetsndbuf-input.patch +ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch +netlink-do-not-set-cb_running-if-dump-s-start-errs.patch +net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch +tcp-fix-tcp_mtu_probe-vs-highest_sack.patch +l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch +tun-call-dev_get_valid_name-before-register_netdevice.patch +sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch +tcp-dccp-fix-ireq-opt-races.patch +packet-avoid-panic-in-packet_getsockopt.patch +soreuseport-fix-initialization-race.patch +ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch +sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch +tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch +tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch +net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch +tap-double-free-in-error-path-in-tap_open.patch +ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch +ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch +ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch +tun-allow-positive-return-values-on-dev_get_valid_name-call.patch 
+sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch +net_sched-avoid-matching-qdisc-with-zero-handle.patch +ppp-fix-race-in-ppp-device-destruction.patch diff --git a/queue-4.9/soreuseport-fix-initialization-race.patch b/queue-4.9/soreuseport-fix-initialization-race.patch new file mode 100644 index 00000000000..4d4fbac8432 --- /dev/null +++ b/queue-4.9/soreuseport-fix-initialization-race.patch @@ -0,0 +1,91 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Craig Gallek +Date: Thu, 19 Oct 2017 15:00:29 -0400 +Subject: soreuseport: fix initialization race + +From: Craig Gallek + + +[ Upstream commit 1b5f962e71bfad6284574655c406597535c3ea7a ] + +Syzkaller stumbled upon a way to trigger +WARNING: CPU: 1 PID: 13881 at net/core/sock_reuseport.c:41 +reuseport_alloc+0x306/0x3b0 net/core/sock_reuseport.c:39 + +There are two initialization paths for the sock_reuseport structure in a +socket: Through the udp/tcp bind paths of SO_REUSEPORT sockets or through +SO_ATTACH_REUSEPORT_[CE]BPF before bind. The existing implementation +assumed that the socket lock protected both of these paths when it actually +only protects the SO_ATTACH_REUSEPORT path. Syzkaller triggered this +double allocation by running these paths concurrently. + +This patch moves the check for double allocation into the reuseport_alloc +function which is protected by a global spin lock. + +Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") +Fixes: c125e80b8868 ("soreuseport: fast reuseport TCP socket selection") +Signed-off-by: Craig Gallek +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/core/sock_reuseport.c | 12 +++++++++--- + net/ipv4/inet_hashtables.c | 5 +---- + net/ipv4/udp.c | 5 +---- + 3 files changed, 11 insertions(+), 11 deletions(-) + +--- a/net/core/sock_reuseport.c ++++ b/net/core/sock_reuseport.c +@@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk) + * soft irq of receive path or setsockopt from process context + */ + spin_lock_bh(&reuseport_lock); +- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, +- lockdep_is_held(&reuseport_lock)), +- "multiple allocations for the same socket"); ++ ++ /* Allocation attempts can occur concurrently via the setsockopt path ++ * and the bind/hash path. Nothing to do when we lose the race. ++ */ ++ if (rcu_dereference_protected(sk->sk_reuseport_cb, ++ lockdep_is_held(&reuseport_lock))) ++ goto out; ++ + reuse = __reuseport_alloc(INIT_SOCKS); + if (!reuse) { + spin_unlock_bh(&reuseport_lock); +@@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk) + reuse->num_socks = 1; + rcu_assign_pointer(sk->sk_reuseport_cb, reuse); + ++out: + spin_unlock_bh(&reuseport_lock); + + return 0; +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -455,10 +455,7 @@ static int inet_reuseport_add_sock(struc + return reuseport_add_sock(sk, sk2); + } + +- /* Initial allocation may have already happened via setsockopt */ +- if (!rcu_access_pointer(sk->sk_reuseport_cb)) +- return reuseport_alloc(sk); +- return 0; ++ return reuseport_alloc(sk); + } + + int __inet_hash(struct sock *sk, struct sock *osk, +--- a/net/ipv4/udp.c ++++ b/net/ipv4/udp.c +@@ -222,10 +222,7 @@ static int udp_reuseport_add_sock(struct + } + } + +- /* Initial allocation may have already happened via setsockopt */ +- if (!rcu_access_pointer(sk->sk_reuseport_cb)) +- return reuseport_alloc(sk); +- return 0; ++ return reuseport_alloc(sk); + } + + /** diff --git a/queue-4.9/tap-double-free-in-error-path-in-tap_open.patch 
b/queue-4.9/tap-double-free-in-error-path-in-tap_open.patch new file mode 100644 index 00000000000..5162d1d4c18 --- /dev/null +++ b/queue-4.9/tap-double-free-in-error-path-in-tap_open.patch @@ -0,0 +1,66 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Girish Moodalbail +Date: Wed, 25 Oct 2017 00:23:04 -0700 +Subject: tap: double-free in error path in tap_open() + +From: Girish Moodalbail + + +[ Upstream commit 78e0ea6791d7baafb8a0ca82b1bd0c7b3453c919 ] + +Double free of skb_array in tap module is causing kernel panic. When +tap_set_queue() fails we free skb_array right away by calling +skb_array_cleanup(). However, later on skb_array_cleanup() is called +again by tap_sock_destruct through sock_put(). This patch fixes that +issue. + +Fixes: 362899b8725b35e3 (macvtap: switch to use skb array) +Signed-off-by: Girish Moodalbail +Acked-by: Jason Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -559,6 +559,10 @@ static int macvtap_open(struct inode *in + &macvtap_proto, 0); + if (!q) + goto err; ++ if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL)) { ++ sk_free(&q->sk); ++ goto err; ++ } + + RCU_INIT_POINTER(q->sock.wq, &q->wq); + init_waitqueue_head(&q->wq.wait); +@@ -582,22 +586,18 @@ static int macvtap_open(struct inode *in + if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG)) + sock_set_flag(&q->sk, SOCK_ZEROCOPY); + +- err = -ENOMEM; +- if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL)) +- goto err_array; +- + err = macvtap_set_queue(dev, file, q); +- if (err) +- goto err_queue; ++ if (err) { ++ /* macvtap_sock_destruct() will take care of freeing skb_array */ ++ goto err_put; ++ } + + dev_put(dev); + + rtnl_unlock(); + return err; + +-err_queue: +- skb_array_cleanup(&q->skb_array); +-err_array: ++err_put: + sock_put(&q->sk); 
+ err: + if (dev) diff --git a/queue-4.9/tcp-dccp-fix-ireq-opt-races.patch b/queue-4.9/tcp-dccp-fix-ireq-opt-races.patch new file mode 100644 index 00000000000..47418a0aa4d --- /dev/null +++ b/queue-4.9/tcp-dccp-fix-ireq-opt-races.patch @@ -0,0 +1,408 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Eric Dumazet +Date: Fri, 20 Oct 2017 09:04:13 -0700 +Subject: tcp/dccp: fix ireq->opt races + +From: Eric Dumazet + + +[ Upstream commit c92e8c02fe664155ac4234516e32544bec0f113d ] + +syzkaller found another bug in DCCP/TCP stacks [1] + +For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix +ireq->pktopts race"), we need to make sure we do not access +ireq->opt unless we own the request sock. + +Note the opt field is renamed to ireq_opt to ease grep games. + +[1] +BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 +Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295 + +CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + print_address_description+0x73/0x250 mm/kasan/report.c:252 + kasan_report_error mm/kasan/report.c:351 [inline] + kasan_report+0x25b/0x340 mm/kasan/report.c:409 + __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427 + ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 + tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135 + tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587 + tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557 + __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072 + tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline] + tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071 + tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816 + tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682 + ip_local_deliver_finish+0x2e2/0xba0 
net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:464 [inline] + ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 + __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 + netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 + netif_receive_skb+0xae/0x390 net/core/dev.c:4611 + tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 + tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 + tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 + call_write_iter include/linux/fs.h:1770 [inline] + new_sync_write fs/read_write.c:468 [inline] + __vfs_write+0x68a/0x970 fs/read_write.c:481 + vfs_write+0x18f/0x510 fs/read_write.c:543 + SYSC_write fs/read_write.c:588 [inline] + SyS_write+0xef/0x220 fs/read_write.c:580 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x40c341 +RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341 +RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015 +RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 +R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1 +R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000 + +Allocated by task 3295: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 + __do_kmalloc mm/slab.c:3725 [inline] + __kmalloc+0x162/0x760 mm/slab.c:3734 + kmalloc include/linux/slab.h:498 [inline] + tcp_v4_save_options include/net/tcp.h:1962 [inline] + tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271 + tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283 + tcp_v4_conn_request+0x157/0x210 
net/ipv4/tcp_ipv4.c:1313 + tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857 + tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482 + tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711 + ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:464 [inline] + ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 + __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 + netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 + netif_receive_skb+0xae/0x390 net/core/dev.c:4611 + tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 + tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 + tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 + call_write_iter include/linux/fs.h:1770 [inline] + new_sync_write fs/read_write.c:468 [inline] + __vfs_write+0x68a/0x970 fs/read_write.c:481 + vfs_write+0x18f/0x510 fs/read_write.c:543 + SYSC_write fs/read_write.c:588 [inline] + SyS_write+0xef/0x220 fs/read_write.c:580 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Freed by task 3306: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 + __cache_free mm/slab.c:3503 [inline] + kfree+0xca/0x250 mm/slab.c:3820 + inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157 + __sk_destruct+0xfd/0x910 net/core/sock.c:1560 + sk_destruct+0x47/0x80 net/core/sock.c:1595 + __sk_free+0x57/0x230 net/core/sock.c:1603 + sk_free+0x2a/0x40 net/core/sock.c:1614 + sock_put include/net/sock.h:1652 [inline] + inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959 + tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765 + tcp_v4_rcv+0x17f6/0x2f80 
net/ipv4/tcp_ipv4.c:1675 + ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:464 [inline] + ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 + __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 + netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 + netif_receive_skb+0xae/0x390 net/core/dev.c:4611 + tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 + tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 + tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 + call_write_iter include/linux/fs.h:1770 [inline] + new_sync_write fs/read_write.c:468 [inline] + __vfs_write+0x68a/0x970 fs/read_write.c:481 + vfs_write+0x18f/0x510 fs/read_write.c:543 + SYSC_write fs/read_write.c:588 [inline] + SyS_write+0xef/0x220 fs/read_write.c:580 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") +Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 2 +- + net/dccp/ipv4.c | 13 ++++++++----- + net/ipv4/cipso_ipv4.c | 24 +++++++----------------- + net/ipv4/inet_connection_sock.c | 8 +++----- + net/ipv4/syncookies.c | 2 +- + net/ipv4/tcp_input.c | 2 +- + net/ipv4/tcp_ipv4.c | 21 ++++++++++++--------- + 7 files changed, 33 insertions(+), 39 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -96,7 +96,7 @@ struct inet_request_sock { + kmemcheck_bitfield_end(flags); + u32 ir_mark; + union { +- struct ip_options_rcu *opt; ++ struct ip_options_rcu __rcu *ireq_opt; + #if IS_ENABLED(CONFIG_IPV6) + struct { + struct ipv6_txoptions *ipv6_opt; +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(c + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newinet->inet_saddr = ireq->ir_loc_addr; +- newinet->inet_opt = ireq->opt; +- ireq->opt = NULL; ++ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->inet_id = jiffies; +@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(c + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); +- ++ if (*own_req) ++ ireq->ireq_opt = NULL; ++ else ++ newinet->inet_opt = NULL; + return newsk; + + exit_overflow: +@@ -441,6 +443,7 @@ exit: + __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); + return NULL; + put_and_exit: ++ newinet->inet_opt = NULL; + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); + goto exit; +@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const s + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq->opt); ++ rcu_dereference(ireq->ireq_opt)); + err = net_xmit_eval(err); + } + +@@ -548,7 +551,7 @@ out: + static void 
dccp_v4_reqsk_destructor(struct request_sock *req) + { + dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); +- kfree(inet_rsk(req)->opt); ++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); + } + + void dccp_syn_ack_timeout(const struct request_sock *req) +--- a/net/ipv4/cipso_ipv4.c ++++ b/net/ipv4/cipso_ipv4.c +@@ -1943,7 +1943,7 @@ int cipso_v4_req_setattr(struct request_ + buf = NULL; + + req_inet = inet_rsk(req); +- opt = xchg(&req_inet->opt, opt); ++ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); + if (opt) + kfree_rcu(opt, rcu); + +@@ -1965,11 +1965,13 @@ req_setattr_failure: + * values on failure. + * + */ +-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) ++static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) + { ++ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1); + int hdr_delta = 0; +- struct ip_options_rcu *opt = *opt_ptr; + ++ if (!opt || opt->opt.cipso == 0) ++ return 0; + if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { + u8 cipso_len; + u8 cipso_off; +@@ -2031,14 +2033,10 @@ static int cipso_v4_delopt(struct ip_opt + */ + void cipso_v4_sock_delattr(struct sock *sk) + { +- int hdr_delta; +- struct ip_options_rcu *opt; + struct inet_sock *sk_inet; ++ int hdr_delta; + + sk_inet = inet_sk(sk); +- opt = rcu_dereference_protected(sk_inet->inet_opt, 1); +- if (!opt || opt->opt.cipso == 0) +- return; + + hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); + if (sk_inet->is_icsk && hdr_delta > 0) { +@@ -2058,15 +2056,7 @@ void cipso_v4_sock_delattr(struct sock * + */ + void cipso_v4_req_delattr(struct request_sock *req) + { +- struct ip_options_rcu *opt; +- struct inet_request_sock *req_inet; +- +- req_inet = inet_rsk(req); +- opt = req_inet->opt; +- if (!opt || opt->opt.cipso == 0) +- return; +- +- cipso_v4_delopt(&req_inet->opt); ++ cipso_v4_delopt(&inet_rsk(req)->ireq_opt); + } + + /** +--- a/net/ipv4/inet_connection_sock.c ++++ 
b/net/ipv4/inet_connection_sock.c +@@ -407,9 +407,10 @@ struct dst_entry *inet_csk_route_req(con + { + const struct inet_request_sock *ireq = inet_rsk(req); + struct net *net = read_pnet(&ireq->ireq_net); +- struct ip_options_rcu *opt = ireq->opt; ++ struct ip_options_rcu *opt; + struct rtable *rt; + ++ opt = rcu_dereference(ireq->ireq_opt); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), +@@ -443,10 +444,9 @@ struct dst_entry *inet_csk_route_child_s + struct flowi4 *fl4; + struct rtable *rt; + ++ opt = rcu_dereference(ireq->ireq_opt); + fl4 = &newinet->cork.fl.u.ip4; + +- rcu_read_lock(); +- opt = rcu_dereference(newinet->inet_opt); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), +@@ -459,13 +459,11 @@ struct dst_entry *inet_csk_route_child_s + goto no_route; + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) + goto route_err; +- rcu_read_unlock(); + return &rt->dst; + + route_err: + ip_rt_put(rt); + no_route: +- rcu_read_unlock(); + __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); + return NULL; + } +--- a/net/ipv4/syncookies.c ++++ b/net/ipv4/syncookies.c +@@ -354,7 +354,7 @@ struct sock *cookie_v4_check(struct sock + /* We throwed the options of the initial SYN away, so we hope + * the ACK carries the same options again (see RFC1122 4.2.3.8) + */ +- ireq->opt = tcp_v4_save_options(skb); ++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb)); + + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -6237,7 +6237,7 @@ struct request_sock *inet_reqsk_alloc(co + struct inet_request_sock *ireq = inet_rsk(req); + + kmemcheck_annotate_bitfield(ireq, flags); +- ireq->opt = NULL; ++ ireq->ireq_opt = NULL; + #if IS_ENABLED(CONFIG_IPV6) + ireq->pktopts = NULL; + #endif +--- a/net/ipv4/tcp_ipv4.c ++++ 
b/net/ipv4/tcp_ipv4.c +@@ -861,7 +861,7 @@ static int tcp_v4_send_synack(const stru + + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq->opt); ++ rcu_dereference(ireq->ireq_opt)); + err = net_xmit_eval(err); + } + +@@ -873,7 +873,7 @@ static int tcp_v4_send_synack(const stru + */ + static void tcp_v4_reqsk_destructor(struct request_sock *req) + { +- kfree(inet_rsk(req)->opt); ++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); + } + + #ifdef CONFIG_TCP_MD5SIG +@@ -1199,7 +1199,7 @@ static void tcp_v4_init_req(struct reque + + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); + sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); +- ireq->opt = tcp_v4_save_options(skb); ++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb)); + } + + static struct dst_entry *tcp_v4_route_req(const struct sock *sk, +@@ -1295,10 +1295,9 @@ struct sock *tcp_v4_syn_recv_sock(const + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newsk->sk_bound_dev_if = ireq->ir_iif; +- newinet->inet_saddr = ireq->ir_loc_addr; +- inet_opt = ireq->opt; +- rcu_assign_pointer(newinet->inet_opt, inet_opt); +- ireq->opt = NULL; ++ newinet->inet_saddr = ireq->ir_loc_addr; ++ inet_opt = rcu_dereference(ireq->ireq_opt); ++ RCU_INIT_POINTER(newinet->inet_opt, inet_opt); + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->rcv_tos = ip_hdr(skb)->tos; +@@ -1346,9 +1345,12 @@ struct sock *tcp_v4_syn_recv_sock(const + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); +- if (*own_req) ++ if (likely(*own_req)) { + tcp_move_syn(newtp, req); +- ++ ireq->ireq_opt = NULL; ++ } else { ++ newinet->inet_opt = NULL; ++ } + return newsk; + + exit_overflow: +@@ -1359,6 +1361,7 @@ exit: + tcp_listendrop(sk); + return NULL; + put_and_exit: ++ newinet->inet_opt = NULL; + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + 
goto exit; diff --git a/queue-4.9/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch b/queue-4.9/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch new file mode 100644 index 00000000000..fff7df2ee67 --- /dev/null +++ b/queue-4.9/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch @@ -0,0 +1,49 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Eric Dumazet +Date: Sun, 22 Oct 2017 12:33:57 -0700 +Subject: tcp/dccp: fix lockdep splat in inet_csk_route_req() + +From: Eric Dumazet + + +[ Upstream commit a6ca7abe53633d08eea1c6756cb49c9b2d4c90bf ] + +This patch fixes the following lockdep splat in inet_csk_route_req() + + lockdep_rcu_suspicious + inet_csk_route_req + tcp_v4_send_synack + tcp_rtx_synack + inet_rtx_syn_ack + tcp_fastopen_synack_time + tcp_retransmit_timer + tcp_write_timer_handler + tcp_write_timer + call_timer_fn + +Thread running inet_csk_route_req() owns a reference on the request +socket, so we have the guarantee ireq->ireq_opt won't be changed or +freed. + +lockdep can enforce this invariant for us. + +Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_connection_sock.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -410,7 +410,8 @@ struct dst_entry *inet_csk_route_req(con + struct ip_options_rcu *opt; + struct rtable *rt; + +- opt = rcu_dereference(ireq->ireq_opt); ++ opt = rcu_dereference_protected(ireq->ireq_opt, ++ atomic_read(&req->rsk_refcnt) > 0); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), diff --git a/queue-4.9/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch b/queue-4.9/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch new file mode 100644 index 00000000000..7a9ebd46a55 --- /dev/null +++ b/queue-4.9/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch @@ -0,0 +1,113 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Eric Dumazet +Date: Tue, 24 Oct 2017 08:20:31 -0700 +Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt + +From: Eric Dumazet + + +[ Upstream commit 06f877d613be3621604c2520ec0351d9fbdca15f ] + +In my first attempt to fix the lockdep splat, I forgot we could +enter inet_csk_route_req() with a freshly allocated request socket, +for which refcount has not yet been elevated, due to complex +SLAB_TYPESAFE_BY_RCU rules. + +We either are in rcu_read_lock() section _or_ we own a refcount on the +request. + +Correct RCU verb to use here is rcu_dereference_check(), although it is +not possible to prove we actually own a reference on a shared +refcount :/ + +In v2, I added ireq_opt_deref() helper and use in three places, to fix other +possible splats. + +[ 49.844590] lockdep_rcu_suspicious+0xea/0xf3 +[ 49.846487] inet_csk_route_req+0x53/0x14d +[ 49.848334] tcp_v4_route_req+0xe/0x10 +[ 49.850174] tcp_conn_request+0x31c/0x6a0 +[ 49.851992] ? 
__lock_acquire+0x614/0x822 +[ 49.854015] tcp_v4_conn_request+0x5a/0x79 +[ 49.855957] ? tcp_v4_conn_request+0x5a/0x79 +[ 49.858052] tcp_rcv_state_process+0x98/0xdcc +[ 49.859990] ? sk_filter_trim_cap+0x2f6/0x307 +[ 49.862085] tcp_v4_do_rcv+0xfc/0x145 +[ 49.864055] ? tcp_v4_do_rcv+0xfc/0x145 +[ 49.866173] tcp_v4_rcv+0x5ab/0xaf9 +[ 49.868029] ip_local_deliver_finish+0x1af/0x2e7 +[ 49.870064] ip_local_deliver+0x1b2/0x1c5 +[ 49.871775] ? inet_del_offload+0x45/0x45 +[ 49.873916] ip_rcv_finish+0x3f7/0x471 +[ 49.875476] ip_rcv+0x3f1/0x42f +[ 49.876991] ? ip_local_deliver_finish+0x2e7/0x2e7 +[ 49.878791] __netif_receive_skb_core+0x6d3/0x950 +[ 49.880701] ? process_backlog+0x7e/0x216 +[ 49.882589] __netif_receive_skb+0x1d/0x5e +[ 49.884122] process_backlog+0x10c/0x216 +[ 49.885812] net_rx_action+0x147/0x3df + +Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()") +Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") +Signed-off-by: Eric Dumazet +Reported-by: kernel test robot +Reported-by: Maciej Żenczykowski +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 6 ++++++ + net/dccp/ipv4.c | 2 +- + net/ipv4/inet_connection_sock.c | 4 ++-- + net/ipv4/tcp_ipv4.c | 2 +- + 4 files changed, 10 insertions(+), 4 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -132,6 +132,12 @@ static inline int inet_request_bound_dev + return sk->sk_bound_dev_if; + } + ++static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) ++{ ++ return rcu_dereference_check(ireq->ireq_opt, ++ atomic_read(&ireq->req.rsk_refcnt) > 0); ++} ++ + struct inet_cork { + unsigned int flags; + __be32 addr; +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const s + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- rcu_dereference(ireq->ireq_opt)); ++ ireq_opt_deref(ireq)); + err = net_xmit_eval(err); + } + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -410,8 +410,8 @@ struct dst_entry *inet_csk_route_req(con + struct ip_options_rcu *opt; + struct rtable *rt; + +- opt = rcu_dereference_protected(ireq->ireq_opt, +- atomic_read(&req->rsk_refcnt) > 0); ++ opt = ireq_opt_deref(ireq); ++ + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -861,7 +861,7 @@ static int tcp_v4_send_synack(const stru + + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- rcu_dereference(ireq->ireq_opt)); ++ ireq_opt_deref(ireq)); + err = net_xmit_eval(err); + } + diff --git a/queue-4.9/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch b/queue-4.9/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch new file mode 100644 index 00000000000..9d976e89eda --- /dev/null +++ b/queue-4.9/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch @@ -0,0 +1,81 @@ +From foo@baz Wed Nov 15 
17:24:03 CET 2017 +From: Eric Dumazet +Date: Mon, 30 Oct 2017 23:08:20 -0700 +Subject: tcp: fix tcp_mtu_probe() vs highest_sack + +From: Eric Dumazet + + +[ Upstream commit 2b7cda9c35d3b940eb9ce74b30bbd5eb30db493d ] + +Based on SNMP values provided by Roman, Yuchung made the observation +that some crashes in tcp_sacktag_walk() might be caused by MTU probing. + +Looking at tcp_mtu_probe(), I found that when a new skb was placed +in front of the write queue, we were not updating tcp highest sack. + +If one skb is freed because all its content was copied to the new skb +(for MTU probing), then tp->highest_sack could point to a now freed skb. + +Bad things would then happen, including infinite loops. + +This patch renames tcp_highest_sack_combine() and uses it +from tcp_mtu_probe() to fix the bug. + +Note that I also removed one test against tp->sacked_out, +since we want to replace tp->highest_sack regardless of whatever +condition, since keeping a stale pointer to freed skb is a recipe +for disaster. + +Fixes: a47e5a988a57 ("[TCP]: Convert highest_sack to sk_buff to allow direct access") +Signed-off-by: Eric Dumazet +Reported-by: Alexei Starovoitov +Reported-by: Roman Gushchin +Reported-by: Oleksandr Natalenko +Acked-by: Alexei Starovoitov +Acked-by: Neal Cardwell +Acked-by: Yuchung Cheng +Signed-off-by: David S. 
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 6 +++--- + net/ipv4/tcp_output.c | 3 ++- + 2 files changed, 5 insertions(+), 4 deletions(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1681,12 +1681,12 @@ static inline void tcp_highest_sack_rese + tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk); + } + +-/* Called when old skb is about to be deleted (to be combined with new skb) */ +-static inline void tcp_highest_sack_combine(struct sock *sk, ++/* Called when old skb is about to be deleted and replaced by new skb */ ++static inline void tcp_highest_sack_replace(struct sock *sk, + struct sk_buff *old, + struct sk_buff *new) + { +- if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack)) ++ if (old == tcp_highest_sack(sk)) + tcp_sk(sk)->highest_sack = new; + } + +--- a/net/ipv4/tcp_output.c ++++ b/net/ipv4/tcp_output.c +@@ -1996,6 +1996,7 @@ static int tcp_mtu_probe(struct sock *sk + nskb->ip_summed = skb->ip_summed; + + tcp_insert_write_queue_before(nskb, skb, sk); ++ tcp_highest_sack_replace(sk, skb, nskb); + + len = 0; + tcp_for_write_queue_from_safe(skb, next, sk) { +@@ -2535,7 +2536,7 @@ static void tcp_collapse_retrans(struct + + BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); + +- tcp_highest_sack_combine(sk, next_skb, skb); ++ tcp_highest_sack_replace(sk, next_skb, skb); + + tcp_unlink_write_queue(next_skb, sk); + diff --git a/queue-4.9/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch b/queue-4.9/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch new file mode 100644 index 00000000000..a7b4e6b22ac --- /dev/null +++ b/queue-4.9/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch @@ -0,0 +1,36 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Julien Gomes +Date: Wed, 25 Oct 2017 11:50:50 -0700 +Subject: tun: allow positive return values on dev_get_valid_name() call + +From: Julien Gomes + + +[ Upstream commit 5c25f65fd1e42685f7ccd80e0621829c105785d9 
] + +If the name argument of dev_get_valid_name() contains "%d", it will try +to assign it a unit number in __dev_alloc_name() and return either the +unit number (>= 0) or an error code (< 0). +Considering positive values as error values prevents tun device creations +relying on this mechanism, therefore we should only consider negative values +as errors here. + +Signed-off-by: Julien Gomes +Acked-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1788,7 +1788,7 @@ static int tun_set_iff(struct net *net, + if (!dev) + return -ENOMEM; + err = dev_get_valid_name(net, dev, name); +- if (err) ++ if (err < 0) + goto err_free_dev; + + dev_net_set(dev, net); diff --git a/queue-4.9/tun-call-dev_get_valid_name-before-register_netdevice.patch b/queue-4.9/tun-call-dev_get_valid_name-before-register_netdevice.patch new file mode 100644 index 00000000000..9816331ccb4 --- /dev/null +++ b/queue-4.9/tun-call-dev_get_valid_name-before-register_netdevice.patch @@ -0,0 +1,82 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Cong Wang +Date: Fri, 13 Oct 2017 11:58:53 -0700 +Subject: tun: call dev_get_valid_name() before register_netdevice() + +From: Cong Wang + + +[ Upstream commit 0ad646c81b2182f7fa67ec0c8c825e0ee165696d ] + +register_netdevice() could fail early when we have an invalid +dev name, in which case ->ndo_uninit() is not called. For tun +device, this is a problem because a timer etc. are already +initialized and it expects ->ndo_uninit() to clean them up. + +We could move these initializations into a ->ndo_init() so +that register_netdevice() knows better, however this is still +complicated due to the logic in tun_detach(). + +Therefore, I choose to just call dev_get_valid_name() before +register_netdevice(), which is quicker and much easier to audit. +And for this specific case, it is already enough. 
+ +Fixes: 96442e42429e ("tuntap: choose the txq based on rxq") +Reported-by: Dmitry Alexeev +Cc: Jason Wang +Cc: "Michael S. Tsirkin" +Signed-off-by: Cong Wang +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/tun.c | 3 +++ + include/linux/netdevice.h | 3 +++ + net/core/dev.c | 6 +++--- + 3 files changed, 9 insertions(+), 3 deletions(-) + +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -1787,6 +1787,9 @@ static int tun_set_iff(struct net *net, + + if (!dev) + return -ENOMEM; ++ err = dev_get_valid_name(net, dev, name); ++ if (err) ++ goto err_free_dev; + + dev_net_set(dev, net); + dev->rtnl_link_ops = &tun_link_ops; +--- a/include/linux/netdevice.h ++++ b/include/linux/netdevice.h +@@ -3742,6 +3742,9 @@ struct net_device *alloc_netdev_mqs(int + unsigned char name_assign_type, + void (*setup)(struct net_device *), + unsigned int txqs, unsigned int rxqs); ++int dev_get_valid_name(struct net *net, struct net_device *dev, ++ const char *name); ++ + #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \ + alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1) + +--- a/net/core/dev.c ++++ b/net/core/dev.c +@@ -1115,9 +1115,8 @@ static int dev_alloc_name_ns(struct net + return ret; + } + +-static int dev_get_valid_name(struct net *net, +- struct net_device *dev, +- const char *name) ++int dev_get_valid_name(struct net *net, struct net_device *dev, ++ const char *name) + { + BUG_ON(!net); + +@@ -1133,6 +1132,7 @@ static int dev_get_valid_name(struct net + + return 0; + } ++EXPORT_SYMBOL(dev_get_valid_name); + + /** + * dev_change_name - change name of a device diff --git a/queue-4.9/tun-tap-sanitize-tunsetsndbuf-input.patch b/queue-4.9/tun-tap-sanitize-tunsetsndbuf-input.patch new file mode 100644 index 00000000000..afb057f3b6b --- /dev/null +++ b/queue-4.9/tun-tap-sanitize-tunsetsndbuf-input.patch @@ -0,0 +1,88 @@ +From foo@baz Wed Nov 15 17:24:03 CET 2017 +From: Craig Gallek +Date: Mon, 30 Oct 2017 
18:50:11 -0400 +Subject: tun/tap: sanitize TUNSETSNDBUF input + +From: Craig Gallek + + +[ Upstream commit 93161922c658c714715686cd0cf69b090cb9bf1d ] + +Syzkaller found several variants of the lockup below by setting negative +values with the TUNSETSNDBUF ioctl. This patch adds a sanity check +to both the tun and tap versions of this ioctl. + + watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [repro:2389] + Modules linked in: + irq event stamp: 329692056 + hardirqs last enabled at (329692055): [] _raw_spin_unlock_irqrestore+0x31/0x75 + hardirqs last disabled at (329692056): [] apic_timer_interrupt+0x98/0xb0 + softirqs last enabled at (35659740): [] __do_softirq+0x328/0x48c + softirqs last disabled at (35659731): [] irq_exit+0xbc/0xd0 + CPU: 0 PID: 2389 Comm: repro Not tainted 4.14.0-rc7 #23 + Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 + task: ffff880009452140 task.stack: ffff880006a20000 + RIP: 0010:_raw_spin_lock_irqsave+0x11/0x80 + RSP: 0018:ffff880006a27c50 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff10 + RAX: ffff880009ac68d0 RBX: ffff880006a27ce0 RCX: 0000000000000000 + RDX: 0000000000000001 RSI: ffff880006a27ce0 RDI: ffff880009ac6900 + RBP: ffff880006a27c60 R08: 0000000000000000 R09: 0000000000000000 + R10: 0000000000000001 R11: 000000000063ff00 R12: ffff880009ac6900 + R13: ffff880006a27cf8 R14: 0000000000000001 R15: ffff880006a27cf8 + FS: 00007f4be4838700(0000) GS:ffff88000cc00000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 0000000020101000 CR3: 0000000009616000 CR4: 00000000000006f0 + Call Trace: + prepare_to_wait+0x26/0xc0 + sock_alloc_send_pskb+0x14e/0x270 + ? remove_wait_queue+0x60/0x60 + tun_get_user+0x2cc/0x19d0 + ? 
__tun_get+0x60/0x1b0 + tun_chr_write_iter+0x57/0x86 + __vfs_write+0x156/0x1e0 + vfs_write+0xf7/0x230 + SyS_write+0x57/0xd0 + entry_SYSCALL_64_fastpath+0x1f/0xbe + RIP: 0033:0x7f4be4356df9 + RSP: 002b:00007ffc18101c08 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 + RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f4be4356df9 + RDX: 0000000000000046 RSI: 0000000020101000 RDI: 0000000000000005 + RBP: 00007ffc18101c40 R08: 0000000000000001 R09: 0000000000000001 + R10: 0000000000000001 R11: 0000000000000293 R12: 0000559c75f64780 + R13: 00007ffc18101d30 R14: 0000000000000000 R15: 0000000000000000 + +Fixes: 33dccbb050bb ("tun: Limit amount of queued packets per device") +Fixes: 20d29d7a916a ("net: macvtap driver") +Signed-off-by: Craig Gallek +Reviewed-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + drivers/net/macvtap.c | 2 ++ + drivers/net/tun.c | 4 ++++ + 2 files changed, 6 insertions(+) + +--- a/drivers/net/macvtap.c ++++ b/drivers/net/macvtap.c +@@ -1077,6 +1077,8 @@ static long macvtap_ioctl(struct file *f + case TUNSETSNDBUF: + if (get_user(s, sp)) + return -EFAULT; ++ if (s <= 0) ++ return -EINVAL; + + q->sk.sk_sndbuf = s; + return 0; +--- a/drivers/net/tun.c ++++ b/drivers/net/tun.c +@@ -2180,6 +2180,10 @@ static long __tun_chr_ioctl(struct file + ret = -EFAULT; + break; + } ++ if (sndbuf <= 0) { ++ ret = -EINVAL; ++ break; ++ } + + tun->sndbuf = sndbuf; + tun_set_sndbuf(tun);