--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Fri, 6 Oct 2017 19:02:35 +0300
+Subject: gso: fix payload length when gso_size is zero
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+
+[ Upstream commit 3d0241d57c7b25bb75ac9d7a62753642264fdbce ]
+
+When gso_size is reset to zero for the tail segment in skb_segment(), later
+in ipv6_gso_segment(), __skb_udp_tunnel_segment() and gre_gso_segment()
+we will get incorrect results (payload length, pcsum) for that segment.
+inet_gso_segment() already has a check for gso_size before calculating
+payload.
+
+The issue was found with LTP vxlan & gre tests over ixgbe NIC.
+
+Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer")
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/gre_offload.c | 2 +-
+ net/ipv4/udp_offload.c | 2 +-
+ net/ipv6/ip6_offload.c | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/gre_offload.c
++++ b/net/ipv4/gre_offload.c
+@@ -98,7 +98,7 @@ static struct sk_buff *gre_gso_segment(s
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
+ pcsum = (__sum16 *)(greh + 1);
+
+- if (gso_partial) {
++ if (gso_partial && skb_is_gso(skb)) {
+ unsigned int partial_adj;
+
+ /* Adjust checksum to account for the fact that
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -122,7 +122,7 @@ static struct sk_buff *__skb_udp_tunnel_
+ * will be using a length value equal to only one MSS sized
+ * segment instead of the entire frame.
+ */
+- if (gso_partial) {
++ if (gso_partial && skb_is_gso(skb)) {
+ uh->len = htons(skb_shinfo(skb)->gso_size +
+ SKB_GSO_CB(skb)->data_offset +
+ skb->head - (unsigned char *)uh);
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(
+
+ for (skb = segs; skb; skb = skb->next) {
+ ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
+- if (gso_partial)
++ if (gso_partial && skb_is_gso(skb))
+ payload_len = skb_shinfo(skb)->gso_size +
+ SKB_GSO_CB(skb)->data_offset +
+ skb->head - (unsigned char *)(ipv6h + 1);
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 26 Oct 2017 19:23:27 +0800
+Subject: ip6_gre: only increase err_count for some certain type icmpv6 in ip6gre_err
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit f8d20b46ce55cf40afb30dcef6d9288f7ef46d9b ]
+
+The similar fix in patch 'ipip: only increase err_count for some
+certain type icmp in ipip_err' is needed for ip6gre_err.
+
+In Jianlin's case, udp netperf broke even when receiving a TooBig
+icmpv6 packet.
+
+Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *s
+ case ICMPV6_DEST_UNREACH:
+ net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
+ t->parms.name);
+- break;
++ if (code != ICMPV6_PORT_UNREACH)
++ break;
++ return;
+ case ICMPV6_TIME_EXCEED:
+ if (code == ICMPV6_EXC_HOPLIMIT) {
+ net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+ t->parms.name);
++ break;
+ }
+- break;
++ return;
+ case ICMPV6_PARAMPROB:
+ teli = 0;
+ if (code == ICMPV6_HDR_FIELD)
+@@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *s
+ net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+ t->parms.name);
+ }
+- break;
++ return;
+ case ICMPV6_PKT_TOOBIG:
+ mtu = be32_to_cpu(info) - offset - t->tun_hlen;
+ if (t->dev->type == ARPHRD_ETHER)
+@@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *s
+ if (mtu < IPV6_MIN_MTU)
+ mtu = IPV6_MIN_MTU;
+ t->dev->mtu = mtu;
+- break;
++ return;
+ }
+
+ if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 26 Oct 2017 19:27:17 +0800
+Subject: ip6_gre: update dst pmtu if dev mtu has been updated by toobig in __gre6_xmit
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 8aec4959d832bae0889a8e2f348973b5e4abffef ]
+
+When receiving a Toobig icmpv6 packet, ip6gre_err would just set
+tunnel dev's mtu, that's not enough. For skb_dst(skb)'s pmtu may
+still be using the old value, it has no chance to be updated with
+tunnel dev's mtu.
+
+Jianlin found this issue by reducing route's mtu while running
+netperf, the performance went to 0.
+
+ip6ip6 and ip4ip6 tunnel can work well with this, as they lookup
+the upper dst and update_pmtu its pmtu or icmpv6_send a Toobig
+to upper socket after setting tunnel dev's mtu.
+
+We couldn't do that for ip6_gre, as gre's inner packet could be
+any protocol, it's difficult to handle them (like lookup upper
+dst) in a good way.
+
+So this patch is to fix it by updating skb_dst(skb)'s pmtu when
+dev->mtu < skb_dst(skb)'s pmtu in tx path. It's safe to do this
+update there, as usually dev->mtu <= skb_dst(skb)'s pmtu and no
+performance regression can be caused by this.
+
+Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -508,8 +508,8 @@ static netdev_tx_t __gre6_xmit(struct sk
+ __u32 *pmtu, __be16 proto)
+ {
+ struct ip6_tnl *tunnel = netdev_priv(dev);
+- __be16 protocol = (dev->type == ARPHRD_ETHER) ?
+- htons(ETH_P_TEB) : proto;
++ struct dst_entry *dst = skb_dst(skb);
++ __be16 protocol;
+
+ if (dev->type == ARPHRD_ETHER)
+ IPCB(skb)->flags = 0;
+@@ -523,9 +523,14 @@ static netdev_tx_t __gre6_xmit(struct sk
+ tunnel->o_seqno++;
+
+ /* Push GRE header. */
++ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
+ gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+
++ /* TooBig packet may have updated dst->dev's mtu */
++ if (dst && dst_mtu(dst) > dst->dev->mtu)
++ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
++
+ return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
+ NEXTHDR_GRE);
+ }
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 26 Oct 2017 19:19:56 +0800
+Subject: ipip: only increase err_count for some certain type icmp in ipip_err
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit f3594f0a7ea36661d7fd942facd7f31a64245f1a ]
+
+t->err_count is used to count the link failure on tunnel and an err
+will be reported to user socket in tx path if t->err_count is not 0.
+udp socket could even return EHOSTUNREACH to users.
+
+Since commit fd58156e456d ("IPIP: Use ip-tunneling code.") removed
+the 'switch check' for icmp type in ipip_err(), err_count would be
+increased by the icmp packet with ICMP_EXC_FRAGTIME code. A link
+failure would be reported out due to this.
+
+In Jianlin's case, when receiving an ICMP_EXC_FRAGTIME icmp packet,
+udp netperf failed with the err:
+ send_data: data send error: No route to host (errno 113)
+
+We expect this error reported from tunnel to socket when receiving
+some certain type icmp, but not ICMP_EXC_FRAGTIME, ICMP_SR_FAILED
+or ICMP_PARAMETERPROB ones.
+
+This patch is to bring 'switch check' for icmp type back to ipip_err
+so that it only reports link failure for the right type icmp, just as
+in ipgre_err() and ipip6_err().
+
+Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ipip.c | 59 +++++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 42 insertions(+), 17 deletions(-)
+
+--- a/net/ipv4/ipip.c
++++ b/net/ipv4/ipip.c
+@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_op
+
+ static int ipip_err(struct sk_buff *skb, u32 info)
+ {
+-
+-/* All the routers (except for Linux) return only
+- 8 bytes of packet payload. It means, that precise relaying of
+- ICMP in the real Internet is absolutely infeasible.
+- */
++ /* All the routers (except for Linux) return only
++ * 8 bytes of packet payload. It means, that precise relaying of
++ * ICMP in the real Internet is absolutely infeasible.
++ */
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
+- struct ip_tunnel *t;
+- int err;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
++ struct ip_tunnel *t;
++ int err = 0;
++
++ switch (type) {
++ case ICMP_DEST_UNREACH:
++ switch (code) {
++ case ICMP_SR_FAILED:
++ /* Impossible event. */
++ goto out;
++ default:
++ /* All others are translated to HOST_UNREACH.
++ * rfc2003 contains "deep thoughts" about NET_UNREACH,
++ * I believe they are just ether pollution. --ANK
++ */
++ break;
++ }
++ break;
++
++ case ICMP_TIME_EXCEEDED:
++ if (code != ICMP_EXC_TTL)
++ goto out;
++ break;
++
++ case ICMP_REDIRECT:
++ break;
++
++ default:
++ goto out;
++ }
+
+- err = -ENOENT;
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->daddr, iph->saddr, 0);
+- if (!t)
++ if (!t) {
++ err = -ENOENT;
+ goto out;
++ }
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+- t->parms.link, 0, iph->protocol, 0);
+- err = 0;
++ ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
++ iph->protocol, 0);
+ goto out;
+ }
+
+ if (type == ICMP_REDIRECT) {
+- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
+- iph->protocol, 0);
+- err = 0;
++ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
+ goto out;
+ }
+
+- if (t->parms.iph.daddr == 0)
++ if (t->parms.iph.daddr == 0) {
++ err = -ENOENT;
+ goto out;
++ }
+
+- err = 0;
+ if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+ goto out;
+
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 30 Oct 2017 22:47:09 -0700
+Subject: ipv6: addrconf: increment ifp refcount before ipv6_del_addr()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit e669b86945478b3d90d2d87e3793a6eed06d332f ]
+
+In the (unlikely) event fixup_permanent_addr() returns a failure,
+addrconf_permanent_addr() calls ipv6_del_addr() without the
+mandatory call to in6_ifa_hold(), leading to a refcount error,
+spotted by syzkaller :
+
+WARNING: CPU: 1 PID: 3142 at lib/refcount.c:227 refcount_dec+0x4c/0x50
+lib/refcount.c:227
+Kernel panic - not syncing: panic_on_warn set ...
+
+CPU: 1 PID: 3142 Comm: ip Not tainted 4.14.0-rc4-next-20171009+ #33
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ panic+0x1e4/0x41c kernel/panic.c:181
+ __warn+0x1c4/0x1e0 kernel/panic.c:544
+ report_bug+0x211/0x2d0 lib/bug.c:183
+ fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178
+ do_trap_no_signal arch/x86/kernel/traps.c:212 [inline]
+ do_trap+0x260/0x390 arch/x86/kernel/traps.c:261
+ do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298
+ do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311
+ invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905
+RIP: 0010:refcount_dec+0x4c/0x50 lib/refcount.c:227
+RSP: 0018:ffff8801ca49e680 EFLAGS: 00010286
+RAX: 000000000000002c RBX: ffff8801d07cfcdc RCX: 0000000000000000
+RDX: 000000000000002c RSI: 1ffff10039493c90 RDI: ffffed0039493cc4
+RBP: ffff8801ca49e688 R08: ffff8801ca49dd70 R09: 0000000000000000
+R10: ffff8801ca49df58 R11: 0000000000000000 R12: 1ffff10039493cd9
+R13: ffff8801ca49e6e8 R14: ffff8801ca49e7e8 R15: ffff8801d07cfcdc
+ __in6_ifa_put include/net/addrconf.h:369 [inline]
+ ipv6_del_addr+0x42b/0xb60 net/ipv6/addrconf.c:1208
+ addrconf_permanent_addr net/ipv6/addrconf.c:3327 [inline]
+ addrconf_notify+0x1c66/0x2190 net/ipv6/addrconf.c:3393
+ notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93
+ __raw_notifier_call_chain kernel/notifier.c:394 [inline]
+ raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
+ call_netdevice_notifiers_info+0x32/0x60 net/core/dev.c:1697
+ call_netdevice_notifiers net/core/dev.c:1715 [inline]
+ __dev_notify_flags+0x15d/0x430 net/core/dev.c:6843
+ dev_change_flags+0xf5/0x140 net/core/dev.c:6879
+ do_setlink+0xa1b/0x38e0 net/core/rtnetlink.c:2113
+ rtnl_newlink+0xf0d/0x1a40 net/core/rtnetlink.c:2661
+ rtnetlink_rcv_msg+0x733/0x1090 net/core/rtnetlink.c:4301
+ netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2408
+ rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4313
+ netlink_unicast_kernel net/netlink/af_netlink.c:1273 [inline]
+ netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1299
+ netlink_sendmsg+0xa4a/0xe70 net/netlink/af_netlink.c:1862
+ sock_sendmsg_nosec net/socket.c:633 [inline]
+ sock_sendmsg+0xca/0x110 net/socket.c:643
+ ___sys_sendmsg+0x75b/0x8a0 net/socket.c:2049
+ __sys_sendmsg+0xe5/0x210 net/socket.c:2083
+ SYSC_sendmsg net/socket.c:2094 [inline]
+ SyS_sendmsg+0x2d/0x50 net/socket.c:2090
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x7fa9174d3320
+RSP: 002b:00007ffe302ae9e8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007ffe302b2ae0 RCX: 00007fa9174d3320
+RDX: 0000000000000000 RSI: 00007ffe302aea20 RDI: 0000000000000016
+RBP: 0000000000000082 R08: 0000000000000000 R09: 000000000000000f
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffe302b32a0
+R13: 0000000000000000 R14: 00007ffe302b2ab8 R15: 00007ffe302b32b8
+
+Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: David Ahern <dsahern@gmail.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3299,6 +3299,7 @@ static void addrconf_permanent_addr(stru
+ if ((ifp->flags & IFA_F_PERMANENT) &&
+ fixup_permanent_addr(idev, ifp) < 0) {
+ write_unlock_bh(&idev->lock);
++ in6_ifa_hold(ifp);
+ ipv6_del_addr(ifp);
+ write_lock_bh(&idev->lock);
+
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 21 Oct 2017 12:26:23 -0700
+Subject: ipv6: flowlabel: do not leave opt->tot_len with garbage
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 864e2a1f8aac05effac6063ce316b480facb46ff ]
+
+When syzkaller team brought us a C repro for the crash [1] that
+had been reported many times in the past, I finally could find
+the root cause.
+
+If FlowLabel info is merged by fl6_merge_options(), we leave
+part of the opt_space storage provided by udp/raw/l2tp with random value
+in opt_space.tot_len, unless a control message was provided at sendmsg()
+time.
+
+Then ip6_setup_cork() would use this random value to perform a kzalloc()
+call. Undefined behavior and crashes.
+
+Fix is to properly set tot_len in fl6_merge_options()
+
+At the same time, we can also avoid consuming memory and cpu cycles
+to clear it, if every option is copied via a kmemdup(). This is the
+change in ip6_setup_cork().
+
+[1]
+kasan: CONFIG_KASAN_INLINE enabled
+kasan: GPF could be caused by NULL-ptr deref or user memory access
+general protection fault: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+ (ftrace buffer empty)
+Modules linked in:
+CPU: 0 PID: 6613 Comm: syz-executor0 Not tainted 4.14.0-rc4+ #127
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+task: ffff8801cb64a100 task.stack: ffff8801cc350000
+RIP: 0010:ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168
+RSP: 0018:ffff8801cc357550 EFLAGS: 00010203
+RAX: dffffc0000000000 RBX: ffff8801cc357748 RCX: 0000000000000010
+RDX: 0000000000000002 RSI: ffffffff842bd1d9 RDI: 0000000000000014
+RBP: ffff8801cc357620 R08: ffff8801cb17f380 R09: ffff8801cc357b10
+R10: ffff8801cb64a100 R11: 0000000000000000 R12: ffff8801cc357ab0
+R13: ffff8801cc357b10 R14: 0000000000000000 R15: ffff8801c3bbf0c0
+FS: 00007f9c5c459700(0000) GS:ffff8801db200000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000020324000 CR3: 00000001d1cf2000 CR4: 00000000001406f0
+DR0: 0000000020001010 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600
+Call Trace:
+ ip6_make_skb+0x282/0x530 net/ipv6/ip6_output.c:1729
+ udpv6_sendmsg+0x2769/0x3380 net/ipv6/udp.c:1340
+ inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762
+ sock_sendmsg_nosec net/socket.c:633 [inline]
+ sock_sendmsg+0xca/0x110 net/socket.c:643
+ SYSC_sendto+0x358/0x5a0 net/socket.c:1750
+ SyS_sendto+0x40/0x50 net/socket.c:1718
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x4520a9
+RSP: 002b:00007f9c5c458c08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c
+RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004520a9
+RDX: 0000000000000001 RSI: 0000000020fd1000 RDI: 0000000000000016
+RBP: 0000000000000086 R08: 0000000020e0afe4 R09: 000000000000001c
+R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004bb1ee
+R13: 00000000ffffffff R14: 0000000000000016 R15: 0000000000000029
+Code: e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 ea 0f 00 00 48 8d 79 04 48 b8 00 00 00 00 00 fc ff df 45 8b 74 24 04 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85
+RIP: ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 RSP: ffff8801cc357550
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_flowlabel.c | 1 +
+ net/ipv6/ip6_output.c | 4 ++--
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_flowlabel.c
++++ b/net/ipv6/ip6_flowlabel.c
+@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options
+ }
+ opt_space->dst1opt = fopt->dst1opt;
+ opt_space->opt_flen = fopt->opt_flen;
++ opt_space->tot_len = fopt->tot_len;
+ return opt_space;
+ }
+ EXPORT_SYMBOL_GPL(fl6_merge_options);
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1215,11 +1215,11 @@ static int ip6_setup_cork(struct sock *s
+ if (WARN_ON(v6_cork->opt))
+ return -EINVAL;
+
+- v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
++ v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
+ if (unlikely(!v6_cork->opt))
+ return -ENOBUFS;
+
+- v6_cork->opt->tot_len = opt->tot_len;
++ v6_cork->opt->tot_len = sizeof(*opt);
+ v6_cork->opt->opt_flen = opt->opt_flen;
+ v6_cork->opt->opt_nflen = opt->opt_nflen;
+
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Fri, 13 Oct 2017 19:22:35 +0200
+Subject: l2tp: check ps->sock before running pppol2tp_session_ioctl()
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 5903f594935a3841137c86b9d5b75143a5b7121c ]
+
+When pppol2tp_session_ioctl() is called by pppol2tp_tunnel_ioctl(),
+the session may be unconnected. That is, it was created by
+pppol2tp_session_create() and hasn't been connected with
+pppol2tp_connect(). In this case, ps->sock is NULL, so we need to check
+for this case in order to avoid dereferencing a NULL pointer.
+
+Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_ppp.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/l2tp/l2tp_ppp.c
++++ b/net/l2tp/l2tp_ppp.c
+@@ -993,6 +993,9 @@ static int pppol2tp_session_ioctl(struct
+ session->name, cmd, arg);
+
+ sk = ps->sock;
++ if (!sk)
++ return -EBADR;
++
+ sock_hold(sk);
+
+ switch (cmd) {
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 10 Oct 2017 19:12:33 -0700
+Subject: net: call cgroup_sk_alloc() earlier in sk_clone_lock()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c0576e3975084d4699b7bfef578613fb8e1144f6 ]
+
+If for some reason, the newly allocated child needs to be freed,
+we will call cgroup_put() (via sk_free_unlock_clone()) while the
+corresponding cgroup_get() was not yet done, and we will free memory
+too soon.
+
+Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1526,6 +1526,7 @@ struct sock *sk_clone_lock(const struct
+ newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+
+ sock_reset_flag(newsk, SOCK_DONE);
++ cgroup_sk_alloc(&newsk->sk_cgrp_data);
+ skb_queue_head_init(&newsk->sk_error_queue);
+
+ filter = rcu_dereference_protected(newsk->sk_filter, 1);
+@@ -1560,8 +1561,6 @@ struct sock *sk_clone_lock(const struct
+ atomic64_set(&newsk->sk_cookie, 0);
+
+ mem_cgroup_sk_alloc(newsk);
+- cgroup_sk_alloc(&newsk->sk_cgrp_data);
+-
+ /*
+ * Before updating sk_refcnt, we must commit prior changes to memory
+ * (Documentation/RCU/rculist_nulls.txt for details)
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Andrei Vagin <avagin@openvz.org>
+Date: Wed, 25 Oct 2017 10:16:42 -0700
+Subject: net/unix: don't show information about sockets from other namespaces
+
+From: Andrei Vagin <avagin@openvz.org>
+
+
+[ Upstream commit 0f5da659d8f1810f44de14acf2c80cd6499623a0 ]
+
+socket_diag shows information only about sockets from a namespace where
+a diag socket lives.
+
+But if we request information about one unix socket, the kernel doesn't
+check that its netns is matched with a diag socket namespace, so any
+user can get information about any unix socket in a system. This looks
+like a bug.
+
+v2: add a Fixes tag
+
+Fixes: 51d7cccf0723 ("net: make sock diag per-namespace")
+Signed-off-by: Andrei Vagin <avagin@openvz.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/diag.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/unix/diag.c
++++ b/net/unix/diag.c
+@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk
+ err = -ENOENT;
+ if (sk == NULL)
+ goto out_nosk;
++ if (!net_eq(sock_net(sk), net))
++ goto out;
+
+ err = sock_diag_check_cookie(sk, req->udiag_cookie);
+ if (err)
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Fri, 27 Oct 2017 22:08:56 -0700
+Subject: net_sched: avoid matching qdisc with zero handle
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 50317fce2cc70a2bbbc4b42c31bbad510382a53c ]
+
+Davide found the following script triggers a NULL pointer
+dereference:
+
+ip l a name eth0 type dummy
+tc q a dev eth0 parent :1 handle 1: htb
+
+This is because for a freshly created netdevice noop_qdisc
+is attached and when passing 'parent :1', kernel actually
+tries to match the major handle which is 0 and noop_qdisc
+has handle 0 so is matched by mistake. Commit 69012ae425d7
+tries to fix a similar bug but still misses this case.
+
+Handle 0 is not a valid one, should be just skipped. In
+fact, kernel uses it as TC_H_UNSPEC.
+
+Fixes: 69012ae425d7 ("net: sched: fix handling of singleton qdiscs with qdisc_hash")
+Fixes: 59cc1f61f09c ("net: sched:convert qdisc linked list to hashtable")
+Reported-by: Davide Caratti <dcaratti@redhat.com>
+Cc: Jiri Kosina <jkosina@suse.cz>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_api.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -296,6 +296,8 @@ struct Qdisc *qdisc_lookup(struct net_de
+ {
+ struct Qdisc *q;
+
++ if (!handle)
++ return NULL;
+ q = qdisc_match_from_root(dev->qdisc, handle);
+ if (q)
+ goto out;
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Mon, 9 Oct 2017 14:14:51 +0200
+Subject: netlink: do not set cb_running if dump's start() errs
+
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+
+
+[ Upstream commit 41c87425a1ac9b633e0fcc78eb1f19640c8fb5a0 ]
+
+It turns out that multiple places can call netlink_dump(), which means
+it's still possible to dereference partially initialized values in
+dump() that were the result of a faulty returned start().
+
+This fixes the issue by calling start() _before_ setting cb_running to
+true, so that there's no chance at all of hitting the dump() function
+through any indirect paths.
+
+It also moves the call to start() to be when the mutex is held. This has
+the nice side effect of serializing invocations to start(), which is
+likely desirable anyway. It also prevents any possible other races that
+might come out of this logic.
+
+In testing this with several different pieces of tricky code to trigger
+these issues, this commit fixes all avenues that I'm aware of.
+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Cc: Johannes Berg <johannes@sipsolutions.net>
+Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c | 13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2207,16 +2207,17 @@ int __netlink_dump_start(struct sock *ss
+ cb->min_dump_alloc = control->min_dump_alloc;
+ cb->skb = skb;
+
++ if (cb->start) {
++ ret = cb->start(cb);
++ if (ret)
++ goto error_unlock;
++ }
++
+ nlk->cb_running = true;
+
+ mutex_unlock(nlk->cb_mutex);
+
+- ret = 0;
+- if (cb->start)
+- ret = cb->start(cb);
+-
+- if (!ret)
+- ret = netlink_dump(sk);
++ ret = netlink_dump(sk);
+
+ sock_put(sk);
+
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 18 Oct 2017 16:14:52 -0700
+Subject: packet: avoid panic in packet_getsockopt()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 509c7a1ecc8601f94ffba8a00889fefb239c00c6 ]
+
+syzkaller got crashes in packet_getsockopt() processing
+PACKET_ROLLOVER_STATS command while another thread was managing
+to change po->rollover
+
+Using RCU will fix this bug. We might later add proper RCU annotations
+for sparse sake.
+
+In v2: I replaced kfree(rollover) in fanout_add() to kfree_rcu()
+variant, as spotted by John.
+
+Fixes: a9b6391814d5 ("packet: rollover statistics")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Cc: John Sperbeck <jsperbeck@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c | 24 ++++++++++++++++--------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1720,7 +1720,7 @@ static int fanout_add(struct sock *sk, u
+
+ out:
+ if (err && rollover) {
+- kfree(rollover);
++ kfree_rcu(rollover, rcu);
+ po->rollover = NULL;
+ }
+ mutex_unlock(&fanout_mutex);
+@@ -1747,8 +1747,10 @@ static struct packet_fanout *fanout_rele
+ else
+ f = NULL;
+
+- if (po->rollover)
++ if (po->rollover) {
+ kfree_rcu(po->rollover, rcu);
++ po->rollover = NULL;
++ }
+ }
+ mutex_unlock(&fanout_mutex);
+
+@@ -3851,6 +3853,7 @@ static int packet_getsockopt(struct sock
+ void *data = &val;
+ union tpacket_stats_u st;
+ struct tpacket_rollover_stats rstats;
++ struct packet_rollover *rollover;
+
+ if (level != SOL_PACKET)
+ return -ENOPROTOOPT;
+@@ -3929,13 +3932,18 @@ static int packet_getsockopt(struct sock
+ 0);
+ break;
+ case PACKET_ROLLOVER_STATS:
+- if (!po->rollover)
++ rcu_read_lock();
++ rollover = rcu_dereference(po->rollover);
++ if (rollover) {
++ rstats.tp_all = atomic_long_read(&rollover->num);
++ rstats.tp_huge = atomic_long_read(&rollover->num_huge);
++ rstats.tp_failed = atomic_long_read(&rollover->num_failed);
++ data = &rstats;
++ lv = sizeof(rstats);
++ }
++ rcu_read_unlock();
++ if (!rollover)
+ return -EINVAL;
+- rstats.tp_all = atomic_long_read(&po->rollover->num);
+- rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+- rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+- data = &rstats;
+- lv = sizeof(rstats);
+ break;
+ case PACKET_TX_HAS_OFF:
+ val = po->tp_tx_has_off;
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Fri, 6 Oct 2017 17:05:49 +0200
+Subject: ppp: fix race in ppp device destruction
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 6151b8b37b119e8e3a8401b080d532520c95faf4 ]
+
+ppp_release() tries to ensure that netdevices are unregistered before
+decrementing the unit refcount and running ppp_destroy_interface().
+
+This is all fine as long as the device is unregistered by
+ppp_release(): the unregister_netdevice() call, followed by
+rtnl_unlock(), guarantees that the unregistration process completes
+before rtnl_unlock() returns.
+
+However, the device may be unregistered by other means (like
+ppp_nl_dellink()). If this happens right before ppp_release() calling
+rtnl_lock(), then ppp_release() has to wait for the concurrent
+unregistration code to release the lock.
+But rtnl_unlock() releases the lock before completing the device
+unregistration process. This allows ppp_release() to proceed and
+eventually call ppp_destroy_interface() before the unregistration
+process completes. Calling free_netdev() on this partially unregistered
+device will BUG():
+
+ ------------[ cut here ]------------
+ kernel BUG at net/core/dev.c:8141!
+ invalid opcode: 0000 [#1] SMP
+
+ CPU: 1 PID: 1557 Comm: pppd Not tainted 4.14.0-rc2+ #4
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
+
+ Call Trace:
+ ppp_destroy_interface+0xd8/0xe0 [ppp_generic]
+ ppp_disconnect_channel+0xda/0x110 [ppp_generic]
+ ppp_unregister_channel+0x5e/0x110 [ppp_generic]
+ pppox_unbind_sock+0x23/0x30 [pppox]
+ pppoe_connect+0x130/0x440 [pppoe]
+ SYSC_connect+0x98/0x110
+ ? do_fcntl+0x2c0/0x5d0
+ SyS_connect+0xe/0x10
+ entry_SYSCALL_64_fastpath+0x1a/0xa5
+
+ RIP: free_netdev+0x107/0x110 RSP: ffffc28a40573d88
+ ---[ end trace ed294ff0cc40eeff ]---
+
+We could set the ->needs_free_netdev flag on PPP devices and move the
+ppp_destroy_interface() logic into the ->priv_destructor() callback. But
+that'd be quite intrusive as we'd first need to unlink from the other
+channels and units that depend on the device (the ones that used the
+PPPIOCCONNECT and PPPIOCATTACH ioctls).
+
+Instead, we can just let the netdevice hold a reference on its
+ppp_file. This reference is dropped in ->priv_destructor(), at the very
+end of the unregistration process, so that neither ppp_release() nor
+ppp_disconnect_channel() can call ppp_destroy_interface() in the interim.
+
+Reported-by: Beniamino Galvani <bgalvani@redhat.com>
+Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/ppp_generic.c | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -1338,7 +1338,17 @@ ppp_get_stats64(struct net_device *dev,
+
+ static int ppp_dev_init(struct net_device *dev)
+ {
++ struct ppp *ppp;
++
+ netdev_lockdep_set_classes(dev);
++
++ ppp = netdev_priv(dev);
++ /* Let the netdevice take a reference on the ppp file. This ensures
++ * that ppp_destroy_interface() won't run before the device gets
++ * unregistered.
++ */
++ atomic_inc(&ppp->file.refcnt);
++
+ return 0;
+ }
+
+@@ -1361,6 +1371,15 @@ static void ppp_dev_uninit(struct net_de
+ wake_up_interruptible(&ppp->file.rwait);
+ }
+
++static void ppp_dev_priv_destructor(struct net_device *dev)
++{
++ struct ppp *ppp;
++
++ ppp = netdev_priv(dev);
++ if (atomic_dec_and_test(&ppp->file.refcnt))
++ ppp_destroy_interface(ppp);
++}
++
+ static const struct net_device_ops ppp_netdev_ops = {
+ .ndo_init = ppp_dev_init,
+ .ndo_uninit = ppp_dev_uninit,
+@@ -1386,6 +1405,7 @@ static void ppp_setup(struct net_device
+ dev->tx_queue_len = 3;
+ dev->type = ARPHRD_PPP;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
++ dev->destructor = ppp_dev_priv_destructor;
+ netif_keep_dst(dev);
+ }
+
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 18 Oct 2017 21:37:49 +0800
+Subject: sctp: add the missing sock_owned_by_user check in sctp_icmp_redirect
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 1cc276cec9ec574d41cf47dfc0f51406b6f26ab4 ]
+
+Now sctp processes icmp redirect packet in sctp_icmp_redirect where
+it calls sctp_transport_dst_check in which tp->dst can be released.
+
+The problem is before calling sctp_transport_dst_check, it doesn't
+check sock_owned_by_user, which means tp->dst could be freed while
+a process is accessing it with owning the socket.
+
+A use-after-free issue could be triggered by this.
+
+This patch is to fix it by checking sock_owned_by_user before calling
+sctp_transport_dst_check in sctp_icmp_redirect, so that it would not
+release tp->dst if users still hold sock lock.
+
+Besides, the same issue fixed in commit 45caeaa5ac0b ("dccp/tcp: fix
+routing redirect race") on sctp also needs this check.
+
+Fixes: 55be7a9c6074 ("ipv4: Add redirect support to all protocol icmp error handlers")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/input.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sctp/input.c
++++ b/net/sctp/input.c
+@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk,
+ {
+ struct dst_entry *dst;
+
+- if (!t)
++ if (sock_owned_by_user(sk) || !t)
+ return;
+ dst = sctp_transport_dst_check(t);
+ if (dst)
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Laszlo Toth <laszlth@gmail.com>
+Date: Mon, 23 Oct 2017 19:19:33 +0200
+Subject: sctp: full support for ipv6 ip_nonlocal_bind & IP_FREEBIND
+
+From: Laszlo Toth <laszlth@gmail.com>
+
+
+[ Upstream commit b71d21c274eff20a9db8158882b545b141b73ab8 ]
+
+Commit 9b9742022888 ("sctp: support ipv6 nonlocal bind")
+introduced support for the above options as v4 sctp did,
+so patched sctp_v6_available().
+
+In the v4 implementation it's enough, because
+sctp_inet_bind_verify() just returns with sctp_v4_available().
+However sctp_inet6_bind_verify() has an extra check before that
+for link-local scope_id, which won't respect the above options.
+
+Added the checks before calling ipv6_chk_addr(), but
+not before the validation of scope_id.
+
+before (w/ both options):
+ ./v6test fe80::10 sctp
+ bind failed, errno: 99 (Cannot assign requested address)
+ ./v6test fe80::10 tcp
+ bind success, errno: 0 (Success)
+
+after (w/ both options):
+ ./v6test fe80::10 sctp
+ bind success, errno: 0 (Success)
+
+Signed-off-by: Laszlo Toth <laszlth@gmail.com>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ipv6.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -881,8 +881,10 @@ static int sctp_inet6_bind_verify(struct
+ net = sock_net(&opt->inet.sk);
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
+- if (!dev ||
+- !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) {
++ if (!dev || !(opt->inet.freebind ||
++ net->ipv6.sysctl.ip_nonlocal_bind ||
++ ipv6_chk_addr(net, &addr->v6.sin6_addr,
++ dev, 0))) {
+ rcu_read_unlock();
+ return 0;
+ }
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sat, 28 Oct 2017 02:13:29 +0800
+Subject: sctp: reset owner sk for data chunks on out queues when migrating a sock
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit d04adf1b355181e737b6b1e23d801b07f0b7c4c0 ]
+
+Now when migrating sock to another one in sctp_sock_migrate(), it only
+resets owner sk for the data in receive queues, not the chunks on out
+queues.
+
+It would cause that data chunks length on the sock is not consistent
+with sk sk_wmem_alloc. When closing the sock or freeing these chunks,
+the old sk would never be freed, and the new sock may crash due to
+the overflow sk_wmem_alloc.
+
+syzbot found this issue with this series:
+
+ r0 = socket$inet_sctp()
+ sendto$inet(r0)
+ listen(r0)
+ accept4(r0)
+ close(r0)
+
+Although listen() should have returned error when one TCP-style socket
+is in connecting (I may fix this one in another patch), it could also
+be reproduced by peeling off an assoc.
+
+This issue is there since very beginning.
+
+This patch is to reset owner sk for the chunks on out queues so that
+sk sk_wmem_alloc has correct value after accept one sock or peeloff
+an assoc to one sock.
+
+Note that when resetting owner sk for chunks on outqueue, it has to
+sctp_clear_owner_w/skb_orphan chunks before changing assoc->base.sk
+first and then sctp_set_owner_w them after changing assoc->base.sk,
+due to that sctp_wfree and its callees are using assoc->base.sk.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c | 32 ++++++++++++++++++++++++++++++++
+ 1 file changed, 32 insertions(+)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -168,6 +168,36 @@ static inline void sctp_set_owner_w(stru
+ sk_mem_charge(sk, chunk->skb->truesize);
+ }
+
++static void sctp_clear_owner_w(struct sctp_chunk *chunk)
++{
++ skb_orphan(chunk->skb);
++}
++
++static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
++ void (*cb)(struct sctp_chunk *))
++
++{
++ struct sctp_outq *q = &asoc->outqueue;
++ struct sctp_transport *t;
++ struct sctp_chunk *chunk;
++
++ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
++ list_for_each_entry(chunk, &t->transmitted, transmitted_list)
++ cb(chunk);
++
++ list_for_each_entry(chunk, &q->retransmit, list)
++ cb(chunk);
++
++ list_for_each_entry(chunk, &q->sacked, list)
++ cb(chunk);
++
++ list_for_each_entry(chunk, &q->abandoned, list)
++ cb(chunk);
++
++ list_for_each_entry(chunk, &q->out_chunk_list, list)
++ cb(chunk);
++}
++
+ /* Verify that this is a valid address. */
+ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
+ int len)
+@@ -7826,7 +7856,9 @@ static void sctp_sock_migrate(struct soc
+ * paths won't try to lock it and then oldsk.
+ */
+ lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
++ sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w);
+ sctp_assoc_migrate(assoc, newsk);
++ sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w);
+
+ /* If the association on the newsk is already closed before accept()
+ * is called, set RCV_SHUTDOWN flag.
--- /dev/null
+gso-fix-payload-length-when-gso_size-is-zero.patch
+tun-tap-sanitize-tunsetsndbuf-input.patch
+ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch
+netlink-do-not-set-cb_running-if-dump-s-start-errs.patch
+net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch
+tcp-fix-tcp_mtu_probe-vs-highest_sack.patch
+l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch
+tun-call-dev_get_valid_name-before-register_netdevice.patch
+sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch
+tcp-dccp-fix-ireq-opt-races.patch
+packet-avoid-panic-in-packet_getsockopt.patch
+soreuseport-fix-initialization-race.patch
+ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch
+sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch
+tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
+tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
+net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch
+tap-double-free-in-error-path-in-tap_open.patch
+ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch
+ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch
+ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch
+tun-allow-positive-return-values-on-dev_get_valid_name-call.patch
+sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch
+net_sched-avoid-matching-qdisc-with-zero-handle.patch
+ppp-fix-race-in-ppp-device-destruction.patch
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Craig Gallek <kraig@google.com>
+Date: Thu, 19 Oct 2017 15:00:29 -0400
+Subject: soreuseport: fix initialization race
+
+From: Craig Gallek <kraig@google.com>
+
+
+[ Upstream commit 1b5f962e71bfad6284574655c406597535c3ea7a ]
+
+Syzkaller stumbled upon a way to trigger
+WARNING: CPU: 1 PID: 13881 at net/core/sock_reuseport.c:41
+reuseport_alloc+0x306/0x3b0 net/core/sock_reuseport.c:39
+
+There are two initialization paths for the sock_reuseport structure in a
+socket: Through the udp/tcp bind paths of SO_REUSEPORT sockets or through
+SO_ATTACH_REUSEPORT_[CE]BPF before bind. The existing implementation
+assumed that the socket lock protected both of these paths when it actually
+only protects the SO_ATTACH_REUSEPORT path. Syzkaller triggered this
+double allocation by running these paths concurrently.
+
+This patch moves the check for double allocation into the reuseport_alloc
+function which is protected by a global spin lock.
+
+Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
+Fixes: c125e80b8868 ("soreuseport: fast reuseport TCP socket selection")
+Signed-off-by: Craig Gallek <kraig@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_reuseport.c | 12 +++++++++---
+ net/ipv4/inet_hashtables.c | 5 +----
+ net/ipv4/udp.c | 5 +----
+ 3 files changed, 11 insertions(+), 11 deletions(-)
+
+--- a/net/core/sock_reuseport.c
++++ b/net/core/sock_reuseport.c
+@@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk)
+ * soft irq of receive path or setsockopt from process context
+ */
+ spin_lock_bh(&reuseport_lock);
+- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
+- lockdep_is_held(&reuseport_lock)),
+- "multiple allocations for the same socket");
++
++ /* Allocation attempts can occur concurrently via the setsockopt path
++ * and the bind/hash path. Nothing to do when we lose the race.
++ */
++ if (rcu_dereference_protected(sk->sk_reuseport_cb,
++ lockdep_is_held(&reuseport_lock)))
++ goto out;
++
+ reuse = __reuseport_alloc(INIT_SOCKS);
+ if (!reuse) {
+ spin_unlock_bh(&reuseport_lock);
+@@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk)
+ reuse->num_socks = 1;
+ rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
++out:
+ spin_unlock_bh(&reuseport_lock);
+
+ return 0;
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -455,10 +455,7 @@ static int inet_reuseport_add_sock(struc
+ return reuseport_add_sock(sk, sk2);
+ }
+
+- /* Initial allocation may have already happened via setsockopt */
+- if (!rcu_access_pointer(sk->sk_reuseport_cb))
+- return reuseport_alloc(sk);
+- return 0;
++ return reuseport_alloc(sk);
+ }
+
+ int __inet_hash(struct sock *sk, struct sock *osk,
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -222,10 +222,7 @@ static int udp_reuseport_add_sock(struct
+ }
+ }
+
+- /* Initial allocation may have already happened via setsockopt */
+- if (!rcu_access_pointer(sk->sk_reuseport_cb))
+- return reuseport_alloc(sk);
+- return 0;
++ return reuseport_alloc(sk);
+ }
+
+ /**
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Girish Moodalbail <girish.moodalbail@oracle.com>
+Date: Wed, 25 Oct 2017 00:23:04 -0700
+Subject: tap: double-free in error path in tap_open()
+
+From: Girish Moodalbail <girish.moodalbail@oracle.com>
+
+
+[ Upstream commit 78e0ea6791d7baafb8a0ca82b1bd0c7b3453c919 ]
+
+Double free of skb_array in tap module is causing kernel panic. When
+tap_set_queue() fails we free skb_array right away by calling
+skb_array_cleanup(). However, later on skb_array_cleanup() is called
+again by tap_sock_destruct through sock_put(). This patch fixes that
+issue.
+
+Fixes: 362899b8725b35e3 (macvtap: switch to use skb array)
+Signed-off-by: Girish Moodalbail <girish.moodalbail@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvtap.c | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -559,6 +559,10 @@ static int macvtap_open(struct inode *in
+ &macvtap_proto, 0);
+ if (!q)
+ goto err;
++ if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL)) {
++ sk_free(&q->sk);
++ goto err;
++ }
+
+ RCU_INIT_POINTER(q->sock.wq, &q->wq);
+ init_waitqueue_head(&q->wq.wait);
+@@ -582,22 +586,18 @@ static int macvtap_open(struct inode *in
+ if ((dev->features & NETIF_F_HIGHDMA) && (dev->features & NETIF_F_SG))
+ sock_set_flag(&q->sk, SOCK_ZEROCOPY);
+
+- err = -ENOMEM;
+- if (skb_array_init(&q->skb_array, dev->tx_queue_len, GFP_KERNEL))
+- goto err_array;
+-
+ err = macvtap_set_queue(dev, file, q);
+- if (err)
+- goto err_queue;
++ if (err) {
++ /* macvtap_sock_destruct() will take care of freeing skb_array */
++ goto err_put;
++ }
+
+ dev_put(dev);
+
+ rtnl_unlock();
+ return err;
+
+-err_queue:
+- skb_array_cleanup(&q->skb_array);
+-err_array:
++err_put:
+ sock_put(&q->sk);
+ err:
+ if (dev)
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Oct 2017 09:04:13 -0700
+Subject: tcp/dccp: fix ireq->opt races
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c92e8c02fe664155ac4234516e32544bec0f113d ]
+
+syzkaller found another bug in DCCP/TCP stacks [1]
+
+For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix
+ireq->pktopts race"), we need to make sure we do not access
+ireq->opt unless we own the request sock.
+
+Note the opt field is renamed to ireq_opt to ease grep games.
+
+[1]
+BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
+Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295
+
+CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ print_address_description+0x73/0x250 mm/kasan/report.c:252
+ kasan_report_error mm/kasan/report.c:351 [inline]
+ kasan_report+0x25b/0x340 mm/kasan/report.c:409
+ __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427
+ ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
+ tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135
+ tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587
+ tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557
+ __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072
+ tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline]
+ tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071
+ tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816
+ tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x40c341
+RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341
+RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015
+RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000
+R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1
+R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000
+
+Allocated by task 3295:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
+ __do_kmalloc mm/slab.c:3725 [inline]
+ __kmalloc+0x162/0x760 mm/slab.c:3734
+ kmalloc include/linux/slab.h:498 [inline]
+ tcp_v4_save_options include/net/tcp.h:1962 [inline]
+ tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271
+ tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283
+ tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313
+ tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857
+ tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482
+ tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Freed by task 3306:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
+ __cache_free mm/slab.c:3503 [inline]
+ kfree+0xca/0x250 mm/slab.c:3820
+ inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157
+ __sk_destruct+0xfd/0x910 net/core/sock.c:1560
+ sk_destruct+0x47/0x80 net/core/sock.c:1595
+ __sk_free+0x57/0x230 net/core/sock.c:1603
+ sk_free+0x2a/0x40 net/core/sock.c:1614
+ sock_put include/net/sock.h:1652 [inline]
+ inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959
+ tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765
+ tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
+Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h | 2 +-
+ net/dccp/ipv4.c | 13 ++++++++-----
+ net/ipv4/cipso_ipv4.c | 24 +++++++-----------------
+ net/ipv4/inet_connection_sock.c | 8 +++-----
+ net/ipv4/syncookies.c | 2 +-
+ net/ipv4/tcp_input.c | 2 +-
+ net/ipv4/tcp_ipv4.c | 21 ++++++++++++---------
+ 7 files changed, 33 insertions(+), 39 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -96,7 +96,7 @@ struct inet_request_sock {
+ kmemcheck_bitfield_end(flags);
+ u32 ir_mark;
+ union {
+- struct ip_options_rcu *opt;
++ struct ip_options_rcu __rcu *ireq_opt;
+ #if IS_ENABLED(CONFIG_IPV6)
+ struct {
+ struct ipv6_txoptions *ipv6_opt;
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(c
+ sk_daddr_set(newsk, ireq->ir_rmt_addr);
+ sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+ newinet->inet_saddr = ireq->ir_loc_addr;
+- newinet->inet_opt = ireq->opt;
+- ireq->opt = NULL;
++ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
+ newinet->mc_index = inet_iif(skb);
+ newinet->mc_ttl = ip_hdr(skb)->ttl;
+ newinet->inet_id = jiffies;
+@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(c
+ if (__inet_inherit_port(sk, newsk) < 0)
+ goto put_and_exit;
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+-
++ if (*own_req)
++ ireq->ireq_opt = NULL;
++ else
++ newinet->inet_opt = NULL;
+ return newsk;
+
+ exit_overflow:
+@@ -441,6 +443,7 @@ exit:
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ return NULL;
+ put_and_exit:
++ newinet->inet_opt = NULL;
+ inet_csk_prepare_forced_close(newsk);
+ dccp_done(newsk);
+ goto exit;
+@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const s
+ ireq->ir_rmt_addr);
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
+- ireq->opt);
++ rcu_dereference(ireq->ireq_opt));
+ err = net_xmit_eval(err);
+ }
+
+@@ -548,7 +551,7 @@ out:
+ static void dccp_v4_reqsk_destructor(struct request_sock *req)
+ {
+ dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
+- kfree(inet_rsk(req)->opt);
++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
+ }
+
+ void dccp_syn_ack_timeout(const struct request_sock *req)
+--- a/net/ipv4/cipso_ipv4.c
++++ b/net/ipv4/cipso_ipv4.c
+@@ -1943,7 +1943,7 @@ int cipso_v4_req_setattr(struct request_
+ buf = NULL;
+
+ req_inet = inet_rsk(req);
+- opt = xchg(&req_inet->opt, opt);
++ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
+ if (opt)
+ kfree_rcu(opt, rcu);
+
+@@ -1965,11 +1965,13 @@ req_setattr_failure:
+ * values on failure.
+ *
+ */
+-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
++static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
+ {
++ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
+ int hdr_delta = 0;
+- struct ip_options_rcu *opt = *opt_ptr;
+
++ if (!opt || opt->opt.cipso == 0)
++ return 0;
+ if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
+ u8 cipso_len;
+ u8 cipso_off;
+@@ -2031,14 +2033,10 @@ static int cipso_v4_delopt(struct ip_opt
+ */
+ void cipso_v4_sock_delattr(struct sock *sk)
+ {
+- int hdr_delta;
+- struct ip_options_rcu *opt;
+ struct inet_sock *sk_inet;
++ int hdr_delta;
+
+ sk_inet = inet_sk(sk);
+- opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
+- if (!opt || opt->opt.cipso == 0)
+- return;
+
+ hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
+ if (sk_inet->is_icsk && hdr_delta > 0) {
+@@ -2058,15 +2056,7 @@ void cipso_v4_sock_delattr(struct sock *
+ */
+ void cipso_v4_req_delattr(struct request_sock *req)
+ {
+- struct ip_options_rcu *opt;
+- struct inet_request_sock *req_inet;
+-
+- req_inet = inet_rsk(req);
+- opt = req_inet->opt;
+- if (!opt || opt->opt.cipso == 0)
+- return;
+-
+- cipso_v4_delopt(&req_inet->opt);
++ cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
+ }
+
+ /**
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -407,9 +407,10 @@ struct dst_entry *inet_csk_route_req(con
+ {
+ const struct inet_request_sock *ireq = inet_rsk(req);
+ struct net *net = read_pnet(&ireq->ireq_net);
+- struct ip_options_rcu *opt = ireq->opt;
++ struct ip_options_rcu *opt;
+ struct rtable *rt;
+
++ opt = rcu_dereference(ireq->ireq_opt);
+ flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
+@@ -443,10 +444,9 @@ struct dst_entry *inet_csk_route_child_s
+ struct flowi4 *fl4;
+ struct rtable *rt;
+
++ opt = rcu_dereference(ireq->ireq_opt);
+ fl4 = &newinet->cork.fl.u.ip4;
+
+- rcu_read_lock();
+- opt = rcu_dereference(newinet->inet_opt);
+ flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
+@@ -459,13 +459,11 @@ struct dst_entry *inet_csk_route_child_s
+ goto no_route;
+ if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+ goto route_err;
+- rcu_read_unlock();
+ return &rt->dst;
+
+ route_err:
+ ip_rt_put(rt);
+ no_route:
+- rcu_read_unlock();
+ __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
+ return NULL;
+ }
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -354,7 +354,7 @@ struct sock *cookie_v4_check(struct sock
+ /* We throwed the options of the initial SYN away, so we hope
+ * the ACK carries the same options again (see RFC1122 4.2.3.8)
+ */
+- ireq->opt = tcp_v4_save_options(skb);
++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
+
+ if (security_inet_conn_request(sk, skb, req)) {
+ reqsk_free(req);
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -6237,7 +6237,7 @@ struct request_sock *inet_reqsk_alloc(co
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ kmemcheck_annotate_bitfield(ireq, flags);
+- ireq->opt = NULL;
++ ireq->ireq_opt = NULL;
+ #if IS_ENABLED(CONFIG_IPV6)
+ ireq->pktopts = NULL;
+ #endif
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -861,7 +861,7 @@ static int tcp_v4_send_synack(const stru
+
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
+- ireq->opt);
++ rcu_dereference(ireq->ireq_opt));
+ err = net_xmit_eval(err);
+ }
+
+@@ -873,7 +873,7 @@ static int tcp_v4_send_synack(const stru
+ */
+ static void tcp_v4_reqsk_destructor(struct request_sock *req)
+ {
+- kfree(inet_rsk(req)->opt);
++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
+ }
+
+ #ifdef CONFIG_TCP_MD5SIG
+@@ -1199,7 +1199,7 @@ static void tcp_v4_init_req(struct reque
+
+ sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+ sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+- ireq->opt = tcp_v4_save_options(skb);
++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
+ }
+
+ static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
+@@ -1295,10 +1295,9 @@ struct sock *tcp_v4_syn_recv_sock(const
+ sk_daddr_set(newsk, ireq->ir_rmt_addr);
+ sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+ newsk->sk_bound_dev_if = ireq->ir_iif;
+- newinet->inet_saddr = ireq->ir_loc_addr;
+- inet_opt = ireq->opt;
+- rcu_assign_pointer(newinet->inet_opt, inet_opt);
+- ireq->opt = NULL;
++ newinet->inet_saddr = ireq->ir_loc_addr;
++ inet_opt = rcu_dereference(ireq->ireq_opt);
++ RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
+ newinet->mc_index = inet_iif(skb);
+ newinet->mc_ttl = ip_hdr(skb)->ttl;
+ newinet->rcv_tos = ip_hdr(skb)->tos;
+@@ -1346,9 +1345,12 @@ struct sock *tcp_v4_syn_recv_sock(const
+ if (__inet_inherit_port(sk, newsk) < 0)
+ goto put_and_exit;
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+- if (*own_req)
++ if (likely(*own_req)) {
+ tcp_move_syn(newtp, req);
+-
++ ireq->ireq_opt = NULL;
++ } else {
++ newinet->inet_opt = NULL;
++ }
+ return newsk;
+
+ exit_overflow:
+@@ -1359,6 +1361,7 @@ exit:
+ tcp_listendrop(sk);
+ return NULL;
+ put_and_exit:
++ newinet->inet_opt = NULL;
+ inet_csk_prepare_forced_close(newsk);
+ tcp_done(newsk);
+ goto exit;
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 22 Oct 2017 12:33:57 -0700
+Subject: tcp/dccp: fix lockdep splat in inet_csk_route_req()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit a6ca7abe53633d08eea1c6756cb49c9b2d4c90bf ]
+
+This patch fixes the following lockdep splat in inet_csk_route_req()
+
+ lockdep_rcu_suspicious
+ inet_csk_route_req
+ tcp_v4_send_synack
+ tcp_rtx_synack
+ inet_rtx_syn_ack
+ tcp_fastopen_synack_time
+ tcp_retransmit_timer
+ tcp_write_timer_handler
+ tcp_write_timer
+ call_timer_fn
+
+Thread running inet_csk_route_req() owns a reference on the request
+socket, so we have the guarantee ireq->ireq_opt won't be changed or
+freed.
+
+lockdep can enforce this invariant for us.
+
+Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_connection_sock.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -410,7 +410,8 @@ struct dst_entry *inet_csk_route_req(con
+ struct ip_options_rcu *opt;
+ struct rtable *rt;
+
+- opt = rcu_dereference(ireq->ireq_opt);
++ opt = rcu_dereference_protected(ireq->ireq_opt,
++ atomic_read(&req->rsk_refcnt) > 0);
+ flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 24 Oct 2017 08:20:31 -0700
+Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 06f877d613be3621604c2520ec0351d9fbdca15f ]
+
+In my first attempt to fix the lockdep splat, I forgot we could
+enter inet_csk_route_req() with a freshly allocated request socket,
+for which refcount has not yet been elevated, due to complex
+SLAB_TYPESAFE_BY_RCU rules.
+
+We either are in rcu_read_lock() section _or_ we own a refcount on the
+request.
+
+Correct RCU verb to use here is rcu_dereference_check(), although it is
+not possible to prove we actually own a reference on a shared
+refcount :/
+
+In v2, I added the ireq_opt_deref() helper and used it in three places,
+to fix other possible splats.
+
+[ 49.844590] lockdep_rcu_suspicious+0xea/0xf3
+[ 49.846487] inet_csk_route_req+0x53/0x14d
+[ 49.848334] tcp_v4_route_req+0xe/0x10
+[ 49.850174] tcp_conn_request+0x31c/0x6a0
+[ 49.851992] ? __lock_acquire+0x614/0x822
+[ 49.854015] tcp_v4_conn_request+0x5a/0x79
+[ 49.855957] ? tcp_v4_conn_request+0x5a/0x79
+[ 49.858052] tcp_rcv_state_process+0x98/0xdcc
+[ 49.859990] ? sk_filter_trim_cap+0x2f6/0x307
+[ 49.862085] tcp_v4_do_rcv+0xfc/0x145
+[ 49.864055] ? tcp_v4_do_rcv+0xfc/0x145
+[ 49.866173] tcp_v4_rcv+0x5ab/0xaf9
+[ 49.868029] ip_local_deliver_finish+0x1af/0x2e7
+[ 49.870064] ip_local_deliver+0x1b2/0x1c5
+[ 49.871775] ? inet_del_offload+0x45/0x45
+[ 49.873916] ip_rcv_finish+0x3f7/0x471
+[ 49.875476] ip_rcv+0x3f1/0x42f
+[ 49.876991] ? ip_local_deliver_finish+0x2e7/0x2e7
+[ 49.878791] __netif_receive_skb_core+0x6d3/0x950
+[ 49.880701] ? process_backlog+0x7e/0x216
+[ 49.882589] __netif_receive_skb+0x1d/0x5e
+[ 49.884122] process_backlog+0x10c/0x216
+[ 49.885812] net_rx_action+0x147/0x3df
+
+Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()")
+Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: kernel test robot <fengguang.wu@intel.com>
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h | 6 ++++++
+ net/dccp/ipv4.c | 2 +-
+ net/ipv4/inet_connection_sock.c | 4 ++--
+ net/ipv4/tcp_ipv4.c | 2 +-
+ 4 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -132,6 +132,12 @@ static inline int inet_request_bound_dev
+ return sk->sk_bound_dev_if;
+ }
+
++static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
++{
++ return rcu_dereference_check(ireq->ireq_opt,
++ atomic_read(&ireq->req.rsk_refcnt) > 0);
++}
++
+ struct inet_cork {
+ unsigned int flags;
+ __be32 addr;
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const s
+ ireq->ir_rmt_addr);
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
+- rcu_dereference(ireq->ireq_opt));
++ ireq_opt_deref(ireq));
+ err = net_xmit_eval(err);
+ }
+
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -410,8 +410,8 @@ struct dst_entry *inet_csk_route_req(con
+ struct ip_options_rcu *opt;
+ struct rtable *rt;
+
+- opt = rcu_dereference_protected(ireq->ireq_opt,
+- atomic_read(&req->rsk_refcnt) > 0);
++ opt = ireq_opt_deref(ireq);
++
+ flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -861,7 +861,7 @@ static int tcp_v4_send_synack(const stru
+
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
+- rcu_dereference(ireq->ireq_opt));
++ ireq_opt_deref(ireq));
+ err = net_xmit_eval(err);
+ }
+
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 30 Oct 2017 23:08:20 -0700
+Subject: tcp: fix tcp_mtu_probe() vs highest_sack
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 2b7cda9c35d3b940eb9ce74b30bbd5eb30db493d ]
+
+Based on SNMP values provided by Roman, Yuchung made the observation
+that some crashes in tcp_sacktag_walk() might be caused by MTU probing.
+
+Looking at tcp_mtu_probe(), I found that when a new skb was placed
+in front of the write queue, we were not updating tcp highest sack.
+
+If one skb is freed because all its content was copied to the new skb
+(for MTU probing), then tp->highest_sack could point to a now freed skb.
+
+Bad things would then happen, including infinite loops.
+
+This patch renames tcp_highest_sack_combine() and uses it
+from tcp_mtu_probe() to fix the bug.
+
+Note that I also removed one test against tp->sacked_out,
+since we want to replace tp->highest_sack regardless of whatever
+condition, since keeping a stale pointer to freed skb is a recipe
+for disaster.
+
+Fixes: a47e5a988a57 ("[TCP]: Convert highest_sack to sk_buff to allow direct access")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
+Reported-by: Roman Gushchin <guro@fb.com>
+Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h | 6 +++---
+ net/ipv4/tcp_output.c | 3 ++-
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1681,12 +1681,12 @@ static inline void tcp_highest_sack_rese
+ tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
+ }
+
+-/* Called when old skb is about to be deleted (to be combined with new skb) */
+-static inline void tcp_highest_sack_combine(struct sock *sk,
++/* Called when old skb is about to be deleted and replaced by new skb */
++static inline void tcp_highest_sack_replace(struct sock *sk,
+ struct sk_buff *old,
+ struct sk_buff *new)
+ {
+- if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
++ if (old == tcp_highest_sack(sk))
+ tcp_sk(sk)->highest_sack = new;
+ }
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1996,6 +1996,7 @@ static int tcp_mtu_probe(struct sock *sk
+ nskb->ip_summed = skb->ip_summed;
+
+ tcp_insert_write_queue_before(nskb, skb, sk);
++ tcp_highest_sack_replace(sk, skb, nskb);
+
+ len = 0;
+ tcp_for_write_queue_from_safe(skb, next, sk) {
+@@ -2535,7 +2536,7 @@ static void tcp_collapse_retrans(struct
+
+ BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
+
+- tcp_highest_sack_combine(sk, next_skb, skb);
++ tcp_highest_sack_replace(sk, next_skb, skb);
+
+ tcp_unlink_write_queue(next_skb, sk);
+
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Julien Gomes <julien@arista.com>
+Date: Wed, 25 Oct 2017 11:50:50 -0700
+Subject: tun: allow positive return values on dev_get_valid_name() call
+
+From: Julien Gomes <julien@arista.com>
+
+
+[ Upstream commit 5c25f65fd1e42685f7ccd80e0621829c105785d9 ]
+
+If the name argument of dev_get_valid_name() contains "%d", it will try
+to assign it a unit number in __dev_alloc_name() and return either the
+unit number (>= 0) or an error code (< 0).
+Considering positive values as error values prevents tun device creations
+relying on this mechanism; therefore we should only consider negative
+values as errors here.
+
+Signed-off-by: Julien Gomes <julien@arista.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1788,7 +1788,7 @@ static int tun_set_iff(struct net *net,
+ if (!dev)
+ return -ENOMEM;
+ err = dev_get_valid_name(net, dev, name);
+- if (err)
++ if (err < 0)
+ goto err_free_dev;
+
+ dev_net_set(dev, net);
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Fri, 13 Oct 2017 11:58:53 -0700
+Subject: tun: call dev_get_valid_name() before register_netdevice()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 0ad646c81b2182f7fa67ec0c8c825e0ee165696d ]
+
+register_netdevice() could fail early when we have an invalid
+dev name, in which case ->ndo_uninit() is not called. For tun
+device, this is a problem because a timer etc. are already
+initialized and it expects ->ndo_uninit() to clean them up.
+
+We could move these initializations into a ->ndo_init() so
+that register_netdevice() knows better, however this is still
+complicated due to the logic in tun_detach().
+
+Therefore, I choose to just call dev_get_valid_name() before
+register_netdevice(), which is quicker and much easier to audit.
+And for this specific case, it is already enough.
+
+Fixes: 96442e42429e ("tuntap: choose the txq based on rxq")
+Reported-by: Dmitry Alexeev <avekceeb@gmail.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c | 3 +++
+ include/linux/netdevice.h | 3 +++
+ net/core/dev.c | 6 +++---
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1787,6 +1787,9 @@ static int tun_set_iff(struct net *net,
+
+ if (!dev)
+ return -ENOMEM;
++ err = dev_get_valid_name(net, dev, name);
++ if (err)
++ goto err_free_dev;
+
+ dev_net_set(dev, net);
+ dev->rtnl_link_ops = &tun_link_ops;
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3742,6 +3742,9 @@ struct net_device *alloc_netdev_mqs(int
+ unsigned char name_assign_type,
+ void (*setup)(struct net_device *),
+ unsigned int txqs, unsigned int rxqs);
++int dev_get_valid_name(struct net *net, struct net_device *dev,
++ const char *name);
++
+ #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \
+ alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1)
+
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1115,9 +1115,8 @@ static int dev_alloc_name_ns(struct net
+ return ret;
+ }
+
+-static int dev_get_valid_name(struct net *net,
+- struct net_device *dev,
+- const char *name)
++int dev_get_valid_name(struct net *net, struct net_device *dev,
++ const char *name)
+ {
+ BUG_ON(!net);
+
+@@ -1133,6 +1132,7 @@ static int dev_get_valid_name(struct net
+
+ return 0;
+ }
++EXPORT_SYMBOL(dev_get_valid_name);
+
+ /**
+ * dev_change_name - change name of a device
--- /dev/null
+From foo@baz Wed Nov 15 17:24:03 CET 2017
+From: Craig Gallek <kraig@google.com>
+Date: Mon, 30 Oct 2017 18:50:11 -0400
+Subject: tun/tap: sanitize TUNSETSNDBUF input
+
+From: Craig Gallek <kraig@google.com>
+
+
+[ Upstream commit 93161922c658c714715686cd0cf69b090cb9bf1d ]
+
+Syzkaller found several variants of the lockup below by setting negative
+values with the TUNSETSNDBUF ioctl. This patch adds a sanity check
+to both the tun and tap versions of this ioctl.
+
+ watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [repro:2389]
+ Modules linked in:
+ irq event stamp: 329692056
+ hardirqs last enabled at (329692055): [<ffffffff824b8381>] _raw_spin_unlock_irqrestore+0x31/0x75
+ hardirqs last disabled at (329692056): [<ffffffff824b9e58>] apic_timer_interrupt+0x98/0xb0
+ softirqs last enabled at (35659740): [<ffffffff824bc958>] __do_softirq+0x328/0x48c
+ softirqs last disabled at (35659731): [<ffffffff811c796c>] irq_exit+0xbc/0xd0
+ CPU: 0 PID: 2389 Comm: repro Not tainted 4.14.0-rc7 #23
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+ task: ffff880009452140 task.stack: ffff880006a20000
+ RIP: 0010:_raw_spin_lock_irqsave+0x11/0x80
+ RSP: 0018:ffff880006a27c50 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff10
+ RAX: ffff880009ac68d0 RBX: ffff880006a27ce0 RCX: 0000000000000000
+ RDX: 0000000000000001 RSI: ffff880006a27ce0 RDI: ffff880009ac6900
+ RBP: ffff880006a27c60 R08: 0000000000000000 R09: 0000000000000000
+ R10: 0000000000000001 R11: 000000000063ff00 R12: ffff880009ac6900
+ R13: ffff880006a27cf8 R14: 0000000000000001 R15: ffff880006a27cf8
+ FS: 00007f4be4838700(0000) GS:ffff88000cc00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000020101000 CR3: 0000000009616000 CR4: 00000000000006f0
+ Call Trace:
+ prepare_to_wait+0x26/0xc0
+ sock_alloc_send_pskb+0x14e/0x270
+ ? remove_wait_queue+0x60/0x60
+ tun_get_user+0x2cc/0x19d0
+ ? __tun_get+0x60/0x1b0
+ tun_chr_write_iter+0x57/0x86
+ __vfs_write+0x156/0x1e0
+ vfs_write+0xf7/0x230
+ SyS_write+0x57/0xd0
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+ RIP: 0033:0x7f4be4356df9
+ RSP: 002b:00007ffc18101c08 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
+ RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f4be4356df9
+ RDX: 0000000000000046 RSI: 0000000020101000 RDI: 0000000000000005
+ RBP: 00007ffc18101c40 R08: 0000000000000001 R09: 0000000000000001
+ R10: 0000000000000001 R11: 0000000000000293 R12: 0000559c75f64780
+ R13: 00007ffc18101d30 R14: 0000000000000000 R15: 0000000000000000
+
+Fixes: 33dccbb050bb ("tun: Limit amount of queued packets per device")
+Fixes: 20d29d7a916a ("net: macvtap driver")
+Signed-off-by: Craig Gallek <kraig@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macvtap.c | 2 ++
+ drivers/net/tun.c | 4 ++++
+ 2 files changed, 6 insertions(+)
+
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -1077,6 +1077,8 @@ static long macvtap_ioctl(struct file *f
+ case TUNSETSNDBUF:
+ if (get_user(s, sp))
+ return -EFAULT;
++ if (s <= 0)
++ return -EINVAL;
+
+ q->sk.sk_sndbuf = s;
+ return 0;
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -2180,6 +2180,10 @@ static long __tun_chr_ioctl(struct file
+ ret = -EFAULT;
+ break;
+ }
++ if (sndbuf <= 0) {
++ ret = -EINVAL;
++ break;
++ }
+
+ tun->sndbuf = sndbuf;
+ tun_set_sndbuf(tun);