--- /dev/null
+From foo@baz Thu Nov 16 15:08:14 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 26 Oct 2017 19:19:56 +0800
+Subject: ipip: only increase err_count for some certain type icmp in ipip_err
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit f3594f0a7ea36661d7fd942facd7f31a64245f1a ]
+
+t->err_count is used to count the link failure on tunnel and an err
+will be reported to user socket in tx path if t->err_count is not 0.
+udp socket could even return EHOSTUNREACH to users.
+
+Since commit fd58156e456d ("IPIP: Use ip-tunneling code.") removed
+the 'switch check' for icmp type in ipip_err(), err_count would be
+increased by the icmp packet with ICMP_EXC_FRAGTIME code. A link
+failure would be reported out due to this.
+
+In Jianlin's case, when receiving an ICMP_EXC_FRAGTIME icmp packet,
+udp netperf failed with the err:
+ send_data: data send error: No route to host (errno 113)
+
+We expect this error reported from tunnel to socket when receiving
+some certain type icmp, but not ICMP_EXC_FRAGTIME, ICMP_SR_FAILED
+or ICMP_PARAMETERPROB ones.
+
+This patch is to bring 'switch check' for icmp type back to ipip_err
+so that it only reports link failure for the right type icmp, just as
+in ipgre_err() and ipip6_err().
+
+Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ipip.c | 58 ++++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 42 insertions(+), 16 deletions(-)
+
+--- a/net/ipv4/ipip.c
++++ b/net/ipv4/ipip.c
+@@ -129,42 +129,68 @@ static struct rtnl_link_ops ipip_link_op
+ static int ipip_err(struct sk_buff *skb, u32 info)
+ {
+
+-/* All the routers (except for Linux) return only
+- 8 bytes of packet payload. It means, that precise relaying of
+- ICMP in the real Internet is absolutely infeasible.
+- */
++ /* All the routers (except for Linux) return only
++ 8 bytes of packet payload. It means, that precise relaying of
++ ICMP in the real Internet is absolutely infeasible.
++ */
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+ const struct iphdr *iph = (const struct iphdr *)skb->data;
+- struct ip_tunnel *t;
+- int err;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
++ struct ip_tunnel *t;
++ int err = 0;
++
++ switch (type) {
++ case ICMP_DEST_UNREACH:
++ switch (code) {
++ case ICMP_SR_FAILED:
++ /* Impossible event. */
++ goto out;
++ default:
++ /* All others are translated to HOST_UNREACH.
++ * rfc2003 contains "deep thoughts" about NET_UNREACH,
++ * I believe they are just ether pollution. --ANK
++ */
++ break;
++ }
++ break;
++
++ case ICMP_TIME_EXCEEDED:
++ if (code != ICMP_EXC_TTL)
++ goto out;
++ break;
++
++ case ICMP_REDIRECT:
++ break;
++
++ default:
++ goto out;
++ }
+
+- err = -ENOENT;
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+ iph->daddr, iph->saddr, 0);
+- if (!t)
++ if (!t) {
++ err = -ENOENT;
+ goto out;
++ }
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+- t->parms.link, 0, IPPROTO_IPIP, 0);
+- err = 0;
++ ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
++ iph->protocol, 0);
+ goto out;
+ }
+
+ if (type == ICMP_REDIRECT) {
+- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
+- IPPROTO_IPIP, 0);
+- err = 0;
++ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
+ goto out;
+ }
+
+- if (t->parms.iph.daddr == 0)
++ if (t->parms.iph.daddr == 0) {
++ err = -ENOENT;
+ goto out;
++ }
+
+- err = 0;
+ if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+ goto out;
+
--- /dev/null
+From foo@baz Thu Nov 16 15:08:14 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Oct 2017 09:04:13 -0700
+Subject: tcp/dccp: fix ireq->opt races
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c92e8c02fe664155ac4234516e32544bec0f113d ]
+
+syzkaller found another bug in DCCP/TCP stacks [1]
+
+For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix
+ireq->pktopts race"), we need to make sure we do not access
+ireq->opt unless we own the request sock.
+
+Note the opt field is renamed to ireq_opt to ease grep games.
+
+[1]
+BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
+Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295
+
+CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ print_address_description+0x73/0x250 mm/kasan/report.c:252
+ kasan_report_error mm/kasan/report.c:351 [inline]
+ kasan_report+0x25b/0x340 mm/kasan/report.c:409
+ __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427
+ ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
+ tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135
+ tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587
+ tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557
+ __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072
+ tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline]
+ tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071
+ tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816
+ tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x40c341
+RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341
+RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015
+RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000
+R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1
+R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000
+
+Allocated by task 3295:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
+ __do_kmalloc mm/slab.c:3725 [inline]
+ __kmalloc+0x162/0x760 mm/slab.c:3734
+ kmalloc include/linux/slab.h:498 [inline]
+ tcp_v4_save_options include/net/tcp.h:1962 [inline]
+ tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271
+ tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283
+ tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313
+ tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857
+ tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482
+ tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Freed by task 3306:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
+ __cache_free mm/slab.c:3503 [inline]
+ kfree+0xca/0x250 mm/slab.c:3820
+ inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157
+ __sk_destruct+0xfd/0x910 net/core/sock.c:1560
+ sk_destruct+0x47/0x80 net/core/sock.c:1595
+ __sk_free+0x57/0x230 net/core/sock.c:1603
+ sk_free+0x2a/0x40 net/core/sock.c:1614
+ sock_put include/net/sock.h:1652 [inline]
+ inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959
+ tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765
+ tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
+Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h | 2 +-
+ net/dccp/ipv4.c | 13 ++++++++-----
+ net/ipv4/cipso_ipv4.c | 24 +++++++-----------------
+ net/ipv4/inet_connection_sock.c | 8 +++-----
+ net/ipv4/syncookies.c | 2 +-
+ net/ipv4/tcp_input.c | 2 +-
+ net/ipv4/tcp_ipv4.c | 21 ++++++++++++---------
+ 7 files changed, 33 insertions(+), 39 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -95,7 +95,7 @@ struct inet_request_sock {
+ kmemcheck_bitfield_end(flags);
+ u32 ir_mark;
+ union {
+- struct ip_options_rcu *opt;
++ struct ip_options_rcu __rcu *ireq_opt;
+ struct sk_buff *pktopts;
+ };
+ };
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(c
+ sk_daddr_set(newsk, ireq->ir_rmt_addr);
+ sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+ newinet->inet_saddr = ireq->ir_loc_addr;
+- newinet->inet_opt = ireq->opt;
+- ireq->opt = NULL;
++ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
+ newinet->mc_index = inet_iif(skb);
+ newinet->mc_ttl = ip_hdr(skb)->ttl;
+ newinet->inet_id = jiffies;
+@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(c
+ if (__inet_inherit_port(sk, newsk) < 0)
+ goto put_and_exit;
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+-
++ if (*own_req)
++ ireq->ireq_opt = NULL;
++ else
++ newinet->inet_opt = NULL;
+ return newsk;
+
+ exit_overflow:
+@@ -441,6 +443,7 @@ exit:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ return NULL;
+ put_and_exit:
++ newinet->inet_opt = NULL;
+ inet_csk_prepare_forced_close(newsk);
+ dccp_done(newsk);
+ goto exit;
+@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const s
+ ireq->ir_rmt_addr);
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
+- ireq->opt);
++ rcu_dereference(ireq->ireq_opt));
+ err = net_xmit_eval(err);
+ }
+
+@@ -546,7 +549,7 @@ out:
+ static void dccp_v4_reqsk_destructor(struct request_sock *req)
+ {
+ dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
+- kfree(inet_rsk(req)->opt);
++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
+ }
+
+ void dccp_syn_ack_timeout(const struct request_sock *req)
+--- a/net/ipv4/cipso_ipv4.c
++++ b/net/ipv4/cipso_ipv4.c
+@@ -2012,7 +2012,7 @@ int cipso_v4_req_setattr(struct request_
+ buf = NULL;
+
+ req_inet = inet_rsk(req);
+- opt = xchg(&req_inet->opt, opt);
++ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
+ if (opt)
+ kfree_rcu(opt, rcu);
+
+@@ -2034,11 +2034,13 @@ req_setattr_failure:
+ * values on failure.
+ *
+ */
+-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
++static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
+ {
++ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
+ int hdr_delta = 0;
+- struct ip_options_rcu *opt = *opt_ptr;
+
++ if (!opt || opt->opt.cipso == 0)
++ return 0;
+ if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
+ u8 cipso_len;
+ u8 cipso_off;
+@@ -2100,14 +2102,10 @@ static int cipso_v4_delopt(struct ip_opt
+ */
+ void cipso_v4_sock_delattr(struct sock *sk)
+ {
+- int hdr_delta;
+- struct ip_options_rcu *opt;
+ struct inet_sock *sk_inet;
++ int hdr_delta;
+
+ sk_inet = inet_sk(sk);
+- opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
+- if (!opt || opt->opt.cipso == 0)
+- return;
+
+ hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
+ if (sk_inet->is_icsk && hdr_delta > 0) {
+@@ -2127,15 +2125,7 @@ void cipso_v4_sock_delattr(struct sock *
+ */
+ void cipso_v4_req_delattr(struct request_sock *req)
+ {
+- struct ip_options_rcu *opt;
+- struct inet_request_sock *req_inet;
+-
+- req_inet = inet_rsk(req);
+- opt = req_inet->opt;
+- if (!opt || opt->opt.cipso == 0)
+- return;
+-
+- cipso_v4_delopt(&req_inet->opt);
++ cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
+ }
+
+ /**
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -412,9 +412,10 @@ struct dst_entry *inet_csk_route_req(con
+ {
+ const struct inet_request_sock *ireq = inet_rsk(req);
+ struct net *net = read_pnet(&ireq->ireq_net);
+- struct ip_options_rcu *opt = ireq->opt;
++ struct ip_options_rcu *opt;
+ struct rtable *rt;
+
++ opt = rcu_dereference(ireq->ireq_opt);
+ flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
+@@ -448,10 +449,9 @@ struct dst_entry *inet_csk_route_child_s
+ struct flowi4 *fl4;
+ struct rtable *rt;
+
++ opt = rcu_dereference(ireq->ireq_opt);
+ fl4 = &newinet->cork.fl.u.ip4;
+
+- rcu_read_lock();
+- opt = rcu_dereference(newinet->inet_opt);
+ flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
+@@ -464,13 +464,11 @@ struct dst_entry *inet_csk_route_child_s
+ goto no_route;
+ if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+ goto route_err;
+- rcu_read_unlock();
+ return &rt->dst;
+
+ route_err:
+ ip_rt_put(rt);
+ no_route:
+- rcu_read_unlock();
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+ return NULL;
+ }
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock
+ /* We throwed the options of the initial SYN away, so we hope
+ * the ACK carries the same options again (see RFC1122 4.2.3.8)
+ */
+- ireq->opt = tcp_v4_save_options(skb);
++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
+
+ if (security_inet_conn_request(sk, skb, req)) {
+ reqsk_free(req);
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -6105,7 +6105,7 @@ struct request_sock *inet_reqsk_alloc(co
+ struct inet_request_sock *ireq = inet_rsk(req);
+
+ kmemcheck_annotate_bitfield(ireq, flags);
+- ireq->opt = NULL;
++ ireq->ireq_opt = NULL;
+ atomic64_set(&ireq->ir_cookie, 0);
+ ireq->ireq_state = TCP_NEW_SYN_RECV;
+ write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -856,7 +856,7 @@ static int tcp_v4_send_synack(const stru
+
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
+- ireq->opt);
++ rcu_dereference(ireq->ireq_opt));
+ err = net_xmit_eval(err);
+ }
+
+@@ -868,7 +868,7 @@ static int tcp_v4_send_synack(const stru
+ */
+ static void tcp_v4_reqsk_destructor(struct request_sock *req)
+ {
+- kfree(inet_rsk(req)->opt);
++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
+ }
+
+
+@@ -1197,7 +1197,7 @@ static void tcp_v4_init_req(struct reque
+ sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+ sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+ ireq->no_srccheck = inet_sk(sk_listener)->transparent;
+- ireq->opt = tcp_v4_save_options(skb);
++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
+ }
+
+ static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
+@@ -1292,10 +1292,9 @@ struct sock *tcp_v4_syn_recv_sock(const
+ ireq = inet_rsk(req);
+ sk_daddr_set(newsk, ireq->ir_rmt_addr);
+ sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+- newinet->inet_saddr = ireq->ir_loc_addr;
+- inet_opt = ireq->opt;
+- rcu_assign_pointer(newinet->inet_opt, inet_opt);
+- ireq->opt = NULL;
++ newinet->inet_saddr = ireq->ir_loc_addr;
++ inet_opt = rcu_dereference(ireq->ireq_opt);
++ RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
+ newinet->mc_index = inet_iif(skb);
+ newinet->mc_ttl = ip_hdr(skb)->ttl;
+ newinet->rcv_tos = ip_hdr(skb)->tos;
+@@ -1343,9 +1342,12 @@ struct sock *tcp_v4_syn_recv_sock(const
+ if (__inet_inherit_port(sk, newsk) < 0)
+ goto put_and_exit;
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+- if (*own_req)
++ if (likely(*own_req)) {
+ tcp_move_syn(newtp, req);
+-
++ ireq->ireq_opt = NULL;
++ } else {
++ newinet->inet_opt = NULL;
++ }
+ return newsk;
+
+ exit_overflow:
+@@ -1356,6 +1358,7 @@ exit:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+ return NULL;
+ put_and_exit:
++ newinet->inet_opt = NULL;
+ inet_csk_prepare_forced_close(newsk);
+ tcp_done(newsk);
+ goto exit;
--- /dev/null
+From foo@baz Thu Nov 16 15:08:14 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 24 Oct 2017 08:20:31 -0700
+Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 06f877d613be3621604c2520ec0351d9fbdca15f ]
+
+In my first attempt to fix the lockdep splat, I forgot we could
+enter inet_csk_route_req() with a freshly allocated request socket,
+for which refcount has not yet been elevated, due to complex
+SLAB_TYPESAFE_BY_RCU rules.
+
+We either are in rcu_read_lock() section _or_ we own a refcount on the
+request.
+
+Correct RCU verb to use here is rcu_dereference_check(), although it is
+not possible to prove we actually own a reference on a shared
+refcount :/
+
+In v2, I added ireq_opt_deref() helper and use in three places, to fix other
+possible splats.
+
+[ 49.844590] lockdep_rcu_suspicious+0xea/0xf3
+[ 49.846487] inet_csk_route_req+0x53/0x14d
+[ 49.848334] tcp_v4_route_req+0xe/0x10
+[ 49.850174] tcp_conn_request+0x31c/0x6a0
+[ 49.851992] ? __lock_acquire+0x614/0x822
+[ 49.854015] tcp_v4_conn_request+0x5a/0x79
+[ 49.855957] ? tcp_v4_conn_request+0x5a/0x79
+[ 49.858052] tcp_rcv_state_process+0x98/0xdcc
+[ 49.859990] ? sk_filter_trim_cap+0x2f6/0x307
+[ 49.862085] tcp_v4_do_rcv+0xfc/0x145
+[ 49.864055] ? tcp_v4_do_rcv+0xfc/0x145
+[ 49.866173] tcp_v4_rcv+0x5ab/0xaf9
+[ 49.868029] ip_local_deliver_finish+0x1af/0x2e7
+[ 49.870064] ip_local_deliver+0x1b2/0x1c5
+[ 49.871775] ? inet_del_offload+0x45/0x45
+[ 49.873916] ip_rcv_finish+0x3f7/0x471
+[ 49.875476] ip_rcv+0x3f1/0x42f
+[ 49.876991] ? ip_local_deliver_finish+0x2e7/0x2e7
+[ 49.878791] __netif_receive_skb_core+0x6d3/0x950
+[ 49.880701] ? process_backlog+0x7e/0x216
+[ 49.882589] __netif_receive_skb+0x1d/0x5e
+[ 49.884122] process_backlog+0x10c/0x216
+[ 49.885812] net_rx_action+0x147/0x3df
+
+Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()")
+Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: kernel test robot <fengguang.wu@intel.com>
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h | 6 ++++++
+ net/dccp/ipv4.c | 2 +-
+ net/ipv4/inet_connection_sock.c | 4 ++--
+ net/ipv4/tcp_ipv4.c | 2 +-
+ 4 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -113,6 +113,12 @@ static inline u32 inet_request_mark(cons
+ return sk->sk_mark;
+ }
+
++static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
++{
++ return rcu_dereference_check(ireq->ireq_opt,
++ atomic_read(&ireq->req.rsk_refcnt) > 0);
++}
++
+ struct inet_cork {
+ unsigned int flags;
+ __be32 addr;
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const s
+ ireq->ir_rmt_addr);
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
+- rcu_dereference(ireq->ireq_opt));
++ ireq_opt_deref(ireq));
+ err = net_xmit_eval(err);
+ }
+
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -415,8 +415,8 @@ struct dst_entry *inet_csk_route_req(con
+ struct ip_options_rcu *opt;
+ struct rtable *rt;
+
+- opt = rcu_dereference_protected(ireq->ireq_opt,
+- atomic_read(&req->rsk_refcnt) > 0);
++ opt = ireq_opt_deref(ireq);
++
+ flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+ RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -856,7 +856,7 @@ static int tcp_v4_send_synack(const stru
+
+ err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+ ireq->ir_rmt_addr,
+- rcu_dereference(ireq->ireq_opt));
++ ireq_opt_deref(ireq));
+ err = net_xmit_eval(err);
+ }
+