From: Greg Kroah-Hartman Date: Thu, 16 Nov 2017 14:46:21 +0000 (+0100) Subject: 4.4-stable patches X-Git-Tag: v3.18.82~13 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=05402070cf37c8805bbd4905020ffe0b670798d1;p=thirdparty%2Fkernel%2Fstable-queue.git 4.4-stable patches added patches: ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch tcp-dccp-fix-ireq-opt-races.patch tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch --- diff --git a/queue-4.4/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch b/queue-4.4/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch new file mode 100644 index 00000000000..5bf71dc5a01 --- /dev/null +++ b/queue-4.4/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch @@ -0,0 +1,127 @@ +From foo@baz Thu Nov 16 15:08:14 CET 2017 +From: Xin Long +Date: Thu, 26 Oct 2017 19:19:56 +0800 +Subject: ipip: only increase err_count for some certain type icmp in ipip_err + +From: Xin Long + + +[ Upstream commit f3594f0a7ea36661d7fd942facd7f31a64245f1a ] + +t->err_count is used to count the link failure on tunnel and an err +will be reported to user socket in tx path if t->err_count is not 0. +udp socket could even return EHOSTUNREACH to users. + +Since commit fd58156e456d ("IPIP: Use ip-tunneling code.") removed +the 'switch check' for icmp type in ipip_err(), err_count would be +increased by the icmp packet with ICMP_EXC_FRAGTIME code. A link +failure would be reported out due to this. + +In Jianlin's case, when receiving an ICMP_EXC_FRAGTIME icmp packet, +udp netperf failed with the err: + send_data: data send error: No route to host (errno 113) + +We expect this error reported from tunnel to socket when receiving +some certain type icmp, but not ICMP_EXC_FRAGTIME, ICMP_SR_FAILED +or ICMP_PARAMETERPROB ones.
+ +This patch is to bring 'switch check' for icmp type back to ipip_err +so that it only reports link failure for the right type icmp, just as +in ipgre_err() and ipip6_err(). + +Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.") +Reported-by: Jianlin Shi +Signed-off-by: Xin Long +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/ipip.c | 58 ++++++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 42 insertions(+), 16 deletions(-) + +--- a/net/ipv4/ipip.c ++++ b/net/ipv4/ipip.c +@@ -129,42 +129,68 @@ static struct rtnl_link_ops ipip_link_op + static int ipip_err(struct sk_buff *skb, u32 info) + { + +-/* All the routers (except for Linux) return only +- 8 bytes of packet payload. It means, that precise relaying of +- ICMP in the real Internet is absolutely infeasible. +- */ ++ /* All the routers (except for Linux) return only ++ 8 bytes of packet payload. It means, that precise relaying of ++ ICMP in the real Internet is absolutely infeasible. ++ */ + struct net *net = dev_net(skb->dev); + struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + const struct iphdr *iph = (const struct iphdr *)skb->data; +- struct ip_tunnel *t; +- int err; + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; ++ struct ip_tunnel *t; ++ int err = 0; ++ ++ switch (type) { ++ case ICMP_DEST_UNREACH: ++ switch (code) { ++ case ICMP_SR_FAILED: ++ /* Impossible event. */ ++ goto out; ++ default: ++ /* All others are translated to HOST_UNREACH. ++ * rfc2003 contains "deep thoughts" about NET_UNREACH, ++ * I believe they are just ether pollution. 
--ANK ++ */ ++ break; ++ } ++ break; ++ ++ case ICMP_TIME_EXCEEDED: ++ if (code != ICMP_EXC_TTL) ++ goto out; ++ break; ++ ++ case ICMP_REDIRECT: ++ break; ++ ++ default: ++ goto out; ++ } + +- err = -ENOENT; + t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, + iph->daddr, iph->saddr, 0); +- if (!t) ++ if (!t) { ++ err = -ENOENT; + goto out; ++ } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { +- ipv4_update_pmtu(skb, dev_net(skb->dev), info, +- t->parms.link, 0, IPPROTO_IPIP, 0); +- err = 0; ++ ipv4_update_pmtu(skb, net, info, t->parms.link, 0, ++ iph->protocol, 0); + goto out; + } + + if (type == ICMP_REDIRECT) { +- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, +- IPPROTO_IPIP, 0); +- err = 0; ++ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0); + goto out; + } + +- if (t->parms.iph.daddr == 0) ++ if (t->parms.iph.daddr == 0) { ++ err = -ENOENT; + goto out; ++ } + +- err = 0; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) + goto out; + diff --git a/queue-4.4/series b/queue-4.4/series index 27d49fda240..88a8dc13b6f 100644 --- a/queue-4.4/series +++ b/queue-4.4/series @@ -16,3 +16,7 @@ ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch tun-allow-positive-return-values-on-dev_get_valid_name-call.patch sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch ppp-fix-race-in-ppp-device-destruction.patch +ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch +tcp-dccp-fix-ireq-opt-races.patch +tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch +tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch diff --git a/queue-4.4/tcp-dccp-fix-ireq-opt-races.patch b/queue-4.4/tcp-dccp-fix-ireq-opt-races.patch new file mode 100644 index 00000000000..b0129f2850b --- /dev/null +++ b/queue-4.4/tcp-dccp-fix-ireq-opt-races.patch @@ -0,0 +1,408 @@ +From foo@baz Thu Nov 16 15:08:14 CET 2017 +From: Eric Dumazet +Date: Fri, 20 Oct 2017 09:04:13 -0700 
+Subject: tcp/dccp: fix ireq->opt races + +From: Eric Dumazet + + +[ Upstream commit c92e8c02fe664155ac4234516e32544bec0f113d ] + +syzkaller found another bug in DCCP/TCP stacks [1] + +For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix +ireq->pktopts race"), we need to make sure we do not access +ireq->opt unless we own the request sock. + +Note the opt field is renamed to ireq_opt to ease grep games. + +[1] +BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 +Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295 + +CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Call Trace: + __dump_stack lib/dump_stack.c:16 [inline] + dump_stack+0x194/0x257 lib/dump_stack.c:52 + print_address_description+0x73/0x250 mm/kasan/report.c:252 + kasan_report_error mm/kasan/report.c:351 [inline] + kasan_report+0x25b/0x340 mm/kasan/report.c:409 + __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427 + ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474 + tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135 + tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587 + tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557 + __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072 + tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline] + tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071 + tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816 + tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682 + ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:464 [inline] + ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 + __netif_receive_skb_core+0x1a3e/0x34b0 
net/core/dev.c:4476 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 + netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 + netif_receive_skb+0xae/0x390 net/core/dev.c:4611 + tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 + tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 + tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 + call_write_iter include/linux/fs.h:1770 [inline] + new_sync_write fs/read_write.c:468 [inline] + __vfs_write+0x68a/0x970 fs/read_write.c:481 + vfs_write+0x18f/0x510 fs/read_write.c:543 + SYSC_write fs/read_write.c:588 [inline] + SyS_write+0xef/0x220 fs/read_write.c:580 + entry_SYSCALL_64_fastpath+0x1f/0xbe +RIP: 0033:0x40c341 +RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001 +RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341 +RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015 +RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000 +R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1 +R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000 + +Allocated by task 3295: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551 + __do_kmalloc mm/slab.c:3725 [inline] + __kmalloc+0x162/0x760 mm/slab.c:3734 + kmalloc include/linux/slab.h:498 [inline] + tcp_v4_save_options include/net/tcp.h:1962 [inline] + tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271 + tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283 + tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313 + tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857 + tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482 + tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711 + ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 + 
dst_input include/net/dst.h:464 [inline] + ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493 + __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 + netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 + netif_receive_skb+0xae/0x390 net/core/dev.c:4611 + tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 + tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 + tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 + call_write_iter include/linux/fs.h:1770 [inline] + new_sync_write fs/read_write.c:468 [inline] + __vfs_write+0x68a/0x970 fs/read_write.c:481 + vfs_write+0x18f/0x510 fs/read_write.c:543 + SYSC_write fs/read_write.c:588 [inline] + SyS_write+0xef/0x220 fs/read_write.c:580 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Freed by task 3306: + save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59 + save_stack+0x43/0xd0 mm/kasan/kasan.c:447 + set_track mm/kasan/kasan.c:459 [inline] + kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524 + __cache_free mm/slab.c:3503 [inline] + kfree+0xca/0x250 mm/slab.c:3820 + inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157 + __sk_destruct+0xfd/0x910 net/core/sock.c:1560 + sk_destruct+0x47/0x80 net/core/sock.c:1595 + __sk_free+0x57/0x230 net/core/sock.c:1603 + sk_free+0x2a/0x40 net/core/sock.c:1614 + sock_put include/net/sock.h:1652 [inline] + inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959 + tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765 + tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675 + ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257 + dst_input include/net/dst.h:464 [inline] + ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397 + NF_HOOK include/linux/netfilter.h:249 [inline] + ip_rcv+0xc3f/0x1820 
net/ipv4/ip_input.c:493 + __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476 + __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514 + netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587 + netif_receive_skb+0xae/0x390 net/core/dev.c:4611 + tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372 + tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766 + tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792 + call_write_iter include/linux/fs.h:1770 [inline] + new_sync_write fs/read_write.c:468 [inline] + __vfs_write+0x68a/0x970 fs/read_write.c:481 + vfs_write+0x18f/0x510 fs/read_write.c:543 + SYSC_write fs/read_write.c:588 [inline] + SyS_write+0xef/0x220 fs/read_write.c:580 + entry_SYSCALL_64_fastpath+0x1f/0xbe + +Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") +Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 2 +- + net/dccp/ipv4.c | 13 ++++++++----- + net/ipv4/cipso_ipv4.c | 24 +++++++----------------- + net/ipv4/inet_connection_sock.c | 8 +++----- + net/ipv4/syncookies.c | 2 +- + net/ipv4/tcp_input.c | 2 +- + net/ipv4/tcp_ipv4.c | 21 ++++++++++++--------- + 7 files changed, 33 insertions(+), 39 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -95,7 +95,7 @@ struct inet_request_sock { + kmemcheck_bitfield_end(flags); + u32 ir_mark; + union { +- struct ip_options_rcu *opt; ++ struct ip_options_rcu __rcu *ireq_opt; + struct sk_buff *pktopts; + }; + }; +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(c + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); + newinet->inet_saddr = ireq->ir_loc_addr; +- newinet->inet_opt = ireq->opt; +- ireq->opt = NULL; ++ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); + newinet->mc_index = inet_iif(skb); + 
newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->inet_id = jiffies; +@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(c + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); +- ++ if (*own_req) ++ ireq->ireq_opt = NULL; ++ else ++ newinet->inet_opt = NULL; + return newsk; + + exit_overflow: +@@ -441,6 +443,7 @@ exit: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + return NULL; + put_and_exit: ++ newinet->inet_opt = NULL; + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); + goto exit; +@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const s + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq->opt); ++ rcu_dereference(ireq->ireq_opt)); + err = net_xmit_eval(err); + } + +@@ -546,7 +549,7 @@ out: + static void dccp_v4_reqsk_destructor(struct request_sock *req) + { + dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); +- kfree(inet_rsk(req)->opt); ++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); + } + + void dccp_syn_ack_timeout(const struct request_sock *req) +--- a/net/ipv4/cipso_ipv4.c ++++ b/net/ipv4/cipso_ipv4.c +@@ -2012,7 +2012,7 @@ int cipso_v4_req_setattr(struct request_ + buf = NULL; + + req_inet = inet_rsk(req); +- opt = xchg(&req_inet->opt, opt); ++ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); + if (opt) + kfree_rcu(opt, rcu); + +@@ -2034,11 +2034,13 @@ req_setattr_failure: + * values on failure. 
+ * + */ +-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) ++static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) + { ++ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1); + int hdr_delta = 0; +- struct ip_options_rcu *opt = *opt_ptr; + ++ if (!opt || opt->opt.cipso == 0) ++ return 0; + if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { + u8 cipso_len; + u8 cipso_off; +@@ -2100,14 +2102,10 @@ static int cipso_v4_delopt(struct ip_opt + */ + void cipso_v4_sock_delattr(struct sock *sk) + { +- int hdr_delta; +- struct ip_options_rcu *opt; + struct inet_sock *sk_inet; ++ int hdr_delta; + + sk_inet = inet_sk(sk); +- opt = rcu_dereference_protected(sk_inet->inet_opt, 1); +- if (!opt || opt->opt.cipso == 0) +- return; + + hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); + if (sk_inet->is_icsk && hdr_delta > 0) { +@@ -2127,15 +2125,7 @@ void cipso_v4_sock_delattr(struct sock * + */ + void cipso_v4_req_delattr(struct request_sock *req) + { +- struct ip_options_rcu *opt; +- struct inet_request_sock *req_inet; +- +- req_inet = inet_rsk(req); +- opt = req_inet->opt; +- if (!opt || opt->opt.cipso == 0) +- return; +- +- cipso_v4_delopt(&req_inet->opt); ++ cipso_v4_delopt(&inet_rsk(req)->ireq_opt); + } + + /** +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -412,9 +412,10 @@ struct dst_entry *inet_csk_route_req(con + { + const struct inet_request_sock *ireq = inet_rsk(req); + struct net *net = read_pnet(&ireq->ireq_net); +- struct ip_options_rcu *opt = ireq->opt; ++ struct ip_options_rcu *opt; + struct rtable *rt; + ++ opt = rcu_dereference(ireq->ireq_opt); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), +@@ -448,10 +449,9 @@ struct dst_entry *inet_csk_route_child_s + struct flowi4 *fl4; + struct rtable *rt; + ++ opt = rcu_dereference(ireq->ireq_opt); + fl4 = &newinet->cork.fl.u.ip4; + +- 
rcu_read_lock(); +- opt = rcu_dereference(newinet->inet_opt); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), +@@ -464,13 +464,11 @@ struct dst_entry *inet_csk_route_child_s + goto no_route; + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) + goto route_err; +- rcu_read_unlock(); + return &rt->dst; + + route_err: + ip_rt_put(rt); + no_route: +- rcu_read_unlock(); + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + return NULL; + } +--- a/net/ipv4/syncookies.c ++++ b/net/ipv4/syncookies.c +@@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock + /* We throwed the options of the initial SYN away, so we hope + * the ACK carries the same options again (see RFC1122 4.2.3.8) + */ +- ireq->opt = tcp_v4_save_options(skb); ++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb)); + + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -6105,7 +6105,7 @@ struct request_sock *inet_reqsk_alloc(co + struct inet_request_sock *ireq = inet_rsk(req); + + kmemcheck_annotate_bitfield(ireq, flags); +- ireq->opt = NULL; ++ ireq->ireq_opt = NULL; + atomic64_set(&ireq->ir_cookie, 0); + ireq->ireq_state = TCP_NEW_SYN_RECV; + write_pnet(&ireq->ireq_net, sock_net(sk_listener)); +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -856,7 +856,7 @@ static int tcp_v4_send_synack(const stru + + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- ireq->opt); ++ rcu_dereference(ireq->ireq_opt)); + err = net_xmit_eval(err); + } + +@@ -868,7 +868,7 @@ static int tcp_v4_send_synack(const stru + */ + static void tcp_v4_reqsk_destructor(struct request_sock *req) + { +- kfree(inet_rsk(req)->opt); ++ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); + } + + +@@ -1197,7 +1197,7 @@ static void tcp_v4_init_req(struct reque + sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); 
+ sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); + ireq->no_srccheck = inet_sk(sk_listener)->transparent; +- ireq->opt = tcp_v4_save_options(skb); ++ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb)); + } + + static struct dst_entry *tcp_v4_route_req(const struct sock *sk, +@@ -1292,10 +1292,9 @@ struct sock *tcp_v4_syn_recv_sock(const + ireq = inet_rsk(req); + sk_daddr_set(newsk, ireq->ir_rmt_addr); + sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); +- newinet->inet_saddr = ireq->ir_loc_addr; +- inet_opt = ireq->opt; +- rcu_assign_pointer(newinet->inet_opt, inet_opt); +- ireq->opt = NULL; ++ newinet->inet_saddr = ireq->ir_loc_addr; ++ inet_opt = rcu_dereference(ireq->ireq_opt); ++ RCU_INIT_POINTER(newinet->inet_opt, inet_opt); + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->rcv_tos = ip_hdr(skb)->tos; +@@ -1343,9 +1342,12 @@ struct sock *tcp_v4_syn_recv_sock(const + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); +- if (*own_req) ++ if (likely(*own_req)) { + tcp_move_syn(newtp, req); +- ++ ireq->ireq_opt = NULL; ++ } else { ++ newinet->inet_opt = NULL; ++ } + return newsk; + + exit_overflow: +@@ -1356,6 +1358,7 @@ exit: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + return NULL; + put_and_exit: ++ newinet->inet_opt = NULL; + inet_csk_prepare_forced_close(newsk); + tcp_done(newsk); + goto exit; diff --git a/queue-4.4/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch b/queue-4.4/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch new file mode 100644 index 00000000000..9c237c622eb --- /dev/null +++ b/queue-4.4/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch @@ -0,0 +1,49 @@ +From foo@baz Thu Nov 16 15:08:14 CET 2017 +From: Eric Dumazet +Date: Sun, 22 Oct 2017 12:33:57 -0700 +Subject: tcp/dccp: fix lockdep splat in inet_csk_route_req() + +From: Eric Dumazet + + +[ Upstream commit a6ca7abe53633d08eea1c6756cb49c9b2d4c90bf 
] + +This patch fixes the following lockdep splat in inet_csk_route_req() + + lockdep_rcu_suspicious + inet_csk_route_req + tcp_v4_send_synack + tcp_rtx_synack + inet_rtx_syn_ack + tcp_fastopen_synack_time + tcp_retransmit_timer + tcp_write_timer_handler + tcp_write_timer + call_timer_fn + +Thread running inet_csk_route_req() owns a reference on the request +socket, so we have the guarantee ireq->ireq_opt won't be changed or +freed. + +lockdep can enforce this invariant for us. + +Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/inet_connection_sock.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -415,7 +415,8 @@ struct dst_entry *inet_csk_route_req(con + struct ip_options_rcu *opt; + struct rtable *rt; + +- opt = rcu_dereference(ireq->ireq_opt); ++ opt = rcu_dereference_protected(ireq->ireq_opt, ++ atomic_read(&req->rsk_refcnt) > 0); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), diff --git a/queue-4.4/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch b/queue-4.4/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch new file mode 100644 index 00000000000..74e99ee7977 --- /dev/null +++ b/queue-4.4/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch @@ -0,0 +1,113 @@ +From foo@baz Thu Nov 16 15:08:14 CET 2017 +From: Eric Dumazet +Date: Tue, 24 Oct 2017 08:20:31 -0700 +Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt + +From: Eric Dumazet + + +[ Upstream commit 06f877d613be3621604c2520ec0351d9fbdca15f ] + +In my first attempt to fix the lockdep splat, I forgot we could +enter inet_csk_route_req() with a freshly allocated request socket, +for which refcount has not yet been elevated, due to complex +SLAB_TYPESAFE_BY_RCU rules.
+ +We are either in an rcu_read_lock() section _or_ we own a refcount on the +request. + +The correct RCU verb to use here is rcu_dereference_check(), although it is +not possible to prove we actually own a reference on a shared +refcount :/ + +In v2, I added the ireq_opt_deref() helper and used it in three places, to fix other +possible splats. + +[ 49.844590] lockdep_rcu_suspicious+0xea/0xf3 +[ 49.846487] inet_csk_route_req+0x53/0x14d +[ 49.848334] tcp_v4_route_req+0xe/0x10 +[ 49.850174] tcp_conn_request+0x31c/0x6a0 +[ 49.851992] ? __lock_acquire+0x614/0x822 +[ 49.854015] tcp_v4_conn_request+0x5a/0x79 +[ 49.855957] ? tcp_v4_conn_request+0x5a/0x79 +[ 49.858052] tcp_rcv_state_process+0x98/0xdcc +[ 49.859990] ? sk_filter_trim_cap+0x2f6/0x307 +[ 49.862085] tcp_v4_do_rcv+0xfc/0x145 +[ 49.864055] ? tcp_v4_do_rcv+0xfc/0x145 +[ 49.866173] tcp_v4_rcv+0x5ab/0xaf9 +[ 49.868029] ip_local_deliver_finish+0x1af/0x2e7 +[ 49.870064] ip_local_deliver+0x1b2/0x1c5 +[ 49.871775] ? inet_del_offload+0x45/0x45 +[ 49.873916] ip_rcv_finish+0x3f7/0x471 +[ 49.875476] ip_rcv+0x3f1/0x42f +[ 49.876991] ? ip_local_deliver_finish+0x2e7/0x2e7 +[ 49.878791] __netif_receive_skb_core+0x6d3/0x950 +[ 49.880701] ? process_backlog+0x7e/0x216 +[ 49.882589] __netif_receive_skb+0x1d/0x5e +[ 49.884122] process_backlog+0x10c/0x216 +[ 49.885812] net_rx_action+0x147/0x3df + +Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()") +Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races") +Signed-off-by: Eric Dumazet +Reported-by: kernel test robot +Reported-by: Maciej Żenczykowski +Signed-off-by: David S.
Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/inet_sock.h | 6 ++++++ + net/dccp/ipv4.c | 2 +- + net/ipv4/inet_connection_sock.c | 4 ++-- + net/ipv4/tcp_ipv4.c | 2 +- + 4 files changed, 10 insertions(+), 4 deletions(-) + +--- a/include/net/inet_sock.h ++++ b/include/net/inet_sock.h +@@ -113,6 +113,12 @@ static inline u32 inet_request_mark(cons + return sk->sk_mark; + } + ++static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq) ++{ ++ return rcu_dereference_check(ireq->ireq_opt, ++ atomic_read(&ireq->req.rsk_refcnt) > 0); ++} ++ + struct inet_cork { + unsigned int flags; + __be32 addr; +--- a/net/dccp/ipv4.c ++++ b/net/dccp/ipv4.c +@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const s + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- rcu_dereference(ireq->ireq_opt)); ++ ireq_opt_deref(ireq)); + err = net_xmit_eval(err); + } + +--- a/net/ipv4/inet_connection_sock.c ++++ b/net/ipv4/inet_connection_sock.c +@@ -415,8 +415,8 @@ struct dst_entry *inet_csk_route_req(con + struct ip_options_rcu *opt; + struct rtable *rt; + +- opt = rcu_dereference_protected(ireq->ireq_opt, +- atomic_read(&req->rsk_refcnt) > 0); ++ opt = ireq_opt_deref(ireq); ++ + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, + RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, + sk->sk_protocol, inet_sk_flowi_flags(sk), +--- a/net/ipv4/tcp_ipv4.c ++++ b/net/ipv4/tcp_ipv4.c +@@ -856,7 +856,7 @@ static int tcp_v4_send_synack(const stru + + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, +- rcu_dereference(ireq->ireq_opt)); ++ ireq_opt_deref(ireq)); + err = net_xmit_eval(err); + } +