]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.4-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 16 Nov 2017 14:46:21 +0000 (15:46 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 16 Nov 2017 14:46:21 +0000 (15:46 +0100)
added patches:
ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch
tcp-dccp-fix-ireq-opt-races.patch
tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch

queue-4.4/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch [new file with mode: 0644]
queue-4.4/series
queue-4.4/tcp-dccp-fix-ireq-opt-races.patch [new file with mode: 0644]
queue-4.4/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch [new file with mode: 0644]
queue-4.4/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch [new file with mode: 0644]

diff --git a/queue-4.4/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch b/queue-4.4/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch
new file mode 100644 (file)
index 0000000..5bf71dc
--- /dev/null
@@ -0,0 +1,127 @@
+From foo@baz Thu Nov 16 15:08:14 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 26 Oct 2017 19:19:56 +0800
+Subject: ipip: only increase err_count for some certain type icmp in ipip_err
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit f3594f0a7ea36661d7fd942facd7f31a64245f1a ]
+
+t->err_count is used to count the link failure on tunnel and an err
+will be reported to user socket in tx path if t->err_count is not 0.
+udp socket could even return EHOSTUNREACH to users.
+
+Since commit fd58156e456d ("IPIP: Use ip-tunneling code.") removed
+the 'switch check' for icmp type in ipip_err(), err_count would be
+increased by the icmp packet with ICMP_EXC_FRAGTIME code. A link
+failure would be reported out due to this.
+
+In Jianlin's case, when receiving an ICMP_EXC_FRAGTIME icmp packet,
+udp netperf failed with the err:
+  send_data: data send error: No route to host (errno 113)
+
+We expect this error reported from tunnel to socket when receiving
+some certain type icmp, but not ICMP_EXC_FRAGTIME, ICMP_SR_FAILED
+or ICMP_PARAMETERPROB ones.
+
+This patch is to bring 'switch check' for icmp type back to ipip_err
+so that it only reports link failure for the right type icmp, just as
+in ipgre_err() and ipip6_err().
+
+Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ipip.c |   58 ++++++++++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 42 insertions(+), 16 deletions(-)
+
+--- a/net/ipv4/ipip.c
++++ b/net/ipv4/ipip.c
+@@ -129,42 +129,68 @@ static struct rtnl_link_ops ipip_link_op
+ static int ipip_err(struct sk_buff *skb, u32 info)
+ {
+-/* All the routers (except for Linux) return only
+-   8 bytes of packet payload. It means, that precise relaying of
+-   ICMP in the real Internet is absolutely infeasible.
+- */
++      /* All the routers (except for Linux) return only
++         8 bytes of packet payload. It means, that precise relaying of
++         ICMP in the real Internet is absolutely infeasible.
++       */
+       struct net *net = dev_net(skb->dev);
+       struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+       const struct iphdr *iph = (const struct iphdr *)skb->data;
+-      struct ip_tunnel *t;
+-      int err;
+       const int type = icmp_hdr(skb)->type;
+       const int code = icmp_hdr(skb)->code;
++      struct ip_tunnel *t;
++      int err = 0;
++
++      switch (type) {
++      case ICMP_DEST_UNREACH:
++              switch (code) {
++              case ICMP_SR_FAILED:
++                      /* Impossible event. */
++                      goto out;
++              default:
++                      /* All others are translated to HOST_UNREACH.
++                       * rfc2003 contains "deep thoughts" about NET_UNREACH,
++                       * I believe they are just ether pollution. --ANK
++                       */
++                      break;
++              }
++              break;
++
++      case ICMP_TIME_EXCEEDED:
++              if (code != ICMP_EXC_TTL)
++                      goto out;
++              break;
++
++      case ICMP_REDIRECT:
++              break;
++
++      default:
++              goto out;
++      }
+-      err = -ENOENT;
+       t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+                            iph->daddr, iph->saddr, 0);
+-      if (!t)
++      if (!t) {
++              err = -ENOENT;
+               goto out;
++      }
+       if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+-              ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+-                               t->parms.link, 0, IPPROTO_IPIP, 0);
+-              err = 0;
++              ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
++                               iph->protocol, 0);
+               goto out;
+       }
+       if (type == ICMP_REDIRECT) {
+-              ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
+-                            IPPROTO_IPIP, 0);
+-              err = 0;
++              ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
+               goto out;
+       }
+-      if (t->parms.iph.daddr == 0)
++      if (t->parms.iph.daddr == 0) {
++              err = -ENOENT;
+               goto out;
++      }
+-      err = 0;
+       if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+               goto out;
index 27d49fda24053691101f2f08f03467a460440243..88a8dc13b6f5a82d2e8338ca1b8ec3277076dce4 100644 (file)
@@ -16,3 +16,7 @@ ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch
 tun-allow-positive-return-values-on-dev_get_valid_name-call.patch
 sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch
 ppp-fix-race-in-ppp-device-destruction.patch
+ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch
+tcp-dccp-fix-ireq-opt-races.patch
+tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
+tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
diff --git a/queue-4.4/tcp-dccp-fix-ireq-opt-races.patch b/queue-4.4/tcp-dccp-fix-ireq-opt-races.patch
new file mode 100644 (file)
index 0000000..b0129f2
--- /dev/null
@@ -0,0 +1,408 @@
+From foo@baz Thu Nov 16 15:08:14 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Oct 2017 09:04:13 -0700
+Subject: tcp/dccp: fix ireq->opt races
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c92e8c02fe664155ac4234516e32544bec0f113d ]
+
+syzkaller found another bug in DCCP/TCP stacks [1]
+
+For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix
+ireq->pktopts race"), we need to make sure we do not access
+ireq->opt unless we own the request sock.
+
+Note the opt field is renamed to ireq_opt to ease grep games.
+
+[1]
+BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
+Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295
+
+CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ print_address_description+0x73/0x250 mm/kasan/report.c:252
+ kasan_report_error mm/kasan/report.c:351 [inline]
+ kasan_report+0x25b/0x340 mm/kasan/report.c:409
+ __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427
+ ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
+ tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135
+ tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587
+ tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557
+ __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072
+ tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline]
+ tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071
+ tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816
+ tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x40c341
+RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341
+RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015
+RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000
+R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1
+R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000
+
+Allocated by task 3295:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
+ __do_kmalloc mm/slab.c:3725 [inline]
+ __kmalloc+0x162/0x760 mm/slab.c:3734
+ kmalloc include/linux/slab.h:498 [inline]
+ tcp_v4_save_options include/net/tcp.h:1962 [inline]
+ tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271
+ tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283
+ tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313
+ tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857
+ tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482
+ tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Freed by task 3306:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
+ __cache_free mm/slab.c:3503 [inline]
+ kfree+0xca/0x250 mm/slab.c:3820
+ inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157
+ __sk_destruct+0xfd/0x910 net/core/sock.c:1560
+ sk_destruct+0x47/0x80 net/core/sock.c:1595
+ __sk_free+0x57/0x230 net/core/sock.c:1603
+ sk_free+0x2a/0x40 net/core/sock.c:1614
+ sock_put include/net/sock.h:1652 [inline]
+ inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959
+ tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765
+ tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
+Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h         |    2 +-
+ net/dccp/ipv4.c                 |   13 ++++++++-----
+ net/ipv4/cipso_ipv4.c           |   24 +++++++-----------------
+ net/ipv4/inet_connection_sock.c |    8 +++-----
+ net/ipv4/syncookies.c           |    2 +-
+ net/ipv4/tcp_input.c            |    2 +-
+ net/ipv4/tcp_ipv4.c             |   21 ++++++++++++---------
+ 7 files changed, 33 insertions(+), 39 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -95,7 +95,7 @@ struct inet_request_sock {
+       kmemcheck_bitfield_end(flags);
+       u32                     ir_mark;
+       union {
+-              struct ip_options_rcu   *opt;
++              struct ip_options_rcu __rcu     *ireq_opt;
+               struct sk_buff          *pktopts;
+       };
+ };
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(c
+       sk_daddr_set(newsk, ireq->ir_rmt_addr);
+       sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+       newinet->inet_saddr     = ireq->ir_loc_addr;
+-      newinet->inet_opt       = ireq->opt;
+-      ireq->opt          = NULL;
++      RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
+       newinet->mc_index  = inet_iif(skb);
+       newinet->mc_ttl    = ip_hdr(skb)->ttl;
+       newinet->inet_id   = jiffies;
+@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(c
+       if (__inet_inherit_port(sk, newsk) < 0)
+               goto put_and_exit;
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+-
++      if (*own_req)
++              ireq->ireq_opt = NULL;
++      else
++              newinet->inet_opt = NULL;
+       return newsk;
+ exit_overflow:
+@@ -441,6 +443,7 @@ exit:
+       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+       return NULL;
+ put_and_exit:
++      newinet->inet_opt = NULL;
+       inet_csk_prepare_forced_close(newsk);
+       dccp_done(newsk);
+       goto exit;
+@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const s
+                                                             ireq->ir_rmt_addr);
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          ireq->opt);
++                                          rcu_dereference(ireq->ireq_opt));
+               err = net_xmit_eval(err);
+       }
+@@ -546,7 +549,7 @@ out:
+ static void dccp_v4_reqsk_destructor(struct request_sock *req)
+ {
+       dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
+-      kfree(inet_rsk(req)->opt);
++      kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
+ }
+ void dccp_syn_ack_timeout(const struct request_sock *req)
+--- a/net/ipv4/cipso_ipv4.c
++++ b/net/ipv4/cipso_ipv4.c
+@@ -2012,7 +2012,7 @@ int cipso_v4_req_setattr(struct request_
+       buf = NULL;
+       req_inet = inet_rsk(req);
+-      opt = xchg(&req_inet->opt, opt);
++      opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
+       if (opt)
+               kfree_rcu(opt, rcu);
+@@ -2034,11 +2034,13 @@ req_setattr_failure:
+  * values on failure.
+  *
+  */
+-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
++static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
+ {
++      struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
+       int hdr_delta = 0;
+-      struct ip_options_rcu *opt = *opt_ptr;
++      if (!opt || opt->opt.cipso == 0)
++              return 0;
+       if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
+               u8 cipso_len;
+               u8 cipso_off;
+@@ -2100,14 +2102,10 @@ static int cipso_v4_delopt(struct ip_opt
+  */
+ void cipso_v4_sock_delattr(struct sock *sk)
+ {
+-      int hdr_delta;
+-      struct ip_options_rcu *opt;
+       struct inet_sock *sk_inet;
++      int hdr_delta;
+       sk_inet = inet_sk(sk);
+-      opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
+-      if (!opt || opt->opt.cipso == 0)
+-              return;
+       hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
+       if (sk_inet->is_icsk && hdr_delta > 0) {
+@@ -2127,15 +2125,7 @@ void cipso_v4_sock_delattr(struct sock *
+  */
+ void cipso_v4_req_delattr(struct request_sock *req)
+ {
+-      struct ip_options_rcu *opt;
+-      struct inet_request_sock *req_inet;
+-
+-      req_inet = inet_rsk(req);
+-      opt = req_inet->opt;
+-      if (!opt || opt->opt.cipso == 0)
+-              return;
+-
+-      cipso_v4_delopt(&req_inet->opt);
++      cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
+ }
+ /**
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -412,9 +412,10 @@ struct dst_entry *inet_csk_route_req(con
+ {
+       const struct inet_request_sock *ireq = inet_rsk(req);
+       struct net *net = read_pnet(&ireq->ireq_net);
+-      struct ip_options_rcu *opt = ireq->opt;
++      struct ip_options_rcu *opt;
+       struct rtable *rt;
++      opt = rcu_dereference(ireq->ireq_opt);
+       flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          sk->sk_protocol, inet_sk_flowi_flags(sk),
+@@ -448,10 +449,9 @@ struct dst_entry *inet_csk_route_child_s
+       struct flowi4 *fl4;
+       struct rtable *rt;
++      opt = rcu_dereference(ireq->ireq_opt);
+       fl4 = &newinet->cork.fl.u.ip4;
+-      rcu_read_lock();
+-      opt = rcu_dereference(newinet->inet_opt);
+       flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          sk->sk_protocol, inet_sk_flowi_flags(sk),
+@@ -464,13 +464,11 @@ struct dst_entry *inet_csk_route_child_s
+               goto no_route;
+       if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+               goto route_err;
+-      rcu_read_unlock();
+       return &rt->dst;
+ route_err:
+       ip_rt_put(rt);
+ no_route:
+-      rcu_read_unlock();
+       IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+       return NULL;
+ }
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock
+       /* We throwed the options of the initial SYN away, so we hope
+        * the ACK carries the same options again (see RFC1122 4.2.3.8)
+        */
+-      ireq->opt = tcp_v4_save_options(skb);
++      RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
+       if (security_inet_conn_request(sk, skb, req)) {
+               reqsk_free(req);
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -6105,7 +6105,7 @@ struct request_sock *inet_reqsk_alloc(co
+               struct inet_request_sock *ireq = inet_rsk(req);
+               kmemcheck_annotate_bitfield(ireq, flags);
+-              ireq->opt = NULL;
++              ireq->ireq_opt = NULL;
+               atomic64_set(&ireq->ir_cookie, 0);
+               ireq->ireq_state = TCP_NEW_SYN_RECV;
+               write_pnet(&ireq->ireq_net, sock_net(sk_listener));
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -856,7 +856,7 @@ static int tcp_v4_send_synack(const stru
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          ireq->opt);
++                                          rcu_dereference(ireq->ireq_opt));
+               err = net_xmit_eval(err);
+       }
+@@ -868,7 +868,7 @@ static int tcp_v4_send_synack(const stru
+  */
+ static void tcp_v4_reqsk_destructor(struct request_sock *req)
+ {
+-      kfree(inet_rsk(req)->opt);
++      kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
+ }
+@@ -1197,7 +1197,7 @@ static void tcp_v4_init_req(struct reque
+       sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+       sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+       ireq->no_srccheck = inet_sk(sk_listener)->transparent;
+-      ireq->opt = tcp_v4_save_options(skb);
++      RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
+ }
+ static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
+@@ -1292,10 +1292,9 @@ struct sock *tcp_v4_syn_recv_sock(const
+       ireq                  = inet_rsk(req);
+       sk_daddr_set(newsk, ireq->ir_rmt_addr);
+       sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+-      newinet->inet_saddr           = ireq->ir_loc_addr;
+-      inet_opt              = ireq->opt;
+-      rcu_assign_pointer(newinet->inet_opt, inet_opt);
+-      ireq->opt             = NULL;
++      newinet->inet_saddr   = ireq->ir_loc_addr;
++      inet_opt              = rcu_dereference(ireq->ireq_opt);
++      RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
+       newinet->mc_index     = inet_iif(skb);
+       newinet->mc_ttl       = ip_hdr(skb)->ttl;
+       newinet->rcv_tos      = ip_hdr(skb)->tos;
+@@ -1343,9 +1342,12 @@ struct sock *tcp_v4_syn_recv_sock(const
+       if (__inet_inherit_port(sk, newsk) < 0)
+               goto put_and_exit;
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+-      if (*own_req)
++      if (likely(*own_req)) {
+               tcp_move_syn(newtp, req);
+-
++              ireq->ireq_opt = NULL;
++      } else {
++              newinet->inet_opt = NULL;
++      }
+       return newsk;
+ exit_overflow:
+@@ -1356,6 +1358,7 @@ exit:
+       NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
+       return NULL;
+ put_and_exit:
++      newinet->inet_opt = NULL;
+       inet_csk_prepare_forced_close(newsk);
+       tcp_done(newsk);
+       goto exit;
diff --git a/queue-4.4/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch b/queue-4.4/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
new file mode 100644 (file)
index 0000000..9c237c6
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Thu Nov 16 15:08:14 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 22 Oct 2017 12:33:57 -0700
+Subject: tcp/dccp: fix lockdep splat in inet_csk_route_req()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit a6ca7abe53633d08eea1c6756cb49c9b2d4c90bf ]
+
+This patch fixes the following lockdep splat in inet_csk_route_req()
+
+  lockdep_rcu_suspicious
+  inet_csk_route_req
+  tcp_v4_send_synack
+  tcp_rtx_synack
+  inet_rtx_syn_ack
+  tcp_fastopen_synack_time
+  tcp_retransmit_timer
+  tcp_write_timer_handler
+  tcp_write_timer
+  call_timer_fn
+
+Thread running inet_csk_route_req() owns a reference on the request
+socket, so we have the guarantee ireq->ireq_opt won't be changed or
+freed.
+
+lockdep can enforce this invariant for us.
+
+Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_connection_sock.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -415,7 +415,8 @@ struct dst_entry *inet_csk_route_req(con
+       struct ip_options_rcu *opt;
+       struct rtable *rt;
+-      opt = rcu_dereference(ireq->ireq_opt);
++      opt = rcu_dereference_protected(ireq->ireq_opt,
++                                      atomic_read(&req->rsk_refcnt) > 0);
+       flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          sk->sk_protocol, inet_sk_flowi_flags(sk),
diff --git a/queue-4.4/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch b/queue-4.4/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
new file mode 100644 (file)
index 0000000..74e99ee
--- /dev/null
@@ -0,0 +1,113 @@
+From foo@baz Thu Nov 16 15:08:14 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 24 Oct 2017 08:20:31 -0700
+Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 06f877d613be3621604c2520ec0351d9fbdca15f ]
+
+In my first attempt to fix the lockdep splat, I forgot we could
+enter inet_csk_route_req() with a freshly allocated request socket,
+for which refcount has not yet been elevated, due to complex
+SLAB_TYPESAFE_BY_RCU rules.
+
+We either are in rcu_read_lock() section _or_ we own a refcount on the
+request.
+
+Correct RCU verb to use here is rcu_dereference_check(), although it is
+not possible to prove we actually own a reference on a shared
+refcount :/
+
+In v2, I added ireq_opt_deref() helper and use in three places, to fix other
+possible splats.
+
+[   49.844590]  lockdep_rcu_suspicious+0xea/0xf3
+[   49.846487]  inet_csk_route_req+0x53/0x14d
+[   49.848334]  tcp_v4_route_req+0xe/0x10
+[   49.850174]  tcp_conn_request+0x31c/0x6a0
+[   49.851992]  ? __lock_acquire+0x614/0x822
+[   49.854015]  tcp_v4_conn_request+0x5a/0x79
+[   49.855957]  ? tcp_v4_conn_request+0x5a/0x79
+[   49.858052]  tcp_rcv_state_process+0x98/0xdcc
+[   49.859990]  ? sk_filter_trim_cap+0x2f6/0x307
+[   49.862085]  tcp_v4_do_rcv+0xfc/0x145
+[   49.864055]  ? tcp_v4_do_rcv+0xfc/0x145
+[   49.866173]  tcp_v4_rcv+0x5ab/0xaf9
+[   49.868029]  ip_local_deliver_finish+0x1af/0x2e7
+[   49.870064]  ip_local_deliver+0x1b2/0x1c5
+[   49.871775]  ? inet_del_offload+0x45/0x45
+[   49.873916]  ip_rcv_finish+0x3f7/0x471
+[   49.875476]  ip_rcv+0x3f1/0x42f
+[   49.876991]  ? ip_local_deliver_finish+0x2e7/0x2e7
+[   49.878791]  __netif_receive_skb_core+0x6d3/0x950
+[   49.880701]  ? process_backlog+0x7e/0x216
+[   49.882589]  __netif_receive_skb+0x1d/0x5e
+[   49.884122]  process_backlog+0x10c/0x216
+[   49.885812]  net_rx_action+0x147/0x3df
+
+Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()")
+Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: kernel test robot <fengguang.wu@intel.com>
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h         |    6 ++++++
+ net/dccp/ipv4.c                 |    2 +-
+ net/ipv4/inet_connection_sock.c |    4 ++--
+ net/ipv4/tcp_ipv4.c             |    2 +-
+ 4 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -113,6 +113,12 @@ static inline u32 inet_request_mark(cons
+       return sk->sk_mark;
+ }
++static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
++{
++      return rcu_dereference_check(ireq->ireq_opt,
++                                   atomic_read(&ireq->req.rsk_refcnt) > 0);
++}
++
+ struct inet_cork {
+       unsigned int            flags;
+       __be32                  addr;
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const s
+                                                             ireq->ir_rmt_addr);
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          rcu_dereference(ireq->ireq_opt));
++                                          ireq_opt_deref(ireq));
+               err = net_xmit_eval(err);
+       }
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -415,8 +415,8 @@ struct dst_entry *inet_csk_route_req(con
+       struct ip_options_rcu *opt;
+       struct rtable *rt;
+-      opt = rcu_dereference_protected(ireq->ireq_opt,
+-                                      atomic_read(&req->rsk_refcnt) > 0);
++      opt = ireq_opt_deref(ireq);
++
+       flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          sk->sk_protocol, inet_sk_flowi_flags(sk),
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -856,7 +856,7 @@ static int tcp_v4_send_synack(const stru
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          rcu_dereference(ireq->ireq_opt));
++                                          ireq_opt_deref(ireq));
+               err = net_xmit_eval(err);
+       }