From: Greg Kroah-Hartman Date: Tue, 1 Mar 2016 05:24:39 +0000 (-0800) Subject: 3.10-stable patches X-Git-Tag: v3.10.99~58 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a521d4ce8a452b534ab90e0e12b6e082a63a06fb;p=thirdparty%2Fkernel%2Fstable-queue.git 3.10-stable patches added patches: netfilter-nf_conntrack-fix-rcu-race-in-nf_conntrack_find_get.patch --- diff --git a/queue-3.10/netfilter-nf_conntrack-fix-rcu-race-in-nf_conntrack_find_get.patch b/queue-3.10/netfilter-nf_conntrack-fix-rcu-race-in-nf_conntrack_find_get.patch new file mode 100644 index 00000000000..0864e56d589 --- /dev/null +++ b/queue-3.10/netfilter-nf_conntrack-fix-rcu-race-in-nf_conntrack_find_get.patch @@ -0,0 +1,148 @@ +From c6825c0976fa7893692e0e43b09740b419b23c09 Mon Sep 17 00:00:00 2001 +From: Andrey Vagin +Date: Wed, 29 Jan 2014 19:34:14 +0100 +Subject: netfilter: nf_conntrack: fix RCU race in nf_conntrack_find_get + +From: Andrey Vagin + +commit c6825c0976fa7893692e0e43b09740b419b23c09 upstream. + +Lets look at destroy_conntrack: + +hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); +... +nf_conntrack_free(ct) + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); + +net->ct.nf_conntrack_cachep is created with SLAB_DESTROY_BY_RCU. + +The hash is protected by rcu, so readers look up conntracks without +locks. +A conntrack is removed from the hash, but in this moment a few readers +still can use the conntrack. Then this conntrack is released and another +thread creates conntrack with the same address and the equal tuple. +After this a reader starts to validate the conntrack: +* It's not dying, because a new conntrack was created +* nf_ct_tuple_equal() returns true. + +But this conntrack is not initialized yet, so it can not be used by two +threads concurrently. In this case BUG_ON may be triggered from +nf_nat_setup_info(). + +Florian Westphal suggested to check the confirm bit too. I think it's +right. + +task 1 task 2 task 3 + nf_conntrack_find_get + ____nf_conntrack_find +destroy_conntrack + hlist_nulls_del_rcu + nf_conntrack_free + kmem_cache_free + __nf_conntrack_alloc + kmem_cache_alloc + memset(&ct->tuplehash[IP_CT_DIR_MAX], + if (nf_ct_is_dying(ct)) + if (!nf_ct_tuple_equal() + +I'm not sure, that I have ever seen this race condition in a real life. +Currently we are investigating a bug, which is reproduced on a few nodes. +In our case one conntrack is initialized from a few tasks concurrently, +we don't have any other explanation for this. + +<2>[46267.083061] kernel BUG at net/ipv4/netfilter/nf_nat_core.c:322! +... +<4>[46267.083951] RIP: 0010:[] [] nf_nat_setup_info+0x564/0x590 [nf_nat] +... +<4>[46267.085549] Call Trace: +<4>[46267.085622] [] alloc_null_binding+0x5b/0xa0 [iptable_nat] +<4>[46267.085697] [] nf_nat_rule_find+0x5c/0x80 [iptable_nat] +<4>[46267.085770] [] nf_nat_fn+0x111/0x260 [iptable_nat] +<4>[46267.085843] [] nf_nat_out+0x48/0xd0 [iptable_nat] +<4>[46267.085919] [] nf_iterate+0x69/0xb0 +<4>[46267.085991] [] ? ip_finish_output+0x0/0x2f0 +<4>[46267.086063] [] nf_hook_slow+0x74/0x110 +<4>[46267.086133] [] ? ip_finish_output+0x0/0x2f0 +<4>[46267.086207] [] ? dst_output+0x0/0x20 +<4>[46267.086277] [] ip_output+0xa4/0xc0 +<4>[46267.086346] [] raw_sendmsg+0x8b4/0x910 +<4>[46267.086419] [] inet_sendmsg+0x4a/0xb0 +<4>[46267.086491] [] ? sock_update_classid+0x3a/0x50 +<4>[46267.086562] [] sock_sendmsg+0x117/0x140 +<4>[46267.086638] [] ? _spin_unlock_bh+0x1b/0x20 +<4>[46267.086712] [] ? autoremove_wake_function+0x0/0x40 +<4>[46267.086785] [] ? do_ip_setsockopt+0x90/0xd80 +<4>[46267.086858] [] ? call_function_interrupt+0xe/0x20 +<4>[46267.086936] [] ? ub_slab_ptr+0x20/0x90 +<4>[46267.087006] [] ? ub_slab_ptr+0x20/0x90 +<4>[46267.087081] [] ? kmem_cache_alloc+0xd8/0x1e0 +<4>[46267.087151] [] sys_sendto+0x139/0x190 +<4>[46267.087229] [] ? sock_setsockopt+0x16d/0x6f0 +<4>[46267.087303] [] ? audit_syscall_entry+0x1d7/0x200 +<4>[46267.087378] [] ? __audit_syscall_exit+0x265/0x290 +<4>[46267.087454] [] ? compat_sys_setsockopt+0x75/0x210 +<4>[46267.087531] [] compat_sys_socketcall+0x13f/0x210 +<4>[46267.087607] [] ia32_sysret+0x0/0x5 +<4>[46267.087676] Code: 91 20 e2 01 75 29 48 89 de 4c 89 f7 e8 56 fa ff ff 85 c0 0f 84 68 fc ff ff 0f b6 4d c6 41 8b 45 00 e9 4d fb ff ff e8 7c 19 e9 e0 <0f> 0b eb fe f6 05 17 91 20 e2 80 74 ce 80 3d 5f 2e 00 00 00 74 +<1>[46267.088023] RIP [] nf_nat_setup_info+0x564/0x590 + +Cc: Eric Dumazet +Cc: Florian Westphal +Cc: Pablo Neira Ayuso +Cc: Patrick McHardy +Cc: Jozsef Kadlecsik +Cc: "David S. Miller" +Cc: Cyrill Gorcunov +Signed-off-by: Andrey Vagin +Acked-by: Eric Dumazet +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/nf_conntrack_core.c | 21 +++++++++++++++++---- + 1 file changed, 17 insertions(+), 4 deletions(-) + +--- a/net/netfilter/nf_conntrack_core.c ++++ b/net/netfilter/nf_conntrack_core.c +@@ -311,6 +311,21 @@ static void death_by_timeout(unsigned lo + nf_ct_put(ct); + } + ++static inline bool ++nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, ++ const struct nf_conntrack_tuple *tuple, ++ u16 zone) ++{ ++ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); ++ ++ /* A conntrack can be recreated with the equal tuple, ++ * so we need to check that the conntrack is confirmed ++ */ ++ return nf_ct_tuple_equal(tuple, &h->tuple) && ++ nf_ct_zone(ct) == zone && ++ nf_ct_is_confirmed(ct); ++} ++ + /* + * Warning : + * - Caller must take a reference on returned object +@@ -332,8 +347,7 @@ ____nf_conntrack_find(struct net *net, u + local_bh_disable(); + begin: + hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) { +- if (nf_ct_tuple_equal(tuple, &h->tuple) && +- nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { ++ if (nf_ct_key_equal(h, tuple, zone)) { + NF_CT_STAT_INC(net, found); + local_bh_enable(); + return h; +@@ -380,8 +394,7 @@ begin: + !atomic_inc_not_zero(&ct->ct_general.use))) + h = NULL; + else { +- if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) || +- nf_ct_zone(ct) != zone)) { ++ if (unlikely(!nf_ct_key_equal(h, tuple, zone))) { + nf_ct_put(ct); + goto begin; + } diff --git a/queue-3.10/series b/queue-3.10/series index 80382e6e498..6b7ed17df62 100644 --- a/queue-3.10/series +++ b/queue-3.10/series @@ -1,2 +1,3 @@ tracepoints-do-not-trace-when-cpu-is-offline.patch drm-ast-initialized-data-needed-to-map-fbdev-memory.patch +netfilter-nf_conntrack-fix-rcu-race-in-nf_conntrack_find_get.patch