]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/blob - releases/4.14.111/netfilter-conntrack-fix-cloned-unconfirmed-skb-_nfct.patch
Linux 4.14.111
[thirdparty/kernel/stable-queue.git] / releases / 4.14.111 / netfilter-conntrack-fix-cloned-unconfirmed-skb-_nfct.patch
1 From a30e1f026b58ba33cc8e0984737396c4a0f321c7 Mon Sep 17 00:00:00 2001
2 From: Chieh-Min Wang <chiehminw@synology.com>
3 Date: Tue, 12 Feb 2019 00:59:55 +0100
4 Subject: netfilter: conntrack: fix cloned unconfirmed skb->_nfct race in
5 __nf_conntrack_confirm
6
7 [ Upstream commit 13f5251fd17088170c18844534682d9cab5ff5aa ]
8
9 For bridge(br_flood) or broadcast/multicast packets, they could clone
10 skb with unconfirmed conntrack which break the rule that unconfirmed
11 skb->_nfct is never shared. With nfqueue running on my system, the race
12 can be easily reproduced with following warning calltrace:
13
14 [13257.707525] CPU: 0 PID: 12132 Comm: main Tainted: P W 4.4.60 #7744
15 [13257.707568] Hardware name: Qualcomm (Flattened Device Tree)
16 [13257.714700] [<c021f6dc>] (unwind_backtrace) from [<c021bce8>] (show_stack+0x10/0x14)
17 [13257.720253] [<c021bce8>] (show_stack) from [<c0449e10>] (dump_stack+0x94/0xa8)
18 [13257.728240] [<c0449e10>] (dump_stack) from [<c022a7e0>] (warn_slowpath_common+0x94/0xb0)
19 [13257.735268] [<c022a7e0>] (warn_slowpath_common) from [<c022a898>] (warn_slowpath_null+0x1c/0x24)
20 [13257.743519] [<c022a898>] (warn_slowpath_null) from [<c06ee450>] (__nf_conntrack_confirm+0xa8/0x618)
21 [13257.752284] [<c06ee450>] (__nf_conntrack_confirm) from [<c0772670>] (ipv4_confirm+0xb8/0xfc)
22 [13257.761049] [<c0772670>] (ipv4_confirm) from [<c06e7a60>] (nf_iterate+0x48/0xa8)
23 [13257.769725] [<c06e7a60>] (nf_iterate) from [<c06e7af0>] (nf_hook_slow+0x30/0xb0)
24 [13257.777108] [<c06e7af0>] (nf_hook_slow) from [<c07f20b4>] (br_nf_post_routing+0x274/0x31c)
25 [13257.784486] [<c07f20b4>] (br_nf_post_routing) from [<c06e7a60>] (nf_iterate+0x48/0xa8)
26 [13257.792556] [<c06e7a60>] (nf_iterate) from [<c06e7af0>] (nf_hook_slow+0x30/0xb0)
27 [13257.800458] [<c06e7af0>] (nf_hook_slow) from [<c07e5580>] (br_forward_finish+0x94/0xa4)
28 [13257.808010] [<c07e5580>] (br_forward_finish) from [<c07f22ac>] (br_nf_forward_finish+0x150/0x1ac)
29 [13257.815736] [<c07f22ac>] (br_nf_forward_finish) from [<c06e8df0>] (nf_reinject+0x108/0x170)
30 [13257.824762] [<c06e8df0>] (nf_reinject) from [<c06ea854>] (nfqnl_recv_verdict+0x3d8/0x420)
31 [13257.832924] [<c06ea854>] (nfqnl_recv_verdict) from [<c06e940c>] (nfnetlink_rcv_msg+0x158/0x248)
32 [13257.841256] [<c06e940c>] (nfnetlink_rcv_msg) from [<c06e5564>] (netlink_rcv_skb+0x54/0xb0)
33 [13257.849762] [<c06e5564>] (netlink_rcv_skb) from [<c06e4ec8>] (netlink_unicast+0x148/0x23c)
34 [13257.858093] [<c06e4ec8>] (netlink_unicast) from [<c06e5364>] (netlink_sendmsg+0x2ec/0x368)
35 [13257.866348] [<c06e5364>] (netlink_sendmsg) from [<c069fb8c>] (sock_sendmsg+0x34/0x44)
36 [13257.874590] [<c069fb8c>] (sock_sendmsg) from [<c06a03dc>] (___sys_sendmsg+0x1ec/0x200)
37 [13257.882489] [<c06a03dc>] (___sys_sendmsg) from [<c06a11c8>] (__sys_sendmsg+0x3c/0x64)
38 [13257.890300] [<c06a11c8>] (__sys_sendmsg) from [<c0209b40>] (ret_fast_syscall+0x0/0x34)
39
40 The original code just triggered the warning but do nothing. It will
41 caused the shared conntrack moves to the dying list and the packet be
42 droppped (nf_ct_resolve_clash returns NF_DROP for dying conntrack).
43
44 - Reproduce steps:
45
46 +----------------------------+
47 | br0(bridge) |
48 | |
49 +-+---------+---------+------+
50 | eth0| | eth1| | eth2|
51 | | | | | |
52 +--+--+ +--+--+ +---+-+
53 | | |
54 | | |
55 +--+-+ +-+--+ +--+-+
56 | PC1| | PC2| | PC3|
57 +----+ +----+ +----+
58
59 iptables -A FORWARD -m mark --mark 0x1000000/0x1000000 -j NFQUEUE --queue-num 100 --queue-bypass
60
61 ps: Our nfq userspace program will set mark on packets whose connection
62 has already been processed.
63
64 PC1 sends broadcast packets simulated by hping3:
65
66 hping3 --rand-source --udp 192.168.1.255 -i u100
67
68 - Broadcast racing flow chart is as follow:
69
70 br_handle_frame
71 BR_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, br_handle_frame_finish)
72 // skb->_nfct (unconfirmed conntrack) is constructed at PRE_ROUTING stage
73 br_handle_frame_finish
74 // check if this packet is broadcast
75 br_flood_forward
76 br_flood
77 list_for_each_entry_rcu(p, &br->port_list, list) // iterate through each port
78 maybe_deliver
79 deliver_clone
80 skb = skb_clone(skb)
81 __br_forward
82 BR_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,...)
83 // queue in our nfq and received by our userspace program
84 // goto __nf_conntrack_confirm with process context on CPU 1
85 br_pass_frame_up
86 BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,...)
87 // goto __nf_conntrack_confirm with softirq context on CPU 0
88
89 Because conntrack confirm can happen at both INPUT and POSTROUTING
90 stage. So with NFQUEUE running, skb->_nfct with the same unconfirmed
91 conntrack could race on different core.
92
93 This patch fixes a repeating kernel splat, now it is only displayed
94 once.
95
96 Signed-off-by: Chieh-Min Wang <chiehminw@synology.com>
97 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
98 Signed-off-by: Sasha Levin <sashal@kernel.org>
99 ---
100 net/netfilter/nf_conntrack_core.c | 14 +++++++++++---
101 1 file changed, 11 insertions(+), 3 deletions(-)
102
103 diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
104 index f07357ba9629..06520bf30f29 100644
105 --- a/net/netfilter/nf_conntrack_core.c
106 +++ b/net/netfilter/nf_conntrack_core.c
107 @@ -763,10 +763,18 @@ __nf_conntrack_confirm(struct sk_buff *skb)
108 * REJECT will give spurious warnings here.
109 */
110
111 - /* No external references means no one else could have
112 - * confirmed us.
113 + /* Another skb with the same unconfirmed conntrack may
114 + * win the race. This may happen for bridge(br_flood)
115 + * or broadcast/multicast packets do skb_clone with
116 + * unconfirmed conntrack.
117 */
118 - WARN_ON(nf_ct_is_confirmed(ct));
119 + if (unlikely(nf_ct_is_confirmed(ct))) {
120 + WARN_ON_ONCE(1);
121 + nf_conntrack_double_unlock(hash, reply_hash);
122 + local_bh_enable();
123 + return NF_DROP;
124 + }
125 +
126 pr_debug("Confirming conntrack %p\n", ct);
127 /* We have to check the DYING flag after unlink to prevent
128 * a race against nf_ct_get_next_corpse() possibly called from
129 --
130 2.19.1
131