]>
Commit | Line | Data |
---|---|---|
f1359f75 GKH |
1 | From foo@baz Thu Dec 14 11:45:58 CET 2017 |
2 | From: Eric Dumazet <edumazet@google.com> | |
3 | Date: Tue, 5 Dec 2017 12:45:56 -0800 | |
4 | Subject: net: remove hlist_nulls_add_tail_rcu() | |
5 | ||
6 | From: Eric Dumazet <edumazet@google.com> | |
7 | ||
8 | ||
9 | [ Upstream commit d7efc6c11b277d9d80b99b1334a78bfe7d7edf10 ] | |
10 | ||
11 | Alexander Potapenko reported use of uninitialized memory [1] | |
12 | ||
13 | This happens when inserting a request socket into TCP ehash, | |
14 | in __sk_nulls_add_node_rcu(), since sk_reuseport is not initialized. | |
15 | ||
16 | Bug was added by commit d894ba18d4e4 ("soreuseport: fix ordering for | |
17 | mixed v4/v6 sockets") | |
18 | ||
19 | Note that d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 | |
20 | ordering fix") missed the opportunity to get rid of | |
21 | hlist_nulls_add_tail_rcu() : | |
22 | ||
23 | Both UDP sockets and TCP/DCCP listeners no longer use | |
24 | __sk_nulls_add_node_rcu() for their hash insertion. | |
25 | ||
26 | Since all other sockets have unique 4-tuple, the reuseport status | |
27 | has no special meaning, so we can always use hlist_nulls_add_head_rcu() | |
28 | for them and save a few cycles/instructions. | |
29 | ||
30 | [1] | |
31 | ||
32 | ================================================================== | |
33 | BUG: KMSAN: use of uninitialized memory in inet_ehash_insert+0xd40/0x1050 | |
34 | CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0+ #3288 | |
35 | Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 | |
36 | Call Trace: | |
37 | <IRQ> | |
38 | __dump_stack lib/dump_stack.c:16 | |
39 | dump_stack+0x185/0x1d0 lib/dump_stack.c:52 | |
40 | kmsan_report+0x13f/0x1c0 mm/kmsan/kmsan.c:1016 | |
41 | __msan_warning_32+0x69/0xb0 mm/kmsan/kmsan_instr.c:766 | |
42 | __sk_nulls_add_node_rcu ./include/net/sock.h:684 | |
43 | inet_ehash_insert+0xd40/0x1050 net/ipv4/inet_hashtables.c:413 | |
44 | reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:754 | |
45 | inet_csk_reqsk_queue_hash_add+0x1cc/0x300 net/ipv4/inet_connection_sock.c:765 | |
46 | tcp_conn_request+0x31e7/0x36f0 net/ipv4/tcp_input.c:6414 | |
47 | tcp_v4_conn_request+0x16d/0x220 net/ipv4/tcp_ipv4.c:1314 | |
48 | tcp_rcv_state_process+0x42a/0x7210 net/ipv4/tcp_input.c:5917 | |
49 | tcp_v4_do_rcv+0xa6a/0xcd0 net/ipv4/tcp_ipv4.c:1483 | |
50 | tcp_v4_rcv+0x3de0/0x4ab0 net/ipv4/tcp_ipv4.c:1763 | |
51 | ip_local_deliver_finish+0x6bb/0xcb0 net/ipv4/ip_input.c:216 | |
52 | NF_HOOK ./include/linux/netfilter.h:248 | |
53 | ip_local_deliver+0x3fa/0x480 net/ipv4/ip_input.c:257 | |
54 | dst_input ./include/net/dst.h:477 | |
55 | ip_rcv_finish+0x6fb/0x1540 net/ipv4/ip_input.c:397 | |
56 | NF_HOOK ./include/linux/netfilter.h:248 | |
57 | ip_rcv+0x10f6/0x15c0 net/ipv4/ip_input.c:488 | |
58 | __netif_receive_skb_core+0x36f6/0x3f60 net/core/dev.c:4298 | |
59 | __netif_receive_skb net/core/dev.c:4336 | |
60 | netif_receive_skb_internal+0x63c/0x19c0 net/core/dev.c:4497 | |
61 | napi_skb_finish net/core/dev.c:4858 | |
62 | napi_gro_receive+0x629/0xa50 net/core/dev.c:4889 | |
63 | e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4018 | |
64 | e1000_clean_rx_irq+0x1492/0x1d30 | |
65 | drivers/net/ethernet/intel/e1000/e1000_main.c:4474 | |
66 | e1000_clean+0x43aa/0x5970 drivers/net/ethernet/intel/e1000/e1000_main.c:3819 | |
67 | napi_poll net/core/dev.c:5500 | |
68 | net_rx_action+0x73c/0x1820 net/core/dev.c:5566 | |
69 | __do_softirq+0x4b4/0x8dd kernel/softirq.c:284 | |
70 | invoke_softirq kernel/softirq.c:364 | |
71 | irq_exit+0x203/0x240 kernel/softirq.c:405 | |
72 | exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:638 | |
73 | do_IRQ+0x15e/0x1a0 arch/x86/kernel/irq.c:263 | |
74 | common_interrupt+0x86/0x86 | |
75 | ||
76 | Fixes: d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets") | |
77 | Fixes: d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix") | |
78 | Signed-off-by: Eric Dumazet <edumazet@google.com> | |
79 | Reported-by: Alexander Potapenko <glider@google.com> | |
80 | Acked-by: Craig Gallek <kraig@google.com> | |
81 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
82 | Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> | |
83 | --- | |
84 | include/linux/rculist_nulls.h | 38 -------------------------------------- | |
85 | include/net/sock.h | 6 +----- | |
86 | 2 files changed, 1 insertion(+), 43 deletions(-) | |
87 | ||
88 | --- a/include/linux/rculist_nulls.h | |
89 | +++ b/include/linux/rculist_nulls.h | |
90 | @@ -100,44 +100,6 @@ static inline void hlist_nulls_add_head_ | |
91 | } | |
92 | ||
93 | /** | |
94 | - * hlist_nulls_add_tail_rcu | |
95 | - * @n: the element to add to the hash list. | |
96 | - * @h: the list to add to. | |
97 | - * | |
98 | - * Description: | |
99 | - * Adds the specified element to the end of the specified hlist_nulls, | |
100 | - * while permitting racing traversals. NOTE: tail insertion requires | |
101 | - * list traversal. | |
102 | - * | |
103 | - * The caller must take whatever precautions are necessary | |
104 | - * (such as holding appropriate locks) to avoid racing | |
105 | - * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() | |
106 | - * or hlist_nulls_del_rcu(), running on this same list. | |
107 | - * However, it is perfectly legal to run concurrently with | |
108 | - * the _rcu list-traversal primitives, such as | |
109 | - * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency | |
110 | - * problems on Alpha CPUs. Regardless of the type of CPU, the | |
111 | - * list-traversal primitive must be guarded by rcu_read_lock(). | |
112 | - */ | |
113 | -static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, | |
114 | - struct hlist_nulls_head *h) | |
115 | -{ | |
116 | - struct hlist_nulls_node *i, *last = NULL; | |
117 | - | |
118 | - for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); | |
119 | - i = hlist_nulls_next_rcu(i)) | |
120 | - last = i; | |
121 | - | |
122 | - if (last) { | |
123 | - n->next = last->next; | |
124 | - n->pprev = &last->next; | |
125 | - rcu_assign_pointer(hlist_nulls_next_rcu(last), n); | |
126 | - } else { | |
127 | - hlist_nulls_add_head_rcu(n, h); | |
128 | - } | |
129 | -} | |
130 | - | |
131 | -/** | |
132 | * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type | |
133 | * @tpos: the type * to use as a loop cursor. | |
134 | * @pos: the &struct hlist_nulls_node to use as a loop cursor. | |
135 | --- a/include/net/sock.h | |
136 | +++ b/include/net/sock.h | |
137 | @@ -648,11 +648,7 @@ static inline void sk_add_node_rcu(struc | |
138 | ||
139 | static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) | |
140 | { | |
141 | - if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && | |
142 | - sk->sk_family == AF_INET6) | |
143 | - hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); | |
144 | - else | |
145 | - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); | |
146 | + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); | |
147 | } | |
148 | ||
149 | static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) |