]>
Commit | Line | Data |
---|---|---|
d60d0101 SL |
1 | From 7a0847107ec1c0477b91c8e67b50f5b0f982c6d8 Mon Sep 17 00:00:00 2001 |
2 | From: Alexei Starovoitov <ast@fb.com> | |
3 | Date: Thu, 9 May 2019 19:33:53 -0700 | |
4 | Subject: bpf: fix struct htab_elem layout | |
5 | ||
6 | commit 9f691549f76d488a0c74397b3e51e943865ea01f upstream. | |
7 | ||
8 | when htab_elem is removed from the bucket list the htab_elem.hash_node.next | |
9 | field should not be overridden too early otherwise we have a tiny race window | |
10 | between lookup and delete. | |
11 | The bug was discovered by manual code analysis and reproducible | |
12 | only with explicit udelay() in lookup_elem_raw(). | |
13 | ||
14 | Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") | |
15 | Reported-by: Jonathan Perry <jonperry@fb.com> | |
16 | Signed-off-by: Alexei Starovoitov <ast@kernel.org> | |
17 | Acked-by: Daniel Borkmann <daniel@iogearbox.net> | |
18 | Signed-off-by: David S. Miller <davem@davemloft.net> | |
19 | Signed-off-by: Chenbo Feng <fengc@google.com> | |
20 | Signed-off-by: Sasha Levin <sashal@kernel.org> | |
21 | --- | |
22 | kernel/bpf/hashtab.c | 28 ++++++++++++++++++++++------ | |
23 | 1 file changed, 22 insertions(+), 6 deletions(-) | |
24 | ||
25 | diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c | |
26 | index a36a532c056df..f9d53ac57f640 100644 | |
27 | --- a/kernel/bpf/hashtab.c | |
28 | +++ b/kernel/bpf/hashtab.c | |
29 | @@ -41,8 +41,13 @@ enum extra_elem_state { | |
30 | struct htab_elem { | |
31 | union { | |
32 | struct hlist_node hash_node; | |
33 | - struct bpf_htab *htab; | |
34 | - struct pcpu_freelist_node fnode; | |
35 | + struct { | |
36 | + void *padding; | |
37 | + union { | |
38 | + struct bpf_htab *htab; | |
39 | + struct pcpu_freelist_node fnode; | |
40 | + }; | |
41 | + }; | |
42 | }; | |
43 | union { | |
44 | struct rcu_head rcu; | |
45 | @@ -114,8 +119,10 @@ static int prealloc_elems_and_freelist(struct bpf_htab *htab) | |
46 | if (err) | |
47 | goto free_elems; | |
48 | ||
49 | - pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size, | |
50 | - htab->map.max_entries); | |
51 | + pcpu_freelist_populate(&htab->freelist, | |
52 | + htab->elems + offsetof(struct htab_elem, fnode), | |
53 | + htab->elem_size, htab->map.max_entries); | |
54 | + | |
55 | return 0; | |
56 | ||
57 | free_elems: | |
58 | @@ -148,6 +155,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) | |
59 | int err, i; | |
60 | u64 cost; | |
61 | ||
62 | + BUILD_BUG_ON(offsetof(struct htab_elem, htab) != | |
63 | + offsetof(struct htab_elem, hash_node.pprev)); | |
64 | + BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) != | |
65 | + offsetof(struct htab_elem, hash_node.pprev)); | |
66 | + | |
67 | if (attr->map_flags & ~BPF_F_NO_PREALLOC) | |
68 | /* reserved bits should not be used */ | |
69 | return ERR_PTR(-EINVAL); | |
70 | @@ -429,9 +441,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, | |
71 | int err = 0; | |
72 | ||
73 | if (prealloc) { | |
74 | - l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); | |
75 | - if (!l_new) | |
76 | + struct pcpu_freelist_node *l; | |
77 | + | |
78 | + l = pcpu_freelist_pop(&htab->freelist); | |
79 | + if (!l) | |
80 | err = -E2BIG; | |
81 | + else | |
82 | + l_new = container_of(l, struct htab_elem, fnode); | |
83 | } else { | |
84 | if (atomic_inc_return(&htab->count) > htab->map.max_entries) { | |
85 | atomic_dec(&htab->count); | |
86 | -- | |
87 | 2.20.1 | |
88 |