From: Sasha Levin
Date: Sat, 11 May 2019 01:45:50 +0000 (-0400)
Subject: fixes for 4.9
X-Git-Tag: v5.1.1~3
X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=d60d0101535c33cedce7ca59379197fe933ac84c;p=thirdparty%2Fkernel%2Fstable-queue.git

fixes for 4.9

Signed-off-by: Sasha Levin
---
diff --git a/queue-4.9/bpf-convert-htab-map-to-hlist_nulls.patch b/queue-4.9/bpf-convert-htab-map-to-hlist_nulls.patch
new file mode 100644
index 00000000000..36b2c83f7d6
--- /dev/null
+++ b/queue-4.9/bpf-convert-htab-map-to-hlist_nulls.patch
@@ -0,0 +1,283 @@
+From 5135fb434c1d65705d412595519629f993160cc3 Mon Sep 17 00:00:00 2001
+From: Alexei Starovoitov
+Date: Thu, 9 May 2019 19:33:54 -0700
+Subject: bpf: convert htab map to hlist_nulls
+
+commit 4fe8435909fddc97b81472026aa954e06dd192a5 upstream.
+
+when all map elements are pre-allocated one cpu can delete and reuse htab_elem
+while another cpu is still walking the hlist. In such case the lookup may
+miss the element. Convert hlist to hlist_nulls to avoid such scenario.
+When bucket lock is taken there is no need to take such precautions,
+so only convert map_lookup and map_get_next to nulls.
+The race window is extremely small and only reproducible with explicit
+udelay() inside lookup_nulls_elem_raw()
+
+Similar to hlist add hlist_nulls_for_each_entry_safe() and
+hlist_nulls_entry_safe() helpers.
+
+Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
+Reported-by: Jonathan Perry
+Signed-off-by: Alexei Starovoitov
+Acked-by: Daniel Borkmann
+Signed-off-by: David S. Miller
+Signed-off-by: Chenbo Feng
+Signed-off-by: Sasha Levin
+---
+ include/linux/list_nulls.h    |  5 +++
+ include/linux/rculist_nulls.h | 14 +++++++
+ kernel/bpf/hashtab.c          | 71 +++++++++++++++++++++++------------
+ 3 files changed, 67 insertions(+), 23 deletions(-)
+
+diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
+index b01fe10090843..87ff4f58a2f01 100644
+--- a/include/linux/list_nulls.h
++++ b/include/linux/list_nulls.h
+@@ -29,6 +29,11 @@ struct hlist_nulls_node {
+ 	((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
+ 
+ #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
++
++#define hlist_nulls_entry_safe(ptr, type, member) \
++	({ typeof(ptr) ____ptr = (ptr); \
++	   !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
++	})
+ /**
+  * ptr_is_a_nulls - Test if a ptr is a nulls
+  * @ptr: ptr to be tested
+diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
+index 6224a0ab0b1e8..2720b2fbfb86d 100644
+--- a/include/linux/rculist_nulls.h
++++ b/include/linux/rculist_nulls.h
+@@ -118,5 +118,19 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
+ 		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
+ 		pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
+ 
++/**
++ * hlist_nulls_for_each_entry_safe -
++ *   iterate over list of given type safe against removal of list entry
++ * @tpos: the type * to use as a loop cursor.
++ * @pos: the &struct hlist_nulls_node to use as a loop cursor.
++ * @head: the head for your list.
++ * @member: the name of the hlist_nulls_node within the struct.
++ */
++#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member) \
++	for (({barrier();}), \
++	     pos = rcu_dereference_raw(hlist_nulls_first_rcu(head)); \
++		(!is_a_nulls(pos)) && \
++		({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); \
++		   pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
+ #endif
+ #endif
+diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
+index f9d53ac57f640..8648d7d297081 100644
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -13,10 +13,11 @@
+ #include
+ #include
+ #include
++#include
+ #include "percpu_freelist.h"
+ 
+ struct bucket {
+-	struct hlist_head head;
++	struct hlist_nulls_head head;
+ 	raw_spinlock_t lock;
+ };
+ 
+@@ -40,7 +41,7 @@ enum extra_elem_state {
+ /* each htab element is struct htab_elem + key + value */
+ struct htab_elem {
+ 	union {
+-		struct hlist_node hash_node;
++		struct hlist_nulls_node hash_node;
+ 		struct {
+ 			void *padding;
+ 			union {
+@@ -245,7 +246,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+ 		goto free_htab;
+ 
+ 	for (i = 0; i < htab->n_buckets; i++) {
+-		INIT_HLIST_HEAD(&htab->buckets[i].head);
++		INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
+ 		raw_spin_lock_init(&htab->buckets[i].lock);
+ 	}
+ 
+@@ -282,28 +283,52 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+ 	return &htab->buckets[hash & (htab->n_buckets - 1)];
+ }
+ 
+-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
++static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
+ {
+ 	return &__select_bucket(htab, hash)->head;
+ }
+ 
+-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
++/* this lookup function can only be called with bucket lock taken */
++static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
+ 					 void *key, u32 key_size)
+ {
++	struct hlist_nulls_node *n;
+ 	struct htab_elem *l;
+ 
+-	hlist_for_each_entry_rcu(l, head, hash_node)
++	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+ 		if (l->hash == hash && !memcmp(&l->key, key, key_size))
+ 			return l;
+ 
+ 	return NULL;
+ }
+ 
++/* can be called without bucket lock. it will repeat the loop in
++ * the unlikely event when elements moved from one bucket into another
++ * while link list is being walked
++ */
++static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
++					       u32 hash, void *key,
++					       u32 key_size, u32 n_buckets)
++{
++	struct hlist_nulls_node *n;
++	struct htab_elem *l;
++
++again:
++	hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
++		if (l->hash == hash && !memcmp(&l->key, key, key_size))
++			return l;
++
++	if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
++		goto again;
++
++	return NULL;
++}
++
+ /* Called from syscall or from eBPF program */
+ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
+ {
+ 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+-	struct hlist_head *head;
++	struct hlist_nulls_head *head;
+ 	struct htab_elem *l;
+ 	u32 hash, key_size;
+@@ -316,7 +341,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
+ 
+ 	head = select_bucket(htab, hash);
+ 
+-	l = lookup_elem_raw(head, hash, key, key_size);
++	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
+ 
+ 	return l;
+ }
+@@ -335,7 +360,7 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
+ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+ {
+ 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+-	struct hlist_head *head;
++	struct hlist_nulls_head *head;
+ 	struct htab_elem *l, *next_l;
+ 	u32 hash, key_size;
+ 	int i = 0;
+@@ -352,13 +377,13 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+ 	head = select_bucket(htab, hash);
+ 
+ 	/* lookup the key */
+-	l = lookup_elem_raw(head, hash, key, key_size);
++	l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
+ 
+ 	if (!l)
+ 		goto find_first_elem;
+ 
+ 	/* key was found, get next key in the same bucket */
+-	next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
++	next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
+ 				  struct htab_elem, hash_node);
+ 
+ 	if (next_l) {
+@@ -377,7 +402,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+ 		head = select_bucket(htab, i);
+ 
+ 		/* pick first element in the bucket */
+-		next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
++		next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
+ 					  struct htab_elem, hash_node);
+ 		if (next_l) {
+ 			/* if it's not empty, just return it */
+@@ -534,7 +559,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
+ {
+ 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ 	struct htab_elem *l_new = NULL, *l_old;
+-	struct hlist_head *head;
++	struct hlist_nulls_head *head;
+ 	unsigned long flags;
+ 	struct bucket *b;
+ 	u32 key_size, hash;
+@@ -573,9 +598,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
+ 	/* add new element to the head of the list, so that
+ 	 * concurrent search will find it before old elem
+ 	 */
+-	hlist_add_head_rcu(&l_new->hash_node, head);
++	hlist_nulls_add_head_rcu(&l_new->hash_node, head);
+ 	if (l_old) {
+-		hlist_del_rcu(&l_old->hash_node);
++		hlist_nulls_del_rcu(&l_old->hash_node);
+ 		free_htab_elem(htab, l_old);
+ 	}
+ 	ret = 0;
+@@ -590,7 +615,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+ {
+ 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ 	struct htab_elem *l_new = NULL, *l_old;
+-	struct hlist_head *head;
++	struct hlist_nulls_head *head;
+ 	unsigned long flags;
+ 	struct bucket *b;
+ 	u32 key_size, hash;
+@@ -642,7 +667,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+ 			ret = PTR_ERR(l_new);
+ 			goto err;
+ 		}
+-		hlist_add_head_rcu(&l_new->hash_node, head);
++		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
+ 	}
+ 	ret = 0;
+ err:
+@@ -660,7 +685,7 @@ static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+ static int htab_map_delete_elem(struct bpf_map *map, void *key)
+ {
+ 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+-	struct hlist_head *head;
++	struct hlist_nulls_head *head;
+ 	struct bucket *b;
+ 	struct htab_elem *l;
+ 	unsigned long flags;
+@@ -680,7 +705,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
+ 	l = lookup_elem_raw(head, hash, key, key_size);
+ 
+ 	if (l) {
+-		hlist_del_rcu(&l->hash_node);
++		hlist_nulls_del_rcu(&l->hash_node);
+ 		free_htab_elem(htab, l);
+ 		ret = 0;
+ 	}
+@@ -694,12 +719,12 @@ static void delete_all_elements(struct bpf_htab *htab)
+ 	int i;
+ 
+ 	for (i = 0; i < htab->n_buckets; i++) {
+-		struct hlist_head *head = select_bucket(htab, i);
+-		struct hlist_node *n;
++		struct hlist_nulls_head *head = select_bucket(htab, i);
++		struct hlist_nulls_node *n;
+ 		struct htab_elem *l;
+ 
+-		hlist_for_each_entry_safe(l, n, head, hash_node) {
+-			hlist_del_rcu(&l->hash_node);
++		hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
++			hlist_nulls_del_rcu(&l->hash_node);
+ 			if (l->state != HTAB_EXTRA_ELEM_USED)
+ 				htab_elem_free(htab, l);
+ 		}
+-- 
+2.20.1
+
diff --git a/queue-4.9/bpf-fix-struct-htab_elem-layout.patch b/queue-4.9/bpf-fix-struct-htab_elem-layout.patch
new file mode 100644
index 00000000000..047a5af91e5
--- /dev/null
+++ b/queue-4.9/bpf-fix-struct-htab_elem-layout.patch
@@ -0,0 +1,88 @@
+From 7a0847107ec1c0477b91c8e67b50f5b0f982c6d8 Mon Sep 17 00:00:00 2001
+From: Alexei Starovoitov
+Date: Thu, 9 May 2019 19:33:53 -0700
+Subject: bpf: fix struct htab_elem layout
+
+commit 9f691549f76d488a0c74397b3e51e943865ea01f upstream.
+
+when htab_elem is removed from the bucket list the htab_elem.hash_node.next
+field should not be overridden too early otherwise we have a tiny race window
+between lookup and delete.
+The bug was discovered by manual code analysis and reproducible
+only with explicit udelay() in lookup_elem_raw().
+
+Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
+Reported-by: Jonathan Perry
+Signed-off-by: Alexei Starovoitov
+Acked-by: Daniel Borkmann
+Signed-off-by: David S. Miller
+Signed-off-by: Chenbo Feng
+Signed-off-by: Sasha Levin
+---
+ kernel/bpf/hashtab.c | 28 ++++++++++++++++++++++------
+ 1 file changed, 22 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
+index a36a532c056df..f9d53ac57f640 100644
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -41,8 +41,13 @@ enum extra_elem_state {
+ struct htab_elem {
+ 	union {
+ 		struct hlist_node hash_node;
+-		struct bpf_htab *htab;
+-		struct pcpu_freelist_node fnode;
++		struct {
++			void *padding;
++			union {
++				struct bpf_htab *htab;
++				struct pcpu_freelist_node fnode;
++			};
++		};
+ 	};
+ 	union {
+ 		struct rcu_head rcu;
+@@ -114,8 +119,10 @@ static int prealloc_elems_and_freelist(struct bpf_htab *htab)
+ 	if (err)
+ 		goto free_elems;
+ 
+-	pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size,
+-			       htab->map.max_entries);
++	pcpu_freelist_populate(&htab->freelist,
++			       htab->elems + offsetof(struct htab_elem, fnode),
++			       htab->elem_size, htab->map.max_entries);
++
+ 	return 0;
+ 
+ free_elems:
+@@ -148,6 +155,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+ 	int err, i;
+ 	u64 cost;
+ 
++	BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
++		     offsetof(struct htab_elem, hash_node.pprev));
++	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
++		     offsetof(struct htab_elem, hash_node.pprev));
++
+ 	if (attr->map_flags & ~BPF_F_NO_PREALLOC)
+ 		/* reserved bits should not be used */
+ 		return ERR_PTR(-EINVAL);
+@@ -429,9 +441,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
+ 	int err = 0;
+ 
+ 	if (prealloc) {
+-		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
+-		if (!l_new)
++		struct pcpu_freelist_node *l;
++
++		l = pcpu_freelist_pop(&htab->freelist);
++		if (!l)
+ 			err = -E2BIG;
++		else
++			l_new = container_of(l, struct htab_elem, fnode);
+ 	} else {
+ 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
+ 			atomic_dec(&htab->count);
+-- 
+2.20.1
+
diff --git a/queue-4.9/netfilter-compat-initialize-all-fields-in-xt_init.patch b/queue-4.9/netfilter-compat-initialize-all-fields-in-xt_init.patch
new file mode 100644
index 00000000000..9a29d13c62c
--- /dev/null
+++ b/queue-4.9/netfilter-compat-initialize-all-fields-in-xt_init.patch
@@ -0,0 +1,70 @@
+From 7b787dc8c9103bf222ebb85c342af971d408f68f Mon Sep 17 00:00:00 2001
+From: Francesco Ruggeri
+Date: Fri, 10 May 2019 09:19:30 -0700
+Subject: netfilter: compat: initialize all fields in xt_init
+
+commit 8d29d16d21342a0c86405d46de0c4ac5daf1760f upstream
+
+If a non zero value happens to be in xt[NFPROTO_BRIDGE].cur at init
+time, the following panic can be caused by running
+
+% ebtables -t broute -F BROUTING
+
+from a 32-bit user level on a 64-bit kernel. This patch replaces
+kmalloc_array with kcalloc when allocating xt.
+
+[ 474.680846] BUG: unable to handle kernel paging request at 0000000009600920
+[ 474.687869] PGD 2037006067 P4D 2037006067 PUD 2038938067 PMD 0
+[ 474.693838] Oops: 0000 [#1] SMP
+[ 474.697055] CPU: 9 PID: 4662 Comm: ebtables Kdump: loaded Not tainted 4.19.17-11302235.AroraKernelnext.fc18.x86_64 #1
+[ 474.707721] Hardware name: Supermicro X9DRT/X9DRT, BIOS 3.0 06/28/2013
+[ 474.714313] RIP: 0010:xt_compat_calc_jump+0x2f/0x63 [x_tables]
+[ 474.720201] Code: 40 0f b6 ff 55 31 c0 48 6b ff 70 48 03 3d dc 45 00 00 48 89 e5 8b 4f 6c 4c 8b 47 60 ff c9 39 c8 7f 2f 8d 14 08 d1 fa 48 63 fa <41> 39 34 f8 4c 8d 0c fd 00 00 00 00 73 05 8d 42 01 eb e1 76 05 8d
+[ 474.739023] RSP: 0018:ffffc9000943fc58 EFLAGS: 00010207
+[ 474.744296] RAX: 0000000000000000 RBX: ffffc90006465000 RCX: 0000000002580249
+[ 474.751485] RDX: 00000000012c0124 RSI: fffffffff7be17e9 RDI: 00000000012c0124
+[ 474.758670] RBP: ffffc9000943fc58 R08: 0000000000000000 R09: ffffffff8117cf8f
+[ 474.765855] R10: ffffc90006477000 R11: 0000000000000000 R12: 0000000000000001
+[ 474.773048] R13: 0000000000000000 R14: ffffc9000943fcb8 R15: ffffc9000943fcb8
+[ 474.780234] FS: 0000000000000000(0000) GS:ffff88a03f840000(0063) knlGS:00000000f7ac7700
+[ 474.788612] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033
+[ 474.794632] CR2: 0000000009600920 CR3: 0000002037422006 CR4: 00000000000606e0
+[ 474.802052] Call Trace:
+[ 474.804789]  compat_do_replace+0x1fb/0x2a3 [ebtables]
+[ 474.810105]  compat_do_ebt_set_ctl+0x69/0xe6 [ebtables]
+[ 474.815605]  ? try_module_get+0x37/0x42
+[ 474.819716]  compat_nf_setsockopt+0x4f/0x6d
+[ 474.824172]  compat_ip_setsockopt+0x7e/0x8c
+[ 474.828641]  compat_raw_setsockopt+0x16/0x3a
+[ 474.833220]  compat_sock_common_setsockopt+0x1d/0x24
+[ 474.838458]  __compat_sys_setsockopt+0x17e/0x1b1
+[ 474.843343]  ? __check_object_size+0x76/0x19a
+[ 474.847960]  __ia32_compat_sys_socketcall+0x1cb/0x25b
+[ 474.853276]  do_fast_syscall_32+0xaf/0xf6
+[ 474.857548]  entry_SYSENTER_compat+0x6b/0x7a
+
+Signed-off-by: Francesco Ruggeri
+Acked-by: Florian Westphal
+Signed-off-by: Pablo Neira Ayuso
+Signed-off-by: Zubin Mithra
+Signed-off-by: Sasha Levin
+---
+ net/netfilter/x_tables.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
+index 751fec729ffb0..e065140d0c93b 100644
+--- a/net/netfilter/x_tables.c
++++ b/net/netfilter/x_tables.c
+@@ -1728,7 +1728,7 @@ static int __init xt_init(void)
+ 		seqcount_init(&per_cpu(xt_recseq, i));
+ 	}
+ 
+-	xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
++	xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
+ 	if (!xt)
+ 		return -ENOMEM;
+ 
+-- 
+2.20.1
+
diff --git a/queue-4.9/series b/queue-4.9/series
new file mode 100644
index 00000000000..8acecc1c4d9
--- /dev/null
+++ b/queue-4.9/series
@@ -0,0 +1,3 @@
+netfilter-compat-initialize-all-fields-in-xt_init.patch
+bpf-fix-struct-htab_elem-layout.patch
+bpf-convert-htab-map-to-hlist_nulls.patch