fixes for 4.9
author     Sasha Levin <sashal@kernel.org>
           Sat, 11 May 2019 01:45:50 +0000 (21:45 -0400)
committer  Sasha Levin <sashal@kernel.org>
           Sat, 11 May 2019 01:45:50 +0000 (21:45 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.9/bpf-convert-htab-map-to-hlist_nulls.patch [new file with mode: 0644]
queue-4.9/bpf-fix-struct-htab_elem-layout.patch [new file with mode: 0644]
queue-4.9/netfilter-compat-initialize-all-fields-in-xt_init.patch [new file with mode: 0644]
queue-4.9/series [new file with mode: 0644]

diff --git a/queue-4.9/bpf-convert-htab-map-to-hlist_nulls.patch b/queue-4.9/bpf-convert-htab-map-to-hlist_nulls.patch
new file mode 100644 (file)
index 0000000..36b2c83
--- /dev/null
@@ -0,0 +1,283 @@
+From 5135fb434c1d65705d412595519629f993160cc3 Mon Sep 17 00:00:00 2001
+From: Alexei Starovoitov <ast@fb.com>
+Date: Thu, 9 May 2019 19:33:54 -0700
+Subject: bpf: convert htab map to hlist_nulls
+
+commit 4fe8435909fddc97b81472026aa954e06dd192a5 upstream.
+
+When all map elements are pre-allocated, one cpu can delete and reuse an
+htab_elem while another cpu is still walking the hlist. In such a case the
+lookup may miss the element. Convert hlist to hlist_nulls to avoid this
+scenario. When the bucket lock is taken there is no need for such
+precautions, so only convert map_lookup and map_get_next to nulls.
+The race window is extremely small and is only reproducible with an
+explicit udelay() inside lookup_nulls_elem_raw().
+
+Similar to hlist, add hlist_nulls_for_each_entry_safe() and
+hlist_nulls_entry_safe() helpers.
+
+Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
+Reported-by: Jonathan Perry <jonperry@fb.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chenbo Feng <fengc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/list_nulls.h    |  5 +++
+ include/linux/rculist_nulls.h | 14 +++++++
+ kernel/bpf/hashtab.c          | 71 +++++++++++++++++++++++------------
+ 3 files changed, 67 insertions(+), 23 deletions(-)
+
+diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h
+index b01fe10090843..87ff4f58a2f01 100644
+--- a/include/linux/list_nulls.h
++++ b/include/linux/list_nulls.h
+@@ -29,6 +29,11 @@ struct hlist_nulls_node {
+       ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls))
+ #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member)
++
++#define hlist_nulls_entry_safe(ptr, type, member) \
++      ({ typeof(ptr) ____ptr = (ptr); \
++         !is_a_nulls(____ptr) ? hlist_nulls_entry(____ptr, type, member) : NULL; \
++      })
+ /**
+  * ptr_is_a_nulls - Test if a ptr is a nulls
+  * @ptr: ptr to be tested
+diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
+index 6224a0ab0b1e8..2720b2fbfb86d 100644
+--- a/include/linux/rculist_nulls.h
++++ b/include/linux/rculist_nulls.h
+@@ -118,5 +118,19 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
+               ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1; }); \
+               pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)))
++/**
++ * hlist_nulls_for_each_entry_safe -
++ *   iterate over list of given type safe against removal of list entry
++ * @tpos:     the type * to use as a loop cursor.
++ * @pos:      the &struct hlist_nulls_node to use as a loop cursor.
++ * @head:     the head for your list.
++ * @member:   the name of the hlist_nulls_node within the struct.
++ */
++#define hlist_nulls_for_each_entry_safe(tpos, pos, head, member)              \
++      for (({barrier();}),                                                    \
++           pos = rcu_dereference_raw(hlist_nulls_first_rcu(head));            \
++              (!is_a_nulls(pos)) &&                                           \
++              ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member);        \
++                 pos = rcu_dereference_raw(hlist_nulls_next_rcu(pos)); 1; });)
+ #endif
+ #endif
+diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
+index f9d53ac57f640..8648d7d297081 100644
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -13,10 +13,11 @@
+ #include <linux/bpf.h>
+ #include <linux/jhash.h>
+ #include <linux/filter.h>
++#include <linux/rculist_nulls.h>
+ #include "percpu_freelist.h"
+ struct bucket {
+-      struct hlist_head head;
++      struct hlist_nulls_head head;
+       raw_spinlock_t lock;
+ };
+@@ -40,7 +41,7 @@ enum extra_elem_state {
+ /* each htab element is struct htab_elem + key + value */
+ struct htab_elem {
+       union {
+-              struct hlist_node hash_node;
++              struct hlist_nulls_node hash_node;
+               struct {
+                       void *padding;
+                       union {
+@@ -245,7 +246,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+               goto free_htab;
+       for (i = 0; i < htab->n_buckets; i++) {
+-              INIT_HLIST_HEAD(&htab->buckets[i].head);
++              INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
+               raw_spin_lock_init(&htab->buckets[i].lock);
+       }
+@@ -282,28 +283,52 @@ static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
+       return &htab->buckets[hash & (htab->n_buckets - 1)];
+ }
+-static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
++static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32 hash)
+ {
+       return &__select_bucket(htab, hash)->head;
+ }
+-static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
++/* this lookup function can only be called with bucket lock taken */
++static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
+                                        void *key, u32 key_size)
+ {
++      struct hlist_nulls_node *n;
+       struct htab_elem *l;
+-      hlist_for_each_entry_rcu(l, head, hash_node)
++      hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
+               if (l->hash == hash && !memcmp(&l->key, key, key_size))
+                       return l;
+       return NULL;
+ }
++/* can be called without bucket lock. it will repeat the loop in
++ * the unlikely event when elements moved from one bucket into another
++ * while link list is being walked
++ */
++static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
++                                             u32 hash, void *key,
++                                             u32 key_size, u32 n_buckets)
++{
++      struct hlist_nulls_node *n;
++      struct htab_elem *l;
++
++again:
++      hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
++              if (l->hash == hash && !memcmp(&l->key, key, key_size))
++                      return l;
++
++      if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
++              goto again;
++
++      return NULL;
++}
++
+ /* Called from syscall or from eBPF program */
+ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
+ {
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+-      struct hlist_head *head;
++      struct hlist_nulls_head *head;
+       struct htab_elem *l;
+       u32 hash, key_size;
+@@ -316,7 +341,7 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
+       head = select_bucket(htab, hash);
+-      l = lookup_elem_raw(head, hash, key, key_size);
++      l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
+       return l;
+ }
+@@ -335,7 +360,7 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
+ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+ {
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+-      struct hlist_head *head;
++      struct hlist_nulls_head *head;
+       struct htab_elem *l, *next_l;
+       u32 hash, key_size;
+       int i = 0;
+@@ -352,13 +377,13 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+       head = select_bucket(htab, hash);
+       /* lookup the key */
+-      l = lookup_elem_raw(head, hash, key, key_size);
++      l = lookup_nulls_elem_raw(head, hash, key, key_size, htab->n_buckets);
+       if (!l)
+               goto find_first_elem;
+       /* key was found, get next key in the same bucket */
+-      next_l = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&l->hash_node)),
++      next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_next_rcu(&l->hash_node)),
+                                 struct htab_elem, hash_node);
+       if (next_l) {
+@@ -377,7 +402,7 @@ static int htab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+               head = select_bucket(htab, i);
+               /* pick first element in the bucket */
+-              next_l = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)),
++              next_l = hlist_nulls_entry_safe(rcu_dereference_raw(hlist_nulls_first_rcu(head)),
+                                         struct htab_elem, hash_node);
+               if (next_l) {
+                       /* if it's not empty, just return it */
+@@ -534,7 +559,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
+ {
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct htab_elem *l_new = NULL, *l_old;
+-      struct hlist_head *head;
++      struct hlist_nulls_head *head;
+       unsigned long flags;
+       struct bucket *b;
+       u32 key_size, hash;
+@@ -573,9 +598,9 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
+       /* add new element to the head of the list, so that
+        * concurrent search will find it before old elem
+        */
+-      hlist_add_head_rcu(&l_new->hash_node, head);
++      hlist_nulls_add_head_rcu(&l_new->hash_node, head);
+       if (l_old) {
+-              hlist_del_rcu(&l_old->hash_node);
++              hlist_nulls_del_rcu(&l_old->hash_node);
+               free_htab_elem(htab, l_old);
+       }
+       ret = 0;
+@@ -590,7 +615,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+ {
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct htab_elem *l_new = NULL, *l_old;
+-      struct hlist_head *head;
++      struct hlist_nulls_head *head;
+       unsigned long flags;
+       struct bucket *b;
+       u32 key_size, hash;
+@@ -642,7 +667,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+                       ret = PTR_ERR(l_new);
+                       goto err;
+               }
+-              hlist_add_head_rcu(&l_new->hash_node, head);
++              hlist_nulls_add_head_rcu(&l_new->hash_node, head);
+       }
+       ret = 0;
+ err:
+@@ -660,7 +685,7 @@ static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+ static int htab_map_delete_elem(struct bpf_map *map, void *key)
+ {
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+-      struct hlist_head *head;
++      struct hlist_nulls_head *head;
+       struct bucket *b;
+       struct htab_elem *l;
+       unsigned long flags;
+@@ -680,7 +705,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
+       l = lookup_elem_raw(head, hash, key, key_size);
+       if (l) {
+-              hlist_del_rcu(&l->hash_node);
++              hlist_nulls_del_rcu(&l->hash_node);
+               free_htab_elem(htab, l);
+               ret = 0;
+       }
+@@ -694,12 +719,12 @@ static void delete_all_elements(struct bpf_htab *htab)
+       int i;
+       for (i = 0; i < htab->n_buckets; i++) {
+-              struct hlist_head *head = select_bucket(htab, i);
+-              struct hlist_node *n;
++              struct hlist_nulls_head *head = select_bucket(htab, i);
++              struct hlist_nulls_node *n;
+               struct htab_elem *l;
+-              hlist_for_each_entry_safe(l, n, head, hash_node) {
+-                      hlist_del_rcu(&l->hash_node);
++              hlist_nulls_for_each_entry_safe(l, n, head, hash_node) {
++                      hlist_nulls_del_rcu(&l->hash_node);
+                       if (l->state != HTAB_EXTRA_ELEM_USED)
+                               htab_elem_free(htab, l);
+               }
+-- 
+2.20.1
+
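The conversion above hinges on the hlist_nulls convention: a bucket's chain is terminated not by a plain NULL but by a tagged pointer that encodes the bucket index, so a lockless reader that has wandered onto the wrong chain can detect it and retry, as lookup_nulls_elem_raw() does with its "goto again". The stand-alone sketch below models that encoding in user space; the macro names mirror include/linux/list_nulls.h, but the code is illustrative only and not the kernel implementation.

/*
 * Illustrative user-space model of the nulls-pointer encoding, assuming
 * the same convention as include/linux/list_nulls.h: bit 0 marks the
 * terminator, the remaining bits carry a per-bucket value.
 */
#include <stdint.h>
#include <stdio.h>

#define NULLS_MARKER(v)    ((void *)((((uintptr_t)(v)) << 1) | 1UL))
#define is_a_nulls(ptr)    (((uintptr_t)(ptr)) & 1UL)
#define get_nulls_value(p) (((uintptr_t)(p)) >> 1)

int main(void)
{
	unsigned int n_buckets = 8;      /* must be a power of two */
	unsigned int hash = 0x2a;        /* made-up hash, for illustration */
	void *end = NULLS_MARKER(hash & (n_buckets - 1));

	/*
	 * A reader that walks a bucket without the lock eventually hits a
	 * nulls terminator. If its value does not match the bucket the
	 * reader started in, the element it was following was freed and
	 * reinserted into another bucket, so the lookup must be retried.
	 */
	printf("is_a_nulls=%d bucket=%lu retry=%s\n",
	       (int)is_a_nulls(end),
	       (unsigned long)get_nulls_value(end),
	       get_nulls_value(end) != (hash & (n_buckets - 1)) ? "yes" : "no");
	return 0;
}

Because the terminator carries the bucket index, the retry costs nothing in the common case: the comparison only fires when an element really did migrate mid-walk.
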
diff --git a/queue-4.9/bpf-fix-struct-htab_elem-layout.patch b/queue-4.9/bpf-fix-struct-htab_elem-layout.patch
new file mode 100644 (file)
index 0000000..047a5af
--- /dev/null
@@ -0,0 +1,88 @@
+From 7a0847107ec1c0477b91c8e67b50f5b0f982c6d8 Mon Sep 17 00:00:00 2001
+From: Alexei Starovoitov <ast@fb.com>
+Date: Thu, 9 May 2019 19:33:53 -0700
+Subject: bpf: fix struct htab_elem layout
+
+commit 9f691549f76d488a0c74397b3e51e943865ea01f upstream.
+
+When an htab_elem is removed from the bucket list, the htab_elem.hash_node.next
+field should not be overridden too early; otherwise we have a tiny race window
+between lookup and delete.
+The bug was discovered by manual code analysis and is reproducible
+only with an explicit udelay() in lookup_elem_raw().
+
+Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements")
+Reported-by: Jonathan Perry <jonperry@fb.com>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Chenbo Feng <fengc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/hashtab.c | 28 ++++++++++++++++++++++------
+ 1 file changed, 22 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
+index a36a532c056df..f9d53ac57f640 100644
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -41,8 +41,13 @@ enum extra_elem_state {
+ struct htab_elem {
+       union {
+               struct hlist_node hash_node;
+-              struct bpf_htab *htab;
+-              struct pcpu_freelist_node fnode;
++              struct {
++                      void *padding;
++                      union {
++                              struct bpf_htab *htab;
++                              struct pcpu_freelist_node fnode;
++                      };
++              };
+       };
+       union {
+               struct rcu_head rcu;
+@@ -114,8 +119,10 @@ static int prealloc_elems_and_freelist(struct bpf_htab *htab)
+       if (err)
+               goto free_elems;
+-      pcpu_freelist_populate(&htab->freelist, htab->elems, htab->elem_size,
+-                             htab->map.max_entries);
++      pcpu_freelist_populate(&htab->freelist,
++                             htab->elems + offsetof(struct htab_elem, fnode),
++                             htab->elem_size, htab->map.max_entries);
++
+       return 0;
+ free_elems:
+@@ -148,6 +155,11 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
+       int err, i;
+       u64 cost;
++      BUILD_BUG_ON(offsetof(struct htab_elem, htab) !=
++                   offsetof(struct htab_elem, hash_node.pprev));
++      BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
++                   offsetof(struct htab_elem, hash_node.pprev));
++
+       if (attr->map_flags & ~BPF_F_NO_PREALLOC)
+               /* reserved bits should not be used */
+               return ERR_PTR(-EINVAL);
+@@ -429,9 +441,13 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
+       int err = 0;
+       if (prealloc) {
+-              l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
+-              if (!l_new)
++              struct pcpu_freelist_node *l;
++
++              l = pcpu_freelist_pop(&htab->freelist);
++              if (!l)
+                       err = -E2BIG;
++              else
++                      l_new = container_of(l, struct htab_elem, fnode);
+       } else {
+               if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
+                       atomic_dec(&htab->count);
+-- 
+2.20.1
+
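The BUILD_BUG_ON() checks added above pin down why the padding field exists: when a freed element is pushed onto the per-cpu freelist, the freelist pointer must overlay hash_node.pprev rather than hash_node.next, so a concurrent lockless reader following next is not derailed. Below is a minimal user-space model of that layout invariant; the types are simplified stand-ins, not the kernel definitions.

/*
 * Simplified model of the fixed struct htab_elem layout. The
 * static_asserts mirror the BUILD_BUG_ON() checks in the patch
 * (compile with -std=c11 or later).
 */
#include <assert.h>
#include <stddef.h>

struct hlist_node { struct hlist_node *next, **pprev; };
struct pcpu_freelist_node { struct pcpu_freelist_node *next; };
struct bpf_htab;                         /* opaque here */

struct htab_elem {
	union {
		struct hlist_node hash_node;
		struct {
			void *padding;   /* overlays hash_node.next */
			union {
				struct bpf_htab *htab;
				struct pcpu_freelist_node fnode;
			};
		};
	};
};

/* The freelist and htab pointers overlay pprev, leaving next untouched. */
static_assert(offsetof(struct htab_elem, htab) ==
	      offsetof(struct htab_elem, hash_node.pprev), "htab vs pprev");
static_assert(offsetof(struct htab_elem, fnode.next) ==
	      offsetof(struct htab_elem, hash_node.pprev), "fnode vs pprev");

int main(void) { return 0; }
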
diff --git a/queue-4.9/netfilter-compat-initialize-all-fields-in-xt_init.patch b/queue-4.9/netfilter-compat-initialize-all-fields-in-xt_init.patch
new file mode 100644 (file)
index 0000000..9a29d13
--- /dev/null
@@ -0,0 +1,70 @@
+From 7b787dc8c9103bf222ebb85c342af971d408f68f Mon Sep 17 00:00:00 2001
+From: Francesco Ruggeri <fruggeri@arista.com>
+Date: Fri, 10 May 2019 09:19:30 -0700
+Subject: netfilter: compat: initialize all fields in xt_init
+
+commit 8d29d16d21342a0c86405d46de0c4ac5daf1760f upstream
+
+If a non-zero value happens to be in xt[NFPROTO_BRIDGE].cur at init
+time, the following panic can be caused by running
+
+% ebtables -t broute -F BROUTING
+
+from a 32-bit user level on a 64-bit kernel. This patch replaces
+kmalloc_array with kcalloc when allocating xt.
+
+[  474.680846] BUG: unable to handle kernel paging request at 0000000009600920
+[  474.687869] PGD 2037006067 P4D 2037006067 PUD 2038938067 PMD 0
+[  474.693838] Oops: 0000 [#1] SMP
+[  474.697055] CPU: 9 PID: 4662 Comm: ebtables Kdump: loaded Not tainted 4.19.17-11302235.AroraKernelnext.fc18.x86_64 #1
+[  474.707721] Hardware name: Supermicro X9DRT/X9DRT, BIOS 3.0 06/28/2013
+[  474.714313] RIP: 0010:xt_compat_calc_jump+0x2f/0x63 [x_tables]
+[  474.720201] Code: 40 0f b6 ff 55 31 c0 48 6b ff 70 48 03 3d dc 45 00 00 48 89 e5 8b 4f 6c 4c 8b 47 60 ff c9 39 c8 7f 2f 8d 14 08 d1 fa 48 63 fa <41> 39 34 f8 4c 8d 0c fd 00 00 00 00 73 05 8d 42 01 eb e1 76 05 8d
+[  474.739023] RSP: 0018:ffffc9000943fc58 EFLAGS: 00010207
+[  474.744296] RAX: 0000000000000000 RBX: ffffc90006465000 RCX: 0000000002580249
+[  474.751485] RDX: 00000000012c0124 RSI: fffffffff7be17e9 RDI: 00000000012c0124
+[  474.758670] RBP: ffffc9000943fc58 R08: 0000000000000000 R09: ffffffff8117cf8f
+[  474.765855] R10: ffffc90006477000 R11: 0000000000000000 R12: 0000000000000001
+[  474.773048] R13: 0000000000000000 R14: ffffc9000943fcb8 R15: ffffc9000943fcb8
+[  474.780234] FS:  0000000000000000(0000) GS:ffff88a03f840000(0063) knlGS:00000000f7ac7700
+[  474.788612] CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
+[  474.794632] CR2: 0000000009600920 CR3: 0000002037422006 CR4: 00000000000606e0
+[  474.802052] Call Trace:
+[  474.804789]  compat_do_replace+0x1fb/0x2a3 [ebtables]
+[  474.810105]  compat_do_ebt_set_ctl+0x69/0xe6 [ebtables]
+[  474.815605]  ? try_module_get+0x37/0x42
+[  474.819716]  compat_nf_setsockopt+0x4f/0x6d
+[  474.824172]  compat_ip_setsockopt+0x7e/0x8c
+[  474.828641]  compat_raw_setsockopt+0x16/0x3a
+[  474.833220]  compat_sock_common_setsockopt+0x1d/0x24
+[  474.838458]  __compat_sys_setsockopt+0x17e/0x1b1
+[  474.843343]  ? __check_object_size+0x76/0x19a
+[  474.847960]  __ia32_compat_sys_socketcall+0x1cb/0x25b
+[  474.853276]  do_fast_syscall_32+0xaf/0xf6
+[  474.857548]  entry_SYSENTER_compat+0x6b/0x7a
+
+Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
+Acked-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Zubin Mithra <zsm@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/x_tables.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
+index 751fec729ffb0..e065140d0c93b 100644
+--- a/net/netfilter/x_tables.c
++++ b/net/netfilter/x_tables.c
+@@ -1728,7 +1728,7 @@ static int __init xt_init(void)
+               seqcount_init(&per_cpu(xt_recseq, i));
+       }
+-      xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
++      xt = kcalloc(NFPROTO_NUMPROTO, sizeof(struct xt_af), GFP_KERNEL);
+       if (!xt)
+               return -ENOMEM;
+-- 
+2.20.1
+
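The one-line change above works because kcalloc() returns zeroed memory, so every xt[af] slot, including compat bookkeeping fields such as cur, starts out in a consistent empty state instead of whatever the allocator left behind. The following rough user-space analogue shows the difference; struct xt_af_model and the NFPROTO_NUMPROTO value are simplified assumptions for illustration, not the kernel definitions.

/*
 * Rough analogue of the kmalloc-vs-kcalloc difference. The struct and
 * the constant below are illustrative stand-ins only.
 */
#include <stdio.h>
#include <stdlib.h>

#define NFPROTO_NUMPROTO 13              /* assumed value, for illustration */

struct xt_af_model {
	unsigned int cur;                /* stand-in for compat bookkeeping */
	void *compat_tab;
};

int main(void)
{
	/* kmalloc-style: fields hold whatever the allocator returns. */
	struct xt_af_model *dirty = malloc(NFPROTO_NUMPROTO * sizeof(*dirty));

	/*
	 * kcalloc-style: every field starts at zero, so code that reads
	 * xt[af].cur before the first write sees an empty table instead
	 * of a bogus index it will later dereference.
	 */
	struct xt_af_model *clean = calloc(NFPROTO_NUMPROTO, sizeof(*clean));

	if (!dirty || !clean)
		return 1;

	printf("zero-initialized cur: %u\n", clean[0].cur);

	free(dirty);
	free(clean);
	return 0;
}
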
diff --git a/queue-4.9/series b/queue-4.9/series
new file mode 100644 (file)
index 0000000..8acecc1
--- /dev/null
@@ -0,0 +1,3 @@
+netfilter-compat-initialize-all-fields-in-xt_init.patch
+bpf-fix-struct-htab_elem-layout.patch
+bpf-convert-htab-map-to-hlist_nulls.patch