+++ /dev/null
-From 9515b63fddd8c96797b0513c8d6509a9cc767611 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Wed, 21 Sep 2022 15:38:26 +0800
-Subject: bpf: Always use raw spinlock for hash bucket lock
-
-From: Hou Tao <houtao1@huawei.com>
-
-[ Upstream commit 1d8b82c613297f24354b4d750413a7456b5cd92c ]
-
-For a non-preallocated hash map on an RT kernel, a regular spinlock is
-used for the bucket lock instead of a raw spinlock. The reason is that
-on an RT kernel memory allocation is forbidden in atomic context, and a
-regular spinlock is sleepable under RT.
-
-Now that the hash map has been fully converted to use bpf_mem_alloc,
-there is no synchronous memory allocation for a non-preallocated hash
-map anymore, so it is safe to always use a raw spinlock for the bucket
-lock on RT. So remove htab_use_raw_lock() and update the comments
-accordingly.
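-
-For reference, a condensed sketch of the bucket lock path as it looks
-after this patch, assembled from the hunks below (struct bpf_htab,
-HASHTAB_MAP_LOCK_MASK and the per-cpu map_locked counters are assumed
-to be as defined in kernel/bpf/hashtab.c):
-
-    struct bucket {
-            struct hlist_nulls_head head;
-            raw_spinlock_t raw_lock;   /* always raw, on RT and !RT alike */
-    };
-
-    static inline int htab_lock_bucket(const struct bpf_htab *htab,
-                                       struct bucket *b, u32 hash,
-                                       unsigned long *pflags)
-    {
-            unsigned long flags;
-
-            hash = hash & HASHTAB_MAP_LOCK_MASK;
-
-            preempt_disable();
-            /* per-cpu recursion guard: at most one holder per (CPU, slot) */
-            if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
-                    __this_cpu_dec(*(htab->map_locked[hash]));
-                    preempt_enable();
-                    return -EBUSY;
-            }
-
-            raw_spin_lock_irqsave(&b->raw_lock, flags);
-            *pflags = flags;
-            return 0;
-    }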
-
-Signed-off-by: Hou Tao <houtao1@huawei.com>
-Link: https://lore.kernel.org/r/20220921073826.2365800-1-houtao@huaweicloud.com
-Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-Stable-dep-of: 9f907439dc80 ("bpf: hash map, avoid deadlock with suitable hash mask")
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- kernel/bpf/hashtab.c | 66 ++++++++++----------------------------------
- 1 file changed, 14 insertions(+), 52 deletions(-)
-
-diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
-index e7f45a966e6b..ea2051a913fb 100644
---- a/kernel/bpf/hashtab.c
-+++ b/kernel/bpf/hashtab.c
-@@ -66,24 +66,16 @@
- * In theory the BPF locks could be converted to regular spinlocks as well,
- * but the bucket locks and percpu_freelist locks can be taken from
- * arbitrary contexts (perf, kprobes, tracepoints) which are required to be
-- * atomic contexts even on RT. These mechanisms require preallocated maps,
-- * so there is no need to invoke memory allocations within the lock held
-- * sections.
-- *
-- * BPF maps which need dynamic allocation are only used from (forced)
-- * thread context on RT and can therefore use regular spinlocks which in
-- * turn allows to invoke memory allocations from the lock held section.
-- *
-- * On a non RT kernel this distinction is neither possible nor required.
-- * spinlock maps to raw_spinlock and the extra code is optimized out by the
-- * compiler.
-+ * atomic contexts even on RT. Before the introduction of bpf_mem_alloc,
-+ * it was only safe to use a raw spinlock for a preallocated hash map on an RT
-+ * kernel, because no memory allocation happened within the lock-held sections.
-+ * However, now that the hash map has been fully converted to use bpf_mem_alloc,
-+ * there is no synchronous memory allocation for a non-preallocated hash map
-+ * either, so it is safe to always use a raw spinlock for the bucket lock.
- */
- struct bucket {
- struct hlist_nulls_head head;
-- union {
-- raw_spinlock_t raw_lock;
-- spinlock_t lock;
-- };
-+ raw_spinlock_t raw_lock;
- };
-
- #define HASHTAB_MAP_LOCK_COUNT 8
-@@ -132,26 +124,15 @@ static inline bool htab_is_prealloc(const struct bpf_htab *htab)
- return !(htab->map.map_flags & BPF_F_NO_PREALLOC);
- }
-
--static inline bool htab_use_raw_lock(const struct bpf_htab *htab)
--{
-- return (!IS_ENABLED(CONFIG_PREEMPT_RT) || htab_is_prealloc(htab));
--}
--
- static void htab_init_buckets(struct bpf_htab *htab)
- {
- unsigned i;
-
- for (i = 0; i < htab->n_buckets; i++) {
- INIT_HLIST_NULLS_HEAD(&htab->buckets[i].head, i);
-- if (htab_use_raw_lock(htab)) {
-- raw_spin_lock_init(&htab->buckets[i].raw_lock);
-- lockdep_set_class(&htab->buckets[i].raw_lock,
-+ raw_spin_lock_init(&htab->buckets[i].raw_lock);
-+ lockdep_set_class(&htab->buckets[i].raw_lock,
- &htab->lockdep_key);
-- } else {
-- spin_lock_init(&htab->buckets[i].lock);
-- lockdep_set_class(&htab->buckets[i].lock,
-- &htab->lockdep_key);
-- }
- cond_resched();
- }
- }
-@@ -161,28 +142,17 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
- unsigned long *pflags)
- {
- unsigned long flags;
-- bool use_raw_lock;
-
- hash = hash & HASHTAB_MAP_LOCK_MASK;
-
-- use_raw_lock = htab_use_raw_lock(htab);
-- if (use_raw_lock)
-- preempt_disable();
-- else
-- migrate_disable();
-+ preempt_disable();
- if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
- __this_cpu_dec(*(htab->map_locked[hash]));
-- if (use_raw_lock)
-- preempt_enable();
-- else
-- migrate_enable();
-+ preempt_enable();
- return -EBUSY;
- }
-
-- if (use_raw_lock)
-- raw_spin_lock_irqsave(&b->raw_lock, flags);
-- else
-- spin_lock_irqsave(&b->lock, flags);
-+ raw_spin_lock_irqsave(&b->raw_lock, flags);
- *pflags = flags;
-
- return 0;
-@@ -192,18 +162,10 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
- struct bucket *b, u32 hash,
- unsigned long flags)
- {
-- bool use_raw_lock = htab_use_raw_lock(htab);
--
- hash = hash & HASHTAB_MAP_LOCK_MASK;
-- if (use_raw_lock)
-- raw_spin_unlock_irqrestore(&b->raw_lock, flags);
-- else
-- spin_unlock_irqrestore(&b->lock, flags);
-+ raw_spin_unlock_irqrestore(&b->raw_lock, flags);
- __this_cpu_dec(*(htab->map_locked[hash]));
-- if (use_raw_lock)
-- preempt_enable();
-- else
-- migrate_enable();
-+ preempt_enable();
- }
-
- static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node);
---
-2.39.0
-
+++ /dev/null
-From db3e423d05b76c2d43e763e79e5273c2988212a7 Mon Sep 17 00:00:00 2001
-From: Sasha Levin <sashal@kernel.org>
-Date: Wed, 11 Jan 2023 17:29:01 +0800
-Subject: bpf: hash map, avoid deadlock with suitable hash mask
-
-From: Tonghao Zhang <tong@infragraf.org>
-
-[ Upstream commit 9f907439dc80e4a2fcfb949927b36c036468dbb3 ]
-
-The deadlock may still occur when the hash map is accessed in both NMI
-and non-NMI context, because in NMI we may access the same bucket while
-using a different map_locked index.
-
-For example, on the same CPU with .max_entries = 2, we update the hash
-map with key = 4; a bpf prog running in NMI context (nmi_handle()) then
-updates the hash map with key = 20. The two hashes can select the same
-bucket index but different map_locked indexes.
-
-To fix this issue, mask the hash with the smaller of the two masks,
-min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1), when selecting
-the map_locked index, so that hashes which share a bucket also share a
-map_locked slot.
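-
-A worked example under this setup (.max_entries = 2, so
-n_buckets = roundup_pow_of_two(2) = 2 and the bucket mask is 0x1;
-HASHTAB_MAP_LOCK_COUNT = 8, so HASHTAB_MAP_LOCK_MASK = 0x7). The
-concrete hash values are hypothetical, picked only to illustrate the
-collision; the real values come from hashing the keys:
-
-    hash(key = 4)  = 0x12:  bucket = 0x12 & 0x1 = 0,  map_locked = 0x12 & 0x7 = 2
-    hash(key = 20) = 0x16:  bucket = 0x16 & 0x1 = 0,  map_locked = 0x16 & 0x7 = 6
-
-Both updates take bucket 0's raw spinlock, but each increments a
-different per-cpu map_locked counter, so the recursion check in
-htab_lock_bucket() does not fire and the NMI update spins on a lock
-already held by the interrupted context. With this patch the map_locked
-index is masked with min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1)
-= 0x1, both hashes map to map_locked[0], and the second update bails
-out with -EBUSY instead of deadlocking.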
-
-Fixes: 20b6cc34ea74 ("bpf: Avoid hashtab deadlock with map_locked")
-Signed-off-by: Tonghao Zhang <tong@infragraf.org>
-Cc: Alexei Starovoitov <ast@kernel.org>
-Cc: Daniel Borkmann <daniel@iogearbox.net>
-Cc: Andrii Nakryiko <andrii@kernel.org>
-Cc: Martin KaFai Lau <martin.lau@linux.dev>
-Cc: Song Liu <song@kernel.org>
-Cc: Yonghong Song <yhs@fb.com>
-Cc: John Fastabend <john.fastabend@gmail.com>
-Cc: KP Singh <kpsingh@kernel.org>
-Cc: Stanislav Fomichev <sdf@google.com>
-Cc: Hao Luo <haoluo@google.com>
-Cc: Jiri Olsa <jolsa@kernel.org>
-Cc: Hou Tao <houtao1@huawei.com>
-Acked-by: Yonghong Song <yhs@fb.com>
-Acked-by: Hou Tao <houtao1@huawei.com>
-Link: https://lore.kernel.org/r/20230111092903.92389-1-tong@infragraf.org
-Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
-Signed-off-by: Sasha Levin <sashal@kernel.org>
----
- kernel/bpf/hashtab.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
-index ea2051a913fb..06461ce9363e 100644
---- a/kernel/bpf/hashtab.c
-+++ b/kernel/bpf/hashtab.c
-@@ -143,7 +143,7 @@ static inline int htab_lock_bucket(const struct bpf_htab *htab,
- {
- unsigned long flags;
-
-- hash = hash & HASHTAB_MAP_LOCK_MASK;
-+ hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
-
- preempt_disable();
- if (unlikely(__this_cpu_inc_return(*(htab->map_locked[hash])) != 1)) {
-@@ -162,7 +162,7 @@ static inline void htab_unlock_bucket(const struct bpf_htab *htab,
- struct bucket *b, u32 hash,
- unsigned long flags)
- {
-- hash = hash & HASHTAB_MAP_LOCK_MASK;
-+ hash = hash & min_t(u32, HASHTAB_MAP_LOCK_MASK, htab->n_buckets - 1);
- raw_spin_unlock_irqrestore(&b->raw_lock, flags);
- __this_cpu_dec(*(htab->map_locked[hash]));
- preempt_enable();
---
-2.39.0
-