kfree(ctx);
}
-static int htab_set_dtor(struct bpf_htab *htab, void (*dtor)(void *, void *))
+static int bpf_ma_set_dtor(struct bpf_map *map, struct bpf_mem_alloc *ma,
+ void (*dtor)(void *, void *))
{
- u32 key_size = htab->map.key_size;
- struct bpf_mem_alloc *ma;
struct htab_btf_record *hrec;
int err;
/* No need for dtors. */
- if (IS_ERR_OR_NULL(htab->map.record))
+ if (IS_ERR_OR_NULL(map->record))
return 0;
hrec = kzalloc(sizeof(*hrec), GFP_KERNEL);
if (!hrec)
return -ENOMEM;
- hrec->key_size = key_size;
- hrec->record = btf_record_dup(htab->map.record);
+ hrec->key_size = map->key_size;
+ hrec->record = btf_record_dup(map->record);
if (IS_ERR(hrec->record)) {
err = PTR_ERR(hrec->record);
kfree(hrec);
return err;
}
- ma = htab_is_percpu(htab) ? &htab->pcpu_ma : &htab->ma;
bpf_mem_alloc_set_dtor(ma, dtor, htab_dtor_ctx_free, hrec);
return 0;
}
* populated in htab_map_alloc(), so it will always appear as NULL.
*/
if (htab_is_percpu(htab))
- return htab_set_dtor(htab, htab_pcpu_mem_dtor);
+ return bpf_ma_set_dtor(map, &htab->pcpu_ma, htab_pcpu_mem_dtor);
else
- return htab_set_dtor(htab, htab_mem_dtor);
+ return bpf_ma_set_dtor(map, &htab->ma, htab_mem_dtor);
}
static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
struct rhashtable ht;
struct bpf_mem_alloc ma;
u32 elem_size;
+ bool freeing_internal;
};
static const struct rhashtable_params rhtab_params = {
return htab_map_alloc_check(attr);
}
+static void rhtab_check_and_free_fields(struct bpf_rhtab *rhtab,
+ struct rhtab_elem *elem)
+{
+ if (IS_ERR_OR_NULL(rhtab->map.record))
+ return;
+
+ bpf_obj_free_fields(rhtab->map.record,
+ rhtab_elem_value(elem, rhtab->map.key_size));
+}
+
+static void rhtab_mem_dtor(void *obj, void *ctx)
+{
+ struct htab_btf_record *hrec = ctx;
+ struct rhtab_elem *elem = obj;
+
+ if (IS_ERR_OR_NULL(hrec->record))
+ return;
+
+ bpf_obj_free_fields(hrec->record,
+ rhtab_elem_value(elem, hrec->key_size));
+}
+
static void rhtab_free_elem(void *ptr, void *arg)
{
struct bpf_rhtab *rhtab = arg;
struct rhtab_elem *elem = ptr;
+ bpf_map_free_internal_structs(&rhtab->map, rhtab_elem_value(elem, rhtab->map.key_size));
bpf_mem_cache_free_rcu(&rhtab->ma, elem);
}
rhtab_read_elem_value(&rhtab->map, copy, elem, flags);
check_and_init_map_value(&rhtab->map, copy);
}
-
+ /* Release internal structs: kptr, bpf_timer, task_work, wq */
+ rhtab_check_and_free_fields(rhtab, elem);
bpf_mem_cache_free_rcu(&rhtab->ma, elem);
return 0;
}
static long rhtab_map_update_existing(struct bpf_map *map, struct rhtab_elem *elem, void *value,
u64 map_flags)
{
+ struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
void *old_val = rhtab_elem_value(elem, map->key_size);
if (map_flags & BPF_NOEXIST)
copy_map_value_locked(map, old_val, value, false);
else
copy_map_value(map, old_val, value);
+
+ /*
+ * Torn reads: a concurrent reader without BPF_F_LOCK may observe
+ * the value mid-copy. Callers requiring consistent reads must use
+ * BPF_F_LOCK, matching arraymap semantics.
+ *
+ * copy_map_value() skips special-field offsets, so old timers/
+ * kptrs/etc. still sit in the slot. Cancel them after the copy
+ * to match arraymap's update semantics.
+ */
+ rhtab_check_and_free_fields(rhtab, elem);
return 0;
}
if (map_flags & BPF_EXIST)
return -ENOENT;
+ /*
+ * Reject new insertions while map_release_uref cleanup walks the
+ * table. Without this, new elements could keep triggering rehash
+ * and prevent the walk from terminating.
+ */
+ if (READ_ONCE(rhtab->freeing_internal))
+ return -EBUSY;
+
/* Check max_entries limit before inserting new element */
if (atomic_read(&rhtab->ht.nelems) >= map->max_entries)
return -E2BIG;
memcpy(elem->data, key, map->key_size);
copy_map_value(map, rhtab_elem_value(elem, map->key_size), value);
+ check_and_init_map_value(map, rhtab_elem_value(elem, map->key_size));
/* Prevent deadlock for NMI programs attempting to take bucket lock */
bpf_disable_instrumentation();
return insn - insn_buf;
}
+static int rhtab_map_check_btf(struct bpf_map *map, const struct btf *btf,
+ const struct btf_type *key_type,
+ const struct btf_type *value_type)
+{
+ struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
+
+ return bpf_ma_set_dtor(map, &rhtab->ma, rhtab_mem_dtor);
+}
+
static void rhtab_map_free_internal_structs(struct bpf_map *map)
{
+ struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
+ struct rhashtable_iter iter;
+ struct rhtab_elem *elem;
+
+ if (!bpf_map_has_internal_structs(map))
+ return;
+
+ /*
+ * Block new insertions. Once observed, no new growth is triggered,
+ * so any in-flight rehash will drain and the walker is guaranteed
+ * to stop returning -EAGAIN. Treat -EAGAIN as "rehash in progress,
+ * retry"; do not wait for the worker.
+ */
+ WRITE_ONCE(rhtab->freeing_internal, true);
+
+ rhashtable_walk_enter(&rhtab->ht, &iter);
+ rhashtable_walk_start(&iter);
+
+ while ((elem = rhashtable_walk_next(&iter))) {
+ if (IS_ERR(elem)) {
+ if (PTR_ERR(elem) == -EAGAIN)
+ continue;
+ break;
+ }
+
+ bpf_map_free_internal_structs(map, rhtab_elem_value(elem, map->key_size));
+
+ if (need_resched()) { /* Avoid stalls on large maps */
+ rhashtable_walk_stop(&iter);
+ cond_resched();
+ rhashtable_walk_start(&iter);
+ }
+ }
+
+ rhashtable_walk_stop(&iter);
+ rhashtable_walk_exit(&iter);
+ WRITE_ONCE(rhtab->freeing_internal, false);
}
static int rhtab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
.map_free = rhtab_map_free,
.map_get_next_key = rhtab_map_get_next_key,
.map_release_uref = rhtab_map_free_internal_structs,
+ .map_check_btf = rhtab_map_check_btf,
.map_lookup_elem = rhtab_map_lookup_elem,
.map_lookup_and_delete_elem = rhtab_map_lookup_and_delete_elem,
.map_update_elem = rhtab_map_update_elem,