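Support resizable hashmap in BPF map benchmarks.
Add BPF_MAP_TYPE_RHASH variants of the existing hashmap benchmarks:
bpf-rhashmap-full-update, bpf-rhashmap-lookup and rhtab-mem. They
reuse the existing BPF programs and switch the map type (adding
BPF_F_NO_PREALLOC) before load; rhtab-mem rejects preallocation.
In the results below, htab is BPF_MAP_TYPE_HASH and rhtab is
BPF_MAP_TYPE_RHASH.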
1. LOOKUP (single producer, M events/sec)
key | max | nr | htab | rhtab | ratio | delta
----+-----+-------+---------+---------+-------+-------
8 | 1K | 750 | 99.85 | 81.92 | 0.82x | -18 %
8 | 1K | 1K | 100.71 | 80.19 | 0.80x | -20 %
8 | 1M | 750K | 23.37 | 72.09 | 3.08x | +208 %
8 | 1M | 1M | 13.39 | 53.72 | 4.01x | +301 %
32 | 1K | 750 | 51.57 | 42.78 | 0.83x | -17 %
32 | 1K | 1K | 50.81 | 45.83 | 0.90x | -10 %
32 | 1M | 750K | 11.27 | 15.29 | 1.36x | +36 %
32 | 1M | 1M | 7.32 | 8.75 | 1.19x | +19 %
256 | 1K | 750 | 7.58 | 7.88 | 1.04x | +4 %
256 | 1K | 1K | 7.43 | 7.81 | 1.05x | +5 %
256 | 1M | 750K | 3.69 | 4.27 | 1.16x | +16 %
256 | 1M | 1M | 2.60 | 3.12 | 1.20x | +20 %
Pattern:
* Small map (1K): htab wins for 8- and 32-byte keys by 10-20 %;
rhtab is marginally ahead (+4-5 %) with 256-byte keys.
* Large map (1M): rhtab wins everywhere, up to 4x at high load
factor with 8 byte keys.
* Higher load factor amplifies rhtab's lead: rhtab grows the
bucket array; htab stays at user-declared max.
2. FULL UPDATE (M events/sec per producer)
htab per-producer:
20.33 22.02 19.27 23.61 24.18 23.17 21.07
mean 21.94 range 19.27 - 24.18
rhtab per-producer:
133.51 129.47 74.52 129.29 102.26 129.98 107.64
mean 115.24 range 74.52 - 133.51
speedup (mean): 5.25x (+425 %)
rhtab updates the value with an in-place memcpy, which avoids the
per-update alloc + RCU pointer swap that htab pays.
3. MEMORY
value_size | htab ops/s | rhtab ops/s | htab mem | rhtab mem
-----------+-------------+-------------+----------+----------
32 B | 122.87 k/s | 133.04 k/s | 2.47 MiB | 2.49 MiB
4096 B | 64.43 k/s | 65.38 k/s | 6.74 MiB | 6.44 MiB
rhtab/htab : +8 % ops, +0.8 % mem (32 B)
+1 % ops, -4 % mem (4096 B)
Throughput and memory usage are effectively tied.
SUMMARY
* Small / well-fitting map: htab is faster for 8- and 32-byte keys
(cache-friendly fixed bucket array), but only by ~10-20 %; with
256-byte keys the two are roughly even.
* Large / high-load-factor map: rhtab is dramatically faster
(1.2x to 4x) because rhashtable resizes to keep the load
factor sane while htab stays stuck at user-declared max.
* Update-heavy workloads: rhtab is ~5x faster per producer
via in-place memcpy.
* Memory benchmark: effectively on par.
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
Link: https://lore.kernel.org/r/20260605-rhash-v7-12-5b8e05f8630d@meta.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
extern const struct bench bench_strncmp_no_helper;
extern const struct bench bench_strncmp_helper;
extern const struct bench bench_bpf_hashmap_full_update;
+extern const struct bench bench_bpf_rhashmap_full_update;
extern const struct bench bench_local_storage_cache_seq_get;
extern const struct bench bench_local_storage_cache_interleaved_get;
extern const struct bench bench_local_storage_cache_hashmap_control;
extern const struct bench bench_local_storage_tasks_trace;
extern const struct bench bench_bpf_hashmap_lookup;
+extern const struct bench bench_bpf_rhashmap_lookup;
extern const struct bench bench_local_storage_create;
extern const struct bench bench_htab_mem;
+extern const struct bench bench_rhtab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
extern const struct bench bench_sockmap;
&bench_strncmp_no_helper,
&bench_strncmp_helper,
&bench_bpf_hashmap_full_update,
+ &bench_bpf_rhashmap_full_update,
&bench_local_storage_cache_seq_get,
&bench_local_storage_cache_interleaved_get,
&bench_local_storage_cache_hashmap_control,
&bench_local_storage_tasks_trace,
&bench_bpf_hashmap_lookup,
+ &bench_bpf_rhashmap_lookup,
&bench_local_storage_create,
&bench_htab_mem,
+ &bench_rhtab_mem,
&bench_crypto_encrypt,
&bench_crypto_decrypt,
&bench_sockmap,
{
}
-static void setup(void)
+static void hashmap_full_update_setup(enum bpf_map_type map_type)
{
struct bpf_link *link;
int map_fd, i, max_entries;
setup_libbpf();
- ctx.skel = bpf_hashmap_full_update_bench__open_and_load();
+ ctx.skel = bpf_hashmap_full_update_bench__open();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
+ bpf_map__set_type(ctx.skel->maps.hash_map_bench, map_type);
+ if (map_type == BPF_MAP_TYPE_RHASH)
+ bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench,
+ BPF_F_NO_PREALLOC);
+
+ if (bpf_hashmap_full_update_bench__load(ctx.skel)) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
ctx.skel->bss->nr_loops = MAX_LOOP_NUM;
link = bpf_program__attach(ctx.skel->progs.benchmark);
bpf_map_update_elem(map_fd, &i, &i, BPF_ANY);
}
+static void setup(void)
+{
+ hashmap_full_update_setup(BPF_MAP_TYPE_HASH);
+}
+
+static void rhash_setup(void)
+{
+ hashmap_full_update_setup(BPF_MAP_TYPE_RHASH);
+}
+
static void hashmap_report_final(struct bench_res res[], int res_cnt)
{
unsigned int nr_cpus = bpf_num_possible_cpus();
.report_progress = NULL,
.report_final = hashmap_report_final,
};
+
+const struct bench bench_bpf_rhashmap_full_update = {
+ .name = "bpf-rhashmap-full-update",
+ .validate = validate,
+ .setup = rhash_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = NULL,
+ .report_final = hashmap_report_final,
+};
/* the rest of key is random */
}
-static void setup(void)
+static void hashmap_lookup_setup(enum bpf_map_type map_type)
{
struct bpf_link *link;
+ __u32 map_flags;
int map_fd;
int ret;
int i;
exit(1);
}
+ map_flags = args.map_flags;
+ if (map_type == BPF_MAP_TYPE_RHASH)
+ map_flags |= BPF_F_NO_PREALLOC;
+
+ bpf_map__set_type(ctx.skel->maps.hash_map_bench, map_type);
bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries);
bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size);
bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8);
- bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags);
+ bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, map_flags);
ctx.skel->bss->nr_entries = args.nr_entries;
ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries;
}
}
+static void setup(void)
+{
+ hashmap_lookup_setup(BPF_MAP_TYPE_HASH);
+}
+
+static void rhash_setup(void)
+{
+ hashmap_lookup_setup(BPF_MAP_TYPE_RHASH);
+}
+
static inline double events_from_time(u64 time)
{
if (time)
.report_progress = NULL,
.report_final = hashmap_report_final,
};
+
+const struct bench bench_bpf_rhashmap_lookup = {
+ .name = "bpf-rhashmap-lookup",
+ .argp = &bench_hashmap_lookup_argp,
+ .validate = validate,
+ .setup = rhash_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = NULL,
+ .report_final = hashmap_report_final,
+};
exit(1);
}
-static void htab_mem_setup(void)
+static void htab_mem_setup_impl(enum bpf_map_type map_type)
{
struct bpf_map *map;
const char **names;
}
map = ctx.skel->maps.htab;
+ bpf_map__set_type(map, map_type);
bpf_map__set_value_size(map, args.value_size);
/* Ensure that different CPUs can operate on different subset */
bpf_map__set_max_entries(map, MAX(8192, 64 * env.nr_cpus));
- if (args.preallocated)
+ if (map_type != BPF_MAP_TYPE_RHASH && args.preallocated)
bpf_map__set_map_flags(map, bpf_map__map_flags(map) & ~BPF_F_NO_PREALLOC);
names = ctx.uc->progs;
exit(1);
}
+static void htab_mem_setup(void)
+{
+ htab_mem_setup_impl(BPF_MAP_TYPE_HASH);
+}
+
+static void rhtab_mem_setup(void)
+{
+ htab_mem_setup_impl(BPF_MAP_TYPE_RHASH);
+}
+
static void htab_mem_add_fn(pthread_barrier_t *notify)
{
while (true) {
cleanup_cgroup_environment();
}
+static void rhtab_mem_validate(void)
+{
+ if (args.preallocated) {
+ fprintf(stderr, "rhash map does not support preallocation\n");
+ exit(1);
+ }
+ htab_mem_validate();
+}
+
const struct bench bench_htab_mem = {
.name = "htab-mem",
.argp = &bench_htab_mem_argp,
.report_progress = htab_mem_report_progress,
.report_final = htab_mem_report_final,
};
+
+const struct bench bench_rhtab_mem = {
+ .name = "rhtab-mem",
+ .argp = &bench_htab_mem_argp,
+ .validate = rhtab_mem_validate,
+ .setup = rhtab_mem_setup,
+ .producer_thread = htab_mem_producer,
+ .measure = htab_mem_measure,
+ .report_progress = htab_mem_report_progress,
+ .report_final = htab_mem_report_final,
+};