]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
netfilter: nfnetlink_queue: make hash table per queue
authorFlorian Westphal <fw@strlen.de>
Tue, 7 Apr 2026 15:00:01 +0000 (17:00 +0200)
committerFlorian Westphal <fw@strlen.de>
Wed, 8 Apr 2026 11:34:51 +0000 (13:34 +0200)
Sharing a global hash table among all queues is tempting, but
it can cause crash:

BUG: KASAN: slab-use-after-free in nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
[..]
 nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
 nfnetlink_rcv_msg+0x46a/0x930
 kmem_cache_alloc_node_noprof+0x11e/0x450

struct nf_queue_entry is freed via kfree, but parallel cpu can still
encounter such an nf_queue_entry when walking the list.

Alternative fix is to free the nf_queue_entry via kfree_rcu() instead,
but as we have to alloc/free for each skb this will cause more mem
pressure.

Cc: Scott Mitchell <scott.k.mitch1@gmail.com>
Fixes: e19079adcd26 ("netfilter: nfnetlink_queue: optimize verdict lookup with hash table")
Signed-off-by: Florian Westphal <fw@strlen.de>
include/net/netfilter/nf_queue.h
net/netfilter/nfnetlink_queue.c

index 45eb26b2e95b37ec855b964f06888d6233b95ef3..d17035d14d96cff132f0fa96ec5858760029d57c 100644 (file)
@@ -23,7 +23,6 @@ struct nf_queue_entry {
        struct nf_hook_state    state;
        bool                    nf_ct_is_unconfirmed;
        u16                     size; /* sizeof(entry) + saved route keys */
-       u16                     queue_num;
 
        /* extra space to store route keys */
 };
index 47f7f62906e218c93d2843e4450ce30cd92fac21..8e02f84784dabce2e9f56cfb49b961cb7204ecc2 100644 (file)
@@ -49,8 +49,8 @@
 #endif
 
 #define NFQNL_QMAX_DEFAULT 1024
-#define NFQNL_HASH_MIN     1024
-#define NFQNL_HASH_MAX     1048576
+#define NFQNL_HASH_MIN     8
+#define NFQNL_HASH_MAX     32768
 
 /* We're using struct nlattr which has 16bit nla_len. Note that nla_len
  * includes the header length. Thus, the maximum packet length that we
  */
 #define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
 
-/* Composite key for packet lookup: (net, queue_num, packet_id) */
-struct nfqnl_packet_key {
-       possible_net_t net;
-       u32 packet_id;
-       u16 queue_num;
-} __aligned(sizeof(u32));  /* jhash2 requires 32-bit alignment */
-
-/* Global rhashtable - one for entire system, all netns */
-static struct rhashtable nfqnl_packet_map __read_mostly;
-
-/* Helper to initialize composite key */
-static inline void nfqnl_init_key(struct nfqnl_packet_key *key,
-                                 struct net *net, u32 packet_id, u16 queue_num)
-{
-       memset(key, 0, sizeof(*key));
-       write_pnet(&key->net, net);
-       key->packet_id = packet_id;
-       key->queue_num = queue_num;
-}
-
 struct nfqnl_instance {
        struct hlist_node hlist;                /* global list of queues */
-       struct rcu_head rcu;
+       struct rhashtable nfqnl_packet_map;
+       struct rcu_work rwork;
 
        u32 peer_portid;
        unsigned int queue_maxlen;
@@ -106,6 +87,7 @@ struct nfqnl_instance {
 
 typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
 
+static struct workqueue_struct *nfq_cleanup_wq __read_mostly;
 static unsigned int nfnl_queue_net_id __read_mostly;
 
 #define INSTANCE_BUCKETS       16
@@ -124,34 +106,10 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
        return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
 }
 
-/* Extract composite key from nf_queue_entry for hashing */
-static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed)
-{
-       const struct nf_queue_entry *entry = data;
-       struct nfqnl_packet_key key;
-
-       nfqnl_init_key(&key, entry->state.net, entry->id, entry->queue_num);
-
-       return jhash2((u32 *)&key, sizeof(key) / sizeof(u32), seed);
-}
-
-/* Compare stack-allocated key against entry */
-static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg,
-                                 const void *obj)
-{
-       const struct nfqnl_packet_key *key = arg->key;
-       const struct nf_queue_entry *entry = obj;
-
-       return !net_eq(entry->state.net, read_pnet(&key->net)) ||
-              entry->queue_num != key->queue_num ||
-              entry->id != key->packet_id;
-}
-
 static const struct rhashtable_params nfqnl_rhashtable_params = {
        .head_offset = offsetof(struct nf_queue_entry, hash_node),
-       .key_len = sizeof(struct nfqnl_packet_key),
-       .obj_hashfn = nfqnl_packet_obj_hashfn,
-       .obj_cmpfn = nfqnl_packet_obj_cmpfn,
+       .key_offset = offsetof(struct nf_queue_entry, id),
+       .key_len = sizeof(u32),
        .automatic_shrinking = true,
        .min_size = NFQNL_HASH_MIN,
        .max_size = NFQNL_HASH_MAX,
@@ -190,6 +148,10 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
        spin_lock_init(&inst->lock);
        INIT_LIST_HEAD(&inst->queue_list);
 
+       err = rhashtable_init(&inst->nfqnl_packet_map, &nfqnl_rhashtable_params);
+       if (err < 0)
+               goto out_free;
+
        spin_lock(&q->instances_lock);
        if (instance_lookup(q, queue_num)) {
                err = -EEXIST;
@@ -210,6 +172,8 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
 
 out_unlock:
        spin_unlock(&q->instances_lock);
+       rhashtable_destroy(&inst->nfqnl_packet_map);
+out_free:
        kfree(inst);
        return ERR_PTR(err);
 }
@@ -217,15 +181,18 @@ out_unlock:
 static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
                        unsigned long data);
 
-static void
-instance_destroy_rcu(struct rcu_head *head)
+static void instance_destroy_work(struct work_struct *work)
 {
-       struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
-                                                  rcu);
+       struct nfqnl_instance *inst;
 
+       inst = container_of(to_rcu_work(work), struct nfqnl_instance,
+                           rwork);
        rcu_read_lock();
        nfqnl_flush(inst, NULL, 0);
        rcu_read_unlock();
+
+       rhashtable_destroy(&inst->nfqnl_packet_map);
+
        kfree(inst);
        module_put(THIS_MODULE);
 }
@@ -234,7 +201,9 @@ static void
 __instance_destroy(struct nfqnl_instance *inst)
 {
        hlist_del_rcu(&inst->hlist);
-       call_rcu(&inst->rcu, instance_destroy_rcu);
+
+       INIT_RCU_WORK(&inst->rwork, instance_destroy_work);
+       queue_rcu_work(nfq_cleanup_wq, &inst->rwork);
 }
 
 static void
@@ -250,9 +219,7 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
 {
        int err;
 
-       entry->queue_num = queue->queue_num;
-
-       err = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node,
+       err = rhashtable_insert_fast(&queue->nfqnl_packet_map, &entry->hash_node,
                                     nfqnl_rhashtable_params);
        if (unlikely(err))
                return err;
@@ -266,23 +233,19 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
 static void
 __dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
 {
-       rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node,
+       rhashtable_remove_fast(&queue->nfqnl_packet_map, &entry->hash_node,
                               nfqnl_rhashtable_params);
        list_del(&entry->list);
        queue->queue_total--;
 }
 
 static struct nf_queue_entry *
-find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id,
-                  struct net *net)
+find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
 {
-       struct nfqnl_packet_key key;
        struct nf_queue_entry *entry;
 
-       nfqnl_init_key(&key, net, id, queue->queue_num);
-
        spin_lock_bh(&queue->lock);
-       entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key,
+       entry = rhashtable_lookup_fast(&queue->nfqnl_packet_map, &id,
                                       nfqnl_rhashtable_params);
 
        if (entry)
@@ -1531,7 +1494,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
 
        verdict = ntohl(vhdr->verdict);
 
-       entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net);
+       entry = find_dequeue_entry(queue, ntohl(vhdr->id));
        if (entry == NULL)
                return -ENOENT;
 
@@ -1880,40 +1843,38 @@ static int __init nfnetlink_queue_init(void)
 {
        int status;
 
-       status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params);
-       if (status < 0)
-               return status;
+       nfq_cleanup_wq = alloc_ordered_workqueue("nfq_workqueue", 0);
+       if (!nfq_cleanup_wq)
+               return -ENOMEM;
 
        status = register_pernet_subsys(&nfnl_queue_net_ops);
-       if (status < 0) {
-               pr_err("failed to register pernet ops\n");
-               goto cleanup_rhashtable;
-       }
+       if (status < 0)
+               goto cleanup_pernet_subsys;
 
-       netlink_register_notifier(&nfqnl_rtnl_notifier);
-       status = nfnetlink_subsys_register(&nfqnl_subsys);
-       if (status < 0) {
-               pr_err("failed to create netlink socket\n");
-               goto cleanup_netlink_notifier;
-       }
+       status = netlink_register_notifier(&nfqnl_rtnl_notifier);
+       if (status < 0)
+              goto cleanup_rtnl_notifier;
 
        status = register_netdevice_notifier(&nfqnl_dev_notifier);
-       if (status < 0) {
-               pr_err("failed to register netdevice notifier\n");
-               goto cleanup_netlink_subsys;
-       }
+       if (status < 0)
+               goto cleanup_dev_notifier;
+
+       status = nfnetlink_subsys_register(&nfqnl_subsys);
+       if (status < 0)
+               goto cleanup_nfqnl_subsys;
 
        nf_register_queue_handler(&nfqh);
 
        return status;
 
-cleanup_netlink_subsys:
-       nfnetlink_subsys_unregister(&nfqnl_subsys);
-cleanup_netlink_notifier:
+cleanup_nfqnl_subsys:
+       unregister_netdevice_notifier(&nfqnl_dev_notifier);
+cleanup_dev_notifier:
        netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+cleanup_rtnl_notifier:
        unregister_pernet_subsys(&nfnl_queue_net_ops);
-cleanup_rhashtable:
-       rhashtable_destroy(&nfqnl_packet_map);
+cleanup_pernet_subsys:
+       destroy_workqueue(nfq_cleanup_wq);
        return status;
 }
 
@@ -1924,9 +1885,7 @@ static void __exit nfnetlink_queue_fini(void)
        nfnetlink_subsys_unregister(&nfqnl_subsys);
        netlink_unregister_notifier(&nfqnl_rtnl_notifier);
        unregister_pernet_subsys(&nfnl_queue_net_ops);
-
-       rhashtable_destroy(&nfqnl_packet_map);
-
+       destroy_workqueue(nfq_cleanup_wq);
        rcu_barrier(); /* Wait for completion of call_rcu()'s */
 }