rhashtable: Bounce deferred worker kick through irq_work
author     Tejun Heo <tj@kernel.org>
           Tue, 21 Apr 2026 06:03:26 +0000 (20:03 -1000)
committer  Tejun Heo <tj@kernel.org>
           Tue, 21 Apr 2026 06:10:50 +0000 (20:10 -1000)
Inserts past 75% load call schedule_work(&ht->run_work) to kick an
async resize. If a caller holds a raw spinlock (e.g. an
insecure_elasticity user), schedule_work() under that lock records

  caller_lock -> pool->lock -> pi_lock -> rq->__lock

A cycle forms if any of these locks is acquired in the reverse
direction elsewhere. sched_ext, the only current insecure_elasticity
user, hits this: it holds scx_sched_lock across rhashtable inserts of
sub-schedulers, while scx_bypass() takes rq->__lock -> scx_sched_lock.
Exercising the resize path produces:

  Chain exists of:
    &pool->lock --> &rq->__lock --> scx_sched_lock
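
A minimal sketch of the triggering caller pattern (caller_lock, ht, obj
and params are illustrative names, not lifted from sched_ext):

  raw_spin_lock(&caller_lock);
  /* table built with .insecure_elasticity = true; before this change,
   * crossing 75% load called schedule_work() from inside the insert,
   * taking pool->lock (and, via the worker wakeup, pi_lock and
   * rq->__lock) while caller_lock was still held */
  rhashtable_insert_fast(&ht, &obj->node, params);
  raw_spin_unlock(&caller_lock);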

Bounce the kick from the insert paths through irq_work so
schedule_work() runs from hard IRQ context with the caller's lock no
longer held. rht_deferred_worker()'s self-rearm on error stays on
schedule_work(&ht->run_work) - the worker runs in process context with
no caller lock held, and keeping the self-requeue on @run_work lets
cancel_work_sync() in rhashtable_free_and_destroy() drain it.
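
After this change the kick takes the following path (a sketch of the
call chain, not verbatim code; see the diff below):

  __rhashtable_insert_fast()               /* may hold the caller's raw lock */
      irq_work_queue(&ht->run_irq_work);   /* safe under a raw spinlock */
  rht_deferred_irq_work()                  /* hard IRQ, caller's lock not held */
      schedule_work(&ht->run_work);
  rht_deferred_worker()                    /* process context, does the resize */
      schedule_work(&ht->run_work);        /* on error only: self-rearm stays on @run_work */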

v3: Keep rht_deferred_worker()'s self-rearm on schedule_work(&run_work).
    Routing it through irq_work in v2 broke cancel_work_sync()'s
    self-requeue handling - an irq_work queued after irq_work_sync()
    returned but while cancel_work_sync() was still waiting could fire
    post-teardown.

v2: Bounce unconditionally instead of gating on insecure_elasticity,
    as suggested by Herbert.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
include/linux/rhashtable-types.h
include/linux/rhashtable.h
lib/rhashtable.c

diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h
index 72082428d6c6e64c203cc2277890f82a9d982518..fc2f596a6df1bf6a46cb4bc56041cf28629aa75b 100644
--- a/include/linux/rhashtable-types.h
+++ b/include/linux/rhashtable-types.h
@@ -12,6 +12,7 @@
 #include <linux/alloc_tag.h>
 #include <linux/atomic.h>
 #include <linux/compiler.h>
+#include <linux/irq_work_types.h>
 #include <linux/mutex.h>
 #include <linux/workqueue_types.h>
 
@@ -77,6 +78,7 @@ struct rhashtable_params {
  * @p: Configuration parameters
  * @rhlist: True if this is an rhltable
  * @run_work: Deferred worker to expand/shrink asynchronously
+ * @run_irq_work: Bounces the @run_work kick through hard IRQ context.
  * @mutex: Mutex to protect current/future table swapping
  * @lock: Spin lock to protect walker list
  * @nelems: Number of elements in table
@@ -88,6 +90,7 @@ struct rhashtable {
        struct rhashtable_params        p;
        bool                            rhlist;
        struct work_struct              run_work;
+       struct irq_work                 run_irq_work;
        struct mutex                    mutex;
        spinlock_t                      lock;
        atomic_t                        nelems;
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 7def3f0f556b53a9a7fccebc8f0573f8df0b99c5..ef5230cece364b21db7cc0a296a6bcabc585d10e 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -20,6 +20,7 @@
 
 #include <linux/err.h>
 #include <linux/errno.h>
+#include <linux/irq_work.h>
 #include <linux/jhash.h>
 #include <linux/list_nulls.h>
 #include <linux/workqueue.h>
@@ -847,7 +848,7 @@ slow_path:
        rht_assign_unlock(tbl, bkt, obj, flags);
 
        if (rht_grow_above_75(ht, tbl))
-               schedule_work(&ht->run_work);
+               irq_work_queue(&ht->run_irq_work);
 
        data = NULL;
 out:
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index fb2b7bc137bae1692cb5e8df3d3fff6e4905dfc2..7a67ef5b67b666afdd638394f895d7732fb5e62e 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -441,10 +441,33 @@ static void rht_deferred_worker(struct work_struct *work)
 
        mutex_unlock(&ht->mutex);
 
+       /*
+        * Re-arm via @run_work, not @run_irq_work.
+        * rhashtable_free_and_destroy() drains async work as irq_work_sync()
+        * followed by cancel_work_sync(). If this site queued irq_work while
+        * cancel_work_sync() was waiting for us, irq_work_sync() would already
+        * have returned and the stale irq_work could fire post-teardown.
+        * cancel_work_sync() natively handles self-requeue on @run_work.
+        */
        if (err)
                schedule_work(&ht->run_work);
 }
 
+/*
+ * Insert-path callers can run under a raw spinlock (e.g. an insecure_elasticity
+ * user). Calling schedule_work() under that lock records caller_lock ->
+ * pool->lock -> pi_lock -> rq->__lock, closing a locking cycle if any of
+ * these is acquired in the reverse direction elsewhere. Bounce through
+ * irq_work so the schedule_work() runs with the caller's lock no longer held.
+ */
+static void rht_deferred_irq_work(struct irq_work *irq_work)
+{
+       struct rhashtable *ht = container_of(irq_work, struct rhashtable,
+                                            run_irq_work);
+
+       schedule_work(&ht->run_work);
+}
+
 static int rhashtable_insert_rehash(struct rhashtable *ht,
                                    struct bucket_table *tbl)
 {
@@ -477,7 +500,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht,
                if (err == -EEXIST)
                        err = 0;
        } else
-               schedule_work(&ht->run_work);
+               irq_work_queue(&ht->run_irq_work);
 
        return err;
 
@@ -488,7 +511,7 @@ fail:
 
        /* Schedule async rehash to retry allocation in process context. */
        if (err == -ENOMEM)
-               schedule_work(&ht->run_work);
+               irq_work_queue(&ht->run_irq_work);
 
        return err;
 }
@@ -630,7 +653,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
                        rht_unlock(tbl, bkt, flags);
 
                        if (inserted && rht_grow_above_75(ht, tbl))
-                               schedule_work(&ht->run_work);
+                               irq_work_queue(&ht->run_irq_work);
                }
        } while (!IS_ERR_OR_NULL(new_tbl));
 
@@ -1085,6 +1108,7 @@ int rhashtable_init_noprof(struct rhashtable *ht,
        RCU_INIT_POINTER(ht->tbl, tbl);
 
        INIT_WORK(&ht->run_work, rht_deferred_worker);
+       init_irq_work(&ht->run_irq_work, rht_deferred_irq_work);
 
        return 0;
 }
@@ -1150,6 +1174,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
        struct bucket_table *tbl, *next_tbl;
        unsigned int i;
 
+       irq_work_sync(&ht->run_irq_work);
        cancel_work_sync(&ht->run_work);
 
        mutex_lock(&ht->mutex);